/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#include "tcg-has.h"

#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)

typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY (MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    bool is_const;
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t val;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
} TempOptInfo;

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    TCGType type;
    int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
} OptContext;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ti_is_const(TempOptInfo *ti)
{
    return ti->is_const;
}

static inline uint64_t ti_const_val(TempOptInfo *ti)
{
    return ti->val;
}

static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
{
    return ti_is_const(ti) && ti_const_val(ti) == val;
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ti_is_const(ts_info(ts));
}

static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    return ti_is_const_val(ts_info(ts), val);
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    return ts_is_const_val(arg_temp(arg), val);
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    return a->kind < b->kind ? b : a;
}

/* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    QSIMPLEQ_INIT(&ti->mem_copy);
    if (ts->kind == TEMP_CONST) {
        ti->is_const = true;
        ti->val = ts->val;
        ti->z_mask = ts->val;
        ti->s_mask = INT64_MIN >> clrsb64(ts->val);
    } else {
        ti->is_const = false;
        ti->z_mask = -1;
        ti->s_mask = 0;
    }
}
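
/*
 * Illustrative note (not part of the original source): for a TEMP_CONST
 * the masks above are exact.  E.g. for the constant 1, z_mask == 1
 * (only bit 0 may be nonzero) and clrsb64(1) == 62, so
 * s_mask == INT64_MIN >> 62 == 0xfffffffffffffffe: every bit except
 * bit 0 is a copy of the (zero) most significant bit.
 */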

static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TCGTemp *ts = mc->ts;
    TempOptInfo *ti = ts_info(ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}

static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    while (true) {
        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
        if (!mc) {
            break;
        }
        remove_mem_copy(ctx, mc);
    }
}

static void remove_mem_copy_all(OptContext *ctx)
{
    remove_mem_copy_in(ctx, 0, -1);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}

static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *i, *ret;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    ret = ts;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        ret = cmp_better_copy(ret, i);
    }
    return ret;
}

static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *si = ts_info(src_ts);
    TempOptInfo *di = ts_info(dst_ts);
    MemCopyInfo *mc;

    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
        tcg_debug_assert(mc->ts == src_ts);
        mc->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TCGTemp *pts = ti->prev_copy;
    TCGTemp *nts = ti->next_copy;
    TempOptInfo *pi = ts_info(pts);
    TempOptInfo *ni = ts_info(nts);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->is_const = false;
    ti->z_mask = -1;
    ti->s_mask = 0;

    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
        if (ts == nts) {
            /* Last temp copy being removed, the mem copies die. */
            MemCopyInfo *mc;
            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
                interval_tree_remove(&mc->itree, &ctx->mem_copy);
            }
            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
        } else {
            move_mem_copies(find_better_copy(nts), ts);
        }
    }
}

static void reset_temp(OptContext *ctx, TCGArg arg)
{
    reset_ts(ctx, arg_temp(arg));
}

static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
{
    MemCopyInfo *mc;
    TempOptInfo *ti;

    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
    if (mc) {
        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
    } else {
        mc = tcg_malloc(sizeof(*mc));
    }

    memset(mc, 0, sizeof(*mc));
    mc->itree.start = start;
    mc->itree.last = last;
    mc->type = type;
    interval_tree_insert(&mc->itree, &ctx->mem_copy);

    ts = find_better_copy(ts);
    ti = ts_info(ts);
    mc->ts = ts;
    QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
{
    MemCopyInfo *mc;

    for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
        if (mc->itree.start == s && mc->type == type) {
            return find_better_copy(mc->ts);
        }
    }
    return NULL;
}

static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
{
    TCGType type = ctx->type;
    TCGTemp *ts;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    ts = tcg_constant_internal(type, val);
    init_ts_info(ctx, ts);

    return temp_arg(ts);
}

static TCGArg arg_new_temp(OptContext *ctx)
{
    TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
    init_ts_info(ctx, ts);
    return temp_arg(ts);
}

static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
                               TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
}

static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
                                TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
}

static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(ctx, dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->s_mask = si->s_mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;
        di->is_const = si->is_const;
        di->val = si->val;

        if (!QSIMPLEQ_EMPTY(&si->mem_copy)
            && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
            move_mem_copies(dst_ts, src_ts);
        }
    }
    return true;
}

static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    /* Convert movi to mov with constant temp. */
    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}

static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
                                      uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    case INDEX_op_add:
        return x + y;

    case INDEX_op_sub:
        return x - y;

    case INDEX_op_mul:
        return x * y;

    case INDEX_op_and:
    case INDEX_op_and_vec:
        return x & y;

    case INDEX_op_or:
    case INDEX_op_or_vec:
        return x | y;

    case INDEX_op_xor:
    case INDEX_op_xor_vec:
        return x ^ y;

    case INDEX_op_shl:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x << (y & 31);
        }
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x >> (y & 31);
        }
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x >> (y & 31);
        }
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr:
        if (type == TCG_TYPE_I32) {
            return ror32(x, y & 31);
        }
        return ror64(x, y & 63);

    case INDEX_op_rotl:
        if (type == TCG_TYPE_I32) {
            return rol32(x, y & 31);
        }
        return rol64(x, y & 63);

    case INDEX_op_not:
    case INDEX_op_not_vec:
        return ~x;

    case INDEX_op_neg:
        return -x;

    case INDEX_op_andc:
    case INDEX_op_andc_vec:
        return x & ~y;

    case INDEX_op_orc:
    case INDEX_op_orc_vec:
        return x | ~y;

    case INDEX_op_eqv:
    case INDEX_op_eqv_vec:
        return ~(x ^ y);

    case INDEX_op_nand:
    case INDEX_op_nand_vec:
        return ~(x & y);

    case INDEX_op_nor:
    case INDEX_op_nor_vec:
        return ~(x | y);

    case INDEX_op_clz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? clz32(x) : y;
        }
        return x ? clz64(x) : y;

    case INDEX_op_ctz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? ctz32(x) : y;
        }
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop:
        return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);

    case INDEX_op_bswap16:
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    case INDEX_op_bswap32:
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh:
        if (type == TCG_TYPE_I32) {
            return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
        }
        mulu64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_mulsh:
        if (type == TCG_TYPE_I32) {
            return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
        }
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_divs:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        if (type == TCG_TYPE_I32) {
            return (int32_t)x / ((int32_t)y ? : 1);
        }
        return (int64_t)x / ((int64_t)y ? : 1);

    case INDEX_op_divu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x / ((uint32_t)y ? : 1);
        }
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rems:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x % ((int32_t)y ? : 1);
        }
        return (int64_t)x % ((int64_t)y ? : 1);

    case INDEX_op_remu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x % ((uint32_t)y ? : 1);
        }
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        g_assert_not_reached();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
{
    uint64_t res = do_constant_folding_2(op, type, x, y);
    if (type == TCG_TYPE_I32) {
        res = (int32_t)res;
    }
    return res;
}
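
/*
 * Illustrative example (not part of the original source): for TCG_TYPE_I32
 * the result is truncated and then sign-extended, matching how 32-bit
 * constants are represented elsewhere.  E.g.
 *   do_constant_folding(INDEX_op_add, TCG_TYPE_I32, 0x80000000, 0x80000000)
 * computes 0x100000000, which the (int32_t) truncation folds to 0.
 */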

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

static int do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        return -1;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
{
    if (arg_is_const(x) && arg_is_const(y)) {
        uint64_t xv = arg_info(x)->val;
        uint64_t yv = arg_info(y)->val;

        switch (type) {
        case TCG_TYPE_I32:
            return do_constant_folding_cond_32(xv, yv, c);
        case TCG_TYPE_I64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            /* Only scalar comparisons are optimizable */
            return -1;
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const_val(y, 0)) {
        switch (c) {
        case TCG_COND_LTU:
        case TCG_COND_TSTNE:
            return 0;
        case TCG_COND_GEU:
        case TCG_COND_TSTEQ:
            return 1;
        default:
            return -1;
        }
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * If *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

#define NO_DEST  temp_arg(NULL)

static int pref_commutative(TempOptInfo *ti)
{
    /* Slight preference for non-zero constants second. */
    return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += pref_commutative(arg_info(a1));
    sum -= pref_commutative(arg_info(a2));

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}
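
/*
 * Illustrative example (not part of the original source): for a commutative
 * op such as "add d, $5, t1", the constant sorts after the non-constant
 * operand, so the arguments are swapped to give "add d, t1, $5"; likewise
 * "add d, t1, d" becomes "add d, d, t1" to favor the op a, a, b form.
 */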

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += pref_commutative(arg_info(p1[0]));
    sum += pref_commutative(arg_info(p1[1]));
    sum -= pref_commutative(arg_info(p2[0]));
    sum -= pref_commutative(arg_info(p2[1]));
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
    TCGCond cond;
    TempOptInfo *i1;
    bool swap;
    int r;

    swap = swap_commutative(dest, p1, p2);
    cond = *pcond;
    if (swap) {
        *pcond = cond = tcg_swap_cond(cond);
    }

    r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
    if (r >= 0) {
        return r;
    }
    if (!is_tst_cond(cond)) {
        return -1;
    }

    i1 = arg_info(*p1);

    /*
     * TSTNE x,x -> NE x,0
     * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
     */
    if (args_are_copies(*p1, *p2) ||
        (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
        return -1;
    }

    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
    if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_ltge_cond(cond);
        return -1;
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg tmp = arg_new_temp(ctx);

        op2->args[0] = tmp;
        op2->args[1] = *p1;
        op2->args[2] = *p2;

        *p1 = tmp;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}
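
/*
 * Illustrative example (not part of the original source): if x is known to
 * have z_mask 0x0f (only the low four bits can be nonzero), then
 * "TSTNE x, 0xff" covers every possibly-nonzero bit of x, so the
 * transformation above rewrites it as "NE x, 0".
 */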

static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_info(bl)->val;
        tcg_target_ulong bhv = arg_info(bh)->val;
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_info(al)->val;
            tcg_target_ulong ahv = arg_info(ah)->val;
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_bb(OptContext *ctx)
{
    /* We only optimize memory barriers across basic blocks. */
    ctx->prev_mb = NULL;
}

static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);
    /* We only optimize across extended basic blocks. */
    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
    remove_mem_copy_all(ctx);
}

static bool finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ctx, ts);
    }
    return true;
}

/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t;

        t = arg_info(op->args[1])->val;
        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_info(op->args[1])->val;
        uint64_t t2 = arg_info(op->args[2])->val;

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}

/*
 * Record "zero" and "sign" masks for the single output of @op.
 * See TempOptInfo definition of z_mask and s_mask.
 * If z_mask allows, fold the output to constant zero.
 * The passed s_mask may be augmented by z_mask.
 */
static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, int64_t s_mask)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGTemp *ts;
    TempOptInfo *ti;
    int rep;

    /* Only single-output opcodes are supported here. */
    tcg_debug_assert(def->nb_oargs == 1);

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended.  Certainly that's how we
     * represent our constants elsewhere.  Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        z_mask = (int32_t)z_mask;
        s_mask |= INT32_MIN;
    }

    if (z_mask == 0) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;

    return true;
}
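
/*
 * Illustrative example (not part of the original source): with z_mask 0xff
 * and no incoming sign information (s_mask 0), clz64(~s_mask) == 0 and
 * clz64(z_mask) == 56, so rep == 55 and the stored s_mask becomes
 * INT64_MIN >> 55 == 0xffffffffffffff00: bits 8..63 are known copies of
 * the (zero) sign bit, as implied by the zero mask.
 */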

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zs(ctx, op, z_mask, 0);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zs(ctx, op, -1, s_mask);
}

/*
 * An "affected" mask bit is 0 if and only if the result is identical
 * to the first input.  Thus if the entire mask is 0, the operation
 * is equivalent to a copy.
 */
static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
{
    if (ctx->type == TCG_TYPE_I32) {
        a_mask = (uint32_t)a_mask;
    }
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}
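
/*
 * Illustrative example (not part of the original source): in fold_and
 * below, a value with z_mask 0x0f ANDed with the constant 0xff yields
 * a_mask == 0x0f & ~0xff == 0, so no bit of the input can change and
 * the op is replaced by a copy of the first input.
 */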

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 *   1) those that produce a constant
 *   2) those that produce a copy
 *   3) those that produce information about the result value.
 */

static bool fold_addco(OptContext *ctx, TCGOp *op);
static bool fold_or(OptContext *ctx, TCGOp *op);
static bool fold_orc(OptContext *ctx, TCGOp *op);
static bool fold_subbo(OptContext *ctx, TCGOp *op);
static bool fold_xor(OptContext *ctx, TCGOp *op);

static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t2;

    op = QTAILQ_PREV(op, link);
    switch (op->opc) {
    case INDEX_op_addco:
        op->opc = INDEX_op_add;
        fold_add(ctx, op);
        break;
    case INDEX_op_addcio:
        op->opc = INDEX_op_addci;
        break;
    case INDEX_op_addc1o:
        op->opc = INDEX_op_add;
        t2 = arg_info(op->args[2]);
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
            /* Perform other constant folding, if needed. */
            fold_add(ctx, op);
        } else {
            TCGArg ret = op->args[0];
            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
            op->args[0] = ret;
            op->args[1] = ret;
            op->args[2] = arg_new_constant(ctx, 1);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static bool fold_addci(OptContext *ctx, TCGOp *op)
{
    fold_commutative(ctx, op);

    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    squash_prev_carryout(ctx, op);
    op->opc = INDEX_op_add;

    if (ctx->carry_state > 0) {
        TempOptInfo *t2 = arg_info(op->args[2]);

        /*
         * Propagate the known carry-in into a constant, if possible.
         * Otherwise emit a second add +1.
         */
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
        } else {
            TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);

            op2->args[0] = op->args[0];
            op2->args[1] = op->args[1];
            op2->args[2] = op->args[2];
            fold_add(ctx, op2);

            op->args[1] = op->args[0];
            op->args[2] = arg_new_constant(ctx, 1);
        }
    }

    ctx->carry_state = -1;
    return fold_add(ctx, op);
}

static bool fold_addcio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;
    uint64_t sum, max;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /*
     * The z_mask value is >= the maximum value that can be represented
     * with the known zero bits.  So adding the z_mask values will not
     * overflow if and only if the true values cannot overflow.
     */
    if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
        !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
        carry_out = 0;
    }

    if (ctx->carry_state < 0) {
        ctx->carry_state = carry_out;
        return finish_folding(ctx, op);
    }

    squash_prev_carryout(ctx, op);
    if (ctx->carry_state == 0) {
        goto do_addco;
    }

    /* Propagate the known carry-in into a constant, if possible. */
    max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
    if (ti_is_const(t2)) {
        uint64_t v = ti_const_val(t2) & max;
        if (v < max) {
            op->args[2] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        /* max + known carry in produces known carry out. */
        carry_out = 1;
    }
    if (ti_is_const(t1)) {
        uint64_t v = ti_const_val(t1) & max;
        if (v < max) {
            op->args[1] = arg_new_constant(ctx, v + 1);
            goto do_addco;
        }
        carry_out = 1;
    }

    /* Adjust the opcode to remember the known carry-in. */
    op->opc = INDEX_op_addc1o;
    ctx->carry_state = carry_out;
    return finish_folding(ctx, op);

 do_addco:
    op->opc = INDEX_op_addco;
    return fold_addco(ctx, op);
}

static bool fold_addco(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;
    uint64_t ign;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);

        if (ti_is_const(t1)) {
            uint64_t v1 = ti_const_val(t1);
            /* Given sign-extension of z_mask for I32, we need not truncate. */
            carry_out = uadd64_overflow(v1, v2, &ign);
        } else if (v2 == 0) {
            carry_out = 0;
        }
    } else {
        /*
         * The z_mask value is >= the maximum value that can be represented
         * with the known zero bits.  So adding the z_mask values will not
         * overflow if and only if the true values cannot overflow.
         */
        if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
            carry_out = 0;
        }
    }
    ctx->carry_state = carry_out;
    return finish_folding(ctx, op);
}
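
/*
 * Illustrative example (not part of the original source): if t1 has
 * z_mask 0x00ff and t2 has z_mask 0xff00, the mask sum 0xffff does not
 * overflow 64 bits, so no pair of actual values drawn from those masks
 * can carry out of the addition and carry_out is known to be 0.
 */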
1401
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001402static bool fold_and(OptContext *ctx, TCGOp *op)
1403{
Richard Henderson1ca73722024-12-08 18:47:15 -06001404 uint64_t z1, z2, z_mask, s_mask;
1405 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001406
Richard Henderson7a2f7082021-08-26 07:06:39 -07001407 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07001408 fold_xi_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001409 fold_xi_to_x(ctx, op, -1) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07001410 fold_xx_to_x(ctx, op)) {
1411 return true;
1412 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001413
Richard Henderson1ca73722024-12-08 18:47:15 -06001414 t1 = arg_info(op->args[1]);
1415 t2 = arg_info(op->args[2]);
1416 z1 = t1->z_mask;
1417 z2 = t2->z_mask;
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001418
1419 /*
Richard Hendersonfae450b2021-08-25 22:42:19 -07001420 * Known-zeros does not imply known-ones. Therefore unless
1421 * arg2 is constant, we can't infer affected bits from it.
1422 */
Richard Henderson1ca73722024-12-08 18:47:15 -06001423 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
Richard Henderson045ace32024-12-19 10:33:51 -08001424 return true;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001425 }
1426
Richard Henderson1ca73722024-12-08 18:47:15 -06001427 z_mask = z1 & z2;
1428
1429 /*
1430 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1431 * Bitwise operations preserve the relative quantity of the repetitions.
1432 */
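    /*
     * E.g. (illustrative): one operand sign-extended from bit 15 and the
     * other from bit 7 -- the AND result is still sign-extended from
     * bit 15, which is exactly the intersection of the two s_mask values.
     */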
1433 s_mask = t1->s_mask & t2->s_mask;
1434
1435 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001436}
1437
1438static bool fold_andc(OptContext *ctx, TCGOp *op)
1439{
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001440 uint64_t z_mask, s_mask;
1441 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001442
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001443 if (fold_const2(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001444 fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001445 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001446 fold_ix_to_not(ctx, op, -1)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001447 return true;
1448 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001449
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001450 t1 = arg_info(op->args[1]);
1451 t2 = arg_info(op->args[2]);
1452 z_mask = t1->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001453
Richard Henderson899281c2023-11-15 11:18:55 -08001454 if (ti_is_const(t2)) {
1455 /* Fold andc r,x,i to and r,x,~i. */
1456 switch (ctx->type) {
1457 case TCG_TYPE_I32:
1458 case TCG_TYPE_I64:
1459 op->opc = INDEX_op_and;
1460 break;
1461 case TCG_TYPE_V64:
1462 case TCG_TYPE_V128:
1463 case TCG_TYPE_V256:
1464 op->opc = INDEX_op_and_vec;
1465 break;
1466 default:
1467 g_assert_not_reached();
1468 }
1469 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1470 return fold_and(ctx, op);
1471 }
1472
    /*
     * The constant case for arg2 was handled above, so arg2 is known
     * to be non-constant here.  Known-zeros does not imply known-ones,
     * so nothing further can be inferred from it.
     */
Richard Hendersonfae450b2021-08-25 22:42:19 -07001484
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001485 s_mask = t1->s_mask & t2->s_mask;
1486 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001487}
1488
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001489static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1490{
1491 /* If true and false values are the same, eliminate the cmp. */
1492 if (args_are_copies(op->args[2], op->args[3])) {
1493 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1494 }
1495
1496 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1497 uint64_t tv = arg_info(op->args[2])->val;
1498 uint64_t fv = arg_info(op->args[3])->val;
1499
1500 if (tv == -1 && fv == 0) {
1501 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1502 }
1503 if (tv == 0 && fv == -1) {
1504 if (TCG_TARGET_HAS_not_vec) {
1505 op->opc = INDEX_op_not_vec;
1506 return fold_not(ctx, op);
1507 } else {
1508 op->opc = INDEX_op_xor_vec;
1509 op->args[2] = arg_new_constant(ctx, -1);
1510 return fold_xor(ctx, op);
1511 }
1512 }
1513 }
1514 if (arg_is_const(op->args[2])) {
1515 uint64_t tv = arg_info(op->args[2])->val;
1516 if (tv == -1) {
1517 op->opc = INDEX_op_or_vec;
1518 op->args[2] = op->args[3];
1519 return fold_or(ctx, op);
1520 }
1521 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1522 op->opc = INDEX_op_andc_vec;
1523 op->args[2] = op->args[1];
1524 op->args[1] = op->args[3];
1525 return fold_andc(ctx, op);
1526 }
1527 }
1528 if (arg_is_const(op->args[3])) {
1529 uint64_t fv = arg_info(op->args[3])->val;
1530 if (fv == 0) {
1531 op->opc = INDEX_op_and_vec;
1532 return fold_and(ctx, op);
1533 }
        if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
            /* bitsel d,a,b,-1 computes b | ~a, i.e. orc d,b,a. */
            TCGArg a = op->args[1];

            op->opc = INDEX_op_orc_vec;
            op->args[1] = op->args[2];
            op->args[2] = a;
            return fold_orc(ctx, op);
        }
1540 }
1541 return finish_folding(ctx, op);
1542}
1543
Richard Henderson079b0802021-08-24 09:30:59 -07001544static bool fold_brcond(OptContext *ctx, TCGOp *op)
1545{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001546 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
Richard Henderson246c4b72023-10-24 16:36:50 -07001547 &op->args[1], &op->args[2]);
Richard Henderson079b0802021-08-24 09:30:59 -07001548 if (i == 0) {
1549 tcg_op_remove(ctx->tcg, op);
1550 return true;
1551 }
1552 if (i > 0) {
1553 op->opc = INDEX_op_br;
1554 op->args[0] = op->args[3];
Richard Henderson15268552024-12-08 07:45:11 -06001555 finish_ebb(ctx);
1556 } else {
1557 finish_bb(ctx);
Richard Henderson079b0802021-08-24 09:30:59 -07001558 }
Richard Henderson15268552024-12-08 07:45:11 -06001559 return true;
Richard Henderson079b0802021-08-24 09:30:59 -07001560}
1561
Richard Henderson764d2ab2021-08-24 09:22:11 -07001562static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1563{
Richard Henderson7e64b112023-10-24 16:53:56 -07001564 TCGCond cond;
1565 TCGArg label;
Richard Henderson7a2f7082021-08-26 07:06:39 -07001566 int i, inv = 0;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001567
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001568 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
Richard Henderson7e64b112023-10-24 16:53:56 -07001569 cond = op->args[4];
1570 label = op->args[5];
Richard Henderson764d2ab2021-08-24 09:22:11 -07001571 if (i >= 0) {
1572 goto do_brcond_const;
1573 }
1574
1575 switch (cond) {
1576 case TCG_COND_LT:
1577 case TCG_COND_GE:
1578 /*
1579 * Simplify LT/GE comparisons vs zero to a single compare
1580 * vs the high word of the input.
1581 */
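        /*
         * Note (illustrative): for a double-word value split into (lo, hi),
         * the sign is determined entirely by hi, so the test collapses to
         * a single brcond on the high word.
         */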
Richard Henderson27cdb852023-10-23 11:38:00 -07001582 if (arg_is_const_val(op->args[2], 0) &&
1583 arg_is_const_val(op->args[3], 0)) {
Richard Henderson764d2ab2021-08-24 09:22:11 -07001584 goto do_brcond_high;
1585 }
1586 break;
1587
1588 case TCG_COND_NE:
1589 inv = 1;
1590 QEMU_FALLTHROUGH;
1591 case TCG_COND_EQ:
1592 /*
1593 * Simplify EQ/NE comparisons where one of the pairs
1594 * can be simplified.
1595 */
Richard Henderson67f84c92021-08-25 08:00:20 -07001596 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001597 op->args[2], cond);
1598 switch (i ^ inv) {
1599 case 0:
1600 goto do_brcond_const;
1601 case 1:
1602 goto do_brcond_high;
1603 }
1604
Richard Henderson67f84c92021-08-25 08:00:20 -07001605 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001606 op->args[3], cond);
1607 switch (i ^ inv) {
1608 case 0:
1609 goto do_brcond_const;
1610 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001611 goto do_brcond_low;
1612 }
1613 break;
1614
1615 case TCG_COND_TSTEQ:
1616 case TCG_COND_TSTNE:
1617 if (arg_is_const_val(op->args[2], 0)) {
1618 goto do_brcond_high;
1619 }
1620 if (arg_is_const_val(op->args[3], 0)) {
1621 goto do_brcond_low;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001622 }
1623 break;
1624
1625 default:
1626 break;
1627
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001628 do_brcond_low:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001629 op->opc = INDEX_op_brcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001630 op->args[1] = op->args[2];
1631 op->args[2] = cond;
1632 op->args[3] = label;
1633 return fold_brcond(ctx, op);
1634
Richard Henderson764d2ab2021-08-24 09:22:11 -07001635 do_brcond_high:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001636 op->opc = INDEX_op_brcond;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001637 op->args[0] = op->args[1];
1638 op->args[1] = op->args[3];
1639 op->args[2] = cond;
1640 op->args[3] = label;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001641 return fold_brcond(ctx, op);
Richard Henderson764d2ab2021-08-24 09:22:11 -07001642
1643 do_brcond_const:
1644 if (i == 0) {
1645 tcg_op_remove(ctx->tcg, op);
1646 return true;
1647 }
1648 op->opc = INDEX_op_br;
1649 op->args[0] = label;
Richard Henderson15268552024-12-08 07:45:11 -06001650 finish_ebb(ctx);
1651 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001652 }
Richard Henderson15268552024-12-08 07:45:11 -06001653
1654 finish_bb(ctx);
1655 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001656}
1657
Richard Henderson09bacdc2021-08-24 11:58:12 -07001658static bool fold_bswap(OptContext *ctx, TCGOp *op)
1659{
Richard Henderson57fe5c62021-08-26 12:04:46 -07001660 uint64_t z_mask, s_mask, sign;
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001661 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001662
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001663 if (ti_is_const(t1)) {
1664 return tcg_opt_gen_movi(ctx, op, op->args[0],
1665 do_constant_folding(op->opc, ctx->type,
1666 ti_const_val(t1),
1667 op->args[2]));
Richard Henderson09bacdc2021-08-24 11:58:12 -07001668 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001669
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001670 z_mask = t1->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001671 switch (op->opc) {
Richard Henderson0dd07ee2025-01-10 18:51:16 -08001672 case INDEX_op_bswap16:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001673 z_mask = bswap16(z_mask);
1674 sign = INT16_MIN;
1675 break;
Richard Henderson7498d882025-01-10 19:53:51 -08001676 case INDEX_op_bswap32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001677 z_mask = bswap32(z_mask);
1678 sign = INT32_MIN;
1679 break;
Richard Henderson3ad5d4c2025-01-10 21:54:44 -08001680 case INDEX_op_bswap64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001681 z_mask = bswap64(z_mask);
1682 sign = INT64_MIN;
1683 break;
1684 default:
1685 g_assert_not_reached();
1686 }
1687
Richard Henderson75c3bf32024-12-19 10:50:40 -08001688 s_mask = 0;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001689 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1690 case TCG_BSWAP_OZ:
1691 break;
1692 case TCG_BSWAP_OS:
1693 /* If the sign bit may be 1, force all the bits above to 1. */
1694 if (z_mask & sign) {
1695 z_mask |= sign;
1696 }
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001697 /* The value and therefore s_mask is explicitly sign-extended. */
1698 s_mask = sign;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001699 break;
1700 default:
1701 /* The high bits are undefined: force all bits above the sign to 1. */
1702 z_mask |= sign << 1;
1703 break;
1704 }
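    /*
     * Illustrative example: bswap16 with TCG_BSWAP_OZ on an input whose
     * z_mask is 0x00ff yields z_mask 0xff00, with bits 16..63 known zero.
     */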
Richard Hendersonfae450b2021-08-25 22:42:19 -07001705
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001706 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson09bacdc2021-08-24 11:58:12 -07001707}
1708
Richard Henderson5cf32be2021-08-24 08:17:08 -07001709static bool fold_call(OptContext *ctx, TCGOp *op)
1710{
1711 TCGContext *s = ctx->tcg;
1712 int nb_oargs = TCGOP_CALLO(op);
1713 int nb_iargs = TCGOP_CALLI(op);
1714 int flags, i;
1715
1716 init_arguments(ctx, op, nb_oargs + nb_iargs);
1717 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1718
1719 /* If the function reads or writes globals, reset temp data. */
1720 flags = tcg_call_flags(op);
1721 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1722 int nb_globals = s->nb_globals;
1723
1724 for (i = 0; i < nb_globals; i++) {
1725 if (test_bit(i, ctx->temps_used.l)) {
Richard Henderson986cac12023-01-09 13:59:35 -08001726 reset_ts(ctx, &ctx->tcg->temps[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001727 }
1728 }
1729 }
1730
Richard Hendersonab84dc32023-08-23 23:04:24 -07001731 /* If the function has side effects, reset mem data. */
1732 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1733 remove_mem_copy_all(ctx);
1734 }
1735
Richard Henderson5cf32be2021-08-24 08:17:08 -07001736 /* Reset temp data for outputs. */
1737 for (i = 0; i < nb_oargs; i++) {
Richard Henderson986cac12023-01-09 13:59:35 -08001738 reset_temp(ctx, op->args[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001739 }
1740
1741 /* Stop optimizing MB across calls. */
1742 ctx->prev_mb = NULL;
1743 return true;
1744}
1745
Richard Henderson29f65862024-12-09 14:09:49 -06001746static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1747{
1748 /* Canonicalize the comparison to put immediate second. */
1749 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1750 op->args[3] = tcg_swap_cond(op->args[3]);
1751 }
1752 return finish_folding(ctx, op);
1753}
1754
1755static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1756{
1757 /* If true and false values are the same, eliminate the cmp. */
1758 if (args_are_copies(op->args[3], op->args[4])) {
1759 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1760 }
1761
1762 /* Canonicalize the comparison to put immediate second. */
1763 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1764 op->args[5] = tcg_swap_cond(op->args[5]);
1765 }
1766 /*
1767 * Canonicalize the "false" input reg to match the destination,
1768 * so that the tcg backend can implement "move if true".
1769 */
1770 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1771 op->args[5] = tcg_invert_cond(op->args[5]);
1772 }
1773 return finish_folding(ctx, op);
1774}
1775
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001776static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1777{
Richard Hendersonce1d6632024-12-08 19:47:51 -06001778 uint64_t z_mask, s_mask;
1779 TempOptInfo *t1 = arg_info(op->args[1]);
1780 TempOptInfo *t2 = arg_info(op->args[2]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001781
Richard Hendersonce1d6632024-12-08 19:47:51 -06001782 if (ti_is_const(t1)) {
1783 uint64_t t = ti_const_val(t1);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001784
1785 if (t != 0) {
Richard Henderson67f84c92021-08-25 08:00:20 -07001786 t = do_constant_folding(op->opc, ctx->type, t, 0);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001787 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1788 }
1789 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1790 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001791
1792 switch (ctx->type) {
1793 case TCG_TYPE_I32:
1794 z_mask = 31;
1795 break;
1796 case TCG_TYPE_I64:
1797 z_mask = 63;
1798 break;
1799 default:
1800 g_assert_not_reached();
1801 }
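    /*
     * The result is either a count in [0, 31] (or [0, 63] for I64), or
     * the fallback value in arg2 when the input is zero; the masks below
     * merge the known bits of both possibilities.
     */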
Richard Hendersonce1d6632024-12-08 19:47:51 -06001802 s_mask = ~z_mask;
1803 z_mask |= t2->z_mask;
1804 s_mask &= t2->s_mask;
1805
1806 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001807}
1808
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001809static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1810{
Richard Henderson81be07f2024-12-08 19:49:17 -06001811 uint64_t z_mask;
1812
Richard Hendersonfae450b2021-08-25 22:42:19 -07001813 if (fold_const1(ctx, op)) {
1814 return true;
1815 }
1816
1817 switch (ctx->type) {
1818 case TCG_TYPE_I32:
Richard Henderson81be07f2024-12-08 19:49:17 -06001819 z_mask = 32 | 31;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001820 break;
1821 case TCG_TYPE_I64:
Richard Henderson81be07f2024-12-08 19:49:17 -06001822 z_mask = 64 | 63;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001823 break;
1824 default:
1825 g_assert_not_reached();
1826 }
Richard Henderson81be07f2024-12-08 19:49:17 -06001827 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001828}
1829
Richard Henderson1b1907b2021-08-24 10:47:04 -07001830static bool fold_deposit(OptContext *ctx, TCGOp *op)
1831{
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001832 TempOptInfo *t1 = arg_info(op->args[1]);
1833 TempOptInfo *t2 = arg_info(op->args[2]);
1834 int ofs = op->args[3];
1835 int len = op->args[4];
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001836 int width = 8 * tcg_type_size(ctx->type);
Richard Hendersonedb832c2024-12-19 17:56:05 -08001837 uint64_t z_mask, s_mask;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001838
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001839 if (ti_is_const(t1) && ti_is_const(t2)) {
1840 return tcg_opt_gen_movi(ctx, op, op->args[0],
1841 deposit64(ti_const_val(t1), ofs, len,
1842 ti_const_val(t2)));
Richard Henderson1b1907b2021-08-24 10:47:04 -07001843 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001844
Richard Henderson8f7a8402023-08-13 11:03:05 -07001845 /* Inserting a value into zero at offset 0. */
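    /*
     * E.g. (illustrative): depositing t2 into zero at ofs 0 with len 8
     * is simply t2 & 0xff.
     */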
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001846 if (ti_is_const_val(t1, 0) && ofs == 0) {
1847 uint64_t mask = MAKE_64BIT_MASK(0, len);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001848
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001849 op->opc = INDEX_op_and;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001850 op->args[1] = op->args[2];
Richard Henderson26aac972023-10-23 12:31:57 -07001851 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001852 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001853 }
1854
1855 /* Inserting zero into a value. */
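    /*
     * E.g. (illustrative): depositing zero at ofs 8 with len 8 into t1
     * is simply t1 & ~0xff00.
     */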
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001856 if (ti_is_const_val(t2, 0)) {
1857 uint64_t mask = deposit64(-1, ofs, len, 0);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001858
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001859 op->opc = INDEX_op_and;
Richard Henderson26aac972023-10-23 12:31:57 -07001860 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001861 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001862 }
1863
Richard Hendersonedb832c2024-12-19 17:56:05 -08001864 /* The s_mask from the top portion of the deposit is still valid. */
1865 if (ofs + len == width) {
1866 s_mask = t2->s_mask << ofs;
1867 } else {
1868 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1869 }
1870
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001871 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
Richard Hendersonedb832c2024-12-19 17:56:05 -08001872 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson1b1907b2021-08-24 10:47:04 -07001873}
1874
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001875static bool fold_divide(OptContext *ctx, TCGOp *op)
1876{
Richard Henderson2f9d9a32021-10-25 11:30:14 -07001877 if (fold_const2(ctx, op) ||
1878 fold_xi_to_x(ctx, op, 1)) {
1879 return true;
1880 }
Richard Henderson3d5ec802024-12-08 19:59:15 -06001881 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001882}
1883
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001884static bool fold_dup(OptContext *ctx, TCGOp *op)
1885{
1886 if (arg_is_const(op->args[1])) {
1887 uint64_t t = arg_info(op->args[1])->val;
1888 t = dup_const(TCGOP_VECE(op), t);
1889 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1890 }
Richard Hendersone089d692024-12-08 20:00:51 -06001891 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001892}
1893
1894static bool fold_dup2(OptContext *ctx, TCGOp *op)
1895{
1896 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1897 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1898 arg_info(op->args[2])->val);
1899 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1900 }
1901
1902 if (args_are_copies(op->args[1], op->args[2])) {
1903 op->opc = INDEX_op_dup_vec;
1904 TCGOP_VECE(op) = MO_32;
1905 }
Richard Hendersone089d692024-12-08 20:00:51 -06001906 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001907}
1908
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001909static bool fold_eqv(OptContext *ctx, TCGOp *op)
1910{
Richard Hendersonef6be622024-12-08 20:03:15 -06001911 uint64_t s_mask;
Richard Henderson46c68d72023-11-15 11:51:28 -08001912 TempOptInfo *t1, *t2;
Richard Hendersonef6be622024-12-08 20:03:15 -06001913
Richard Henderson7a2f7082021-08-26 07:06:39 -07001914 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001915 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001916 fold_xi_to_not(ctx, op, 0)) {
1917 return true;
1918 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001919
Richard Henderson46c68d72023-11-15 11:51:28 -08001920 t2 = arg_info(op->args[2]);
1921 if (ti_is_const(t2)) {
1922 /* Fold eqv r,x,i to xor r,x,~i. */
1923 switch (ctx->type) {
1924 case TCG_TYPE_I32:
1925 case TCG_TYPE_I64:
1926 op->opc = INDEX_op_xor;
1927 break;
1928 case TCG_TYPE_V64:
1929 case TCG_TYPE_V128:
1930 case TCG_TYPE_V256:
1931 op->opc = INDEX_op_xor_vec;
1932 break;
1933 default:
1934 g_assert_not_reached();
1935 }
1936 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1937 return fold_xor(ctx, op);
1938 }
1939
1940 t1 = arg_info(op->args[1]);
1941 s_mask = t1->s_mask & t2->s_mask;
Richard Hendersonef6be622024-12-08 20:03:15 -06001942 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001943}
1944
Richard Hendersonb6617c82021-08-24 10:44:53 -07001945static bool fold_extract(OptContext *ctx, TCGOp *op)
1946{
Richard Hendersonfae450b2021-08-25 22:42:19 -07001947 uint64_t z_mask_old, z_mask;
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001948 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07001949 int pos = op->args[2];
1950 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07001951
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001952 if (ti_is_const(t1)) {
1953 return tcg_opt_gen_movi(ctx, op, op->args[0],
1954 extract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07001955 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001956
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001957 z_mask_old = t1->z_mask;
Richard Henderson57fe5c62021-08-26 12:04:46 -07001958 z_mask = extract64(z_mask_old, pos, len);
Richard Henderson045ace32024-12-19 10:33:51 -08001959 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1960 return true;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001961 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001962
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001963 return fold_masks_z(ctx, op, z_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07001964}
1965
Richard Hendersondcd08992021-08-24 10:41:39 -07001966static bool fold_extract2(OptContext *ctx, TCGOp *op)
1967{
1968 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1969 uint64_t v1 = arg_info(op->args[1])->val;
1970 uint64_t v2 = arg_info(op->args[2])->val;
1971 int shr = op->args[3];
1972
Richard Henderson61d6a872025-01-12 21:40:43 -08001973 if (ctx->type == TCG_TYPE_I32) {
Richard Hendersondcd08992021-08-24 10:41:39 -07001974 v1 = (uint32_t)v1 >> shr;
Richard Henderson225bec02021-11-09 23:17:59 +01001975 v2 = (uint64_t)((int32_t)v2 << (32 - shr));
Richard Henderson61d6a872025-01-12 21:40:43 -08001976 } else {
1977 v1 >>= shr;
1978 v2 <<= 64 - shr;
Richard Hendersondcd08992021-08-24 10:41:39 -07001979 }
1980 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1981 }
Richard Hendersonc9df99e2024-12-08 20:06:42 -06001982 return finish_folding(ctx, op);
Richard Hendersondcd08992021-08-24 10:41:39 -07001983}
1984
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001985static bool fold_exts(OptContext *ctx, TCGOp *op)
1986{
Richard Henderson48e8de62024-12-26 12:01:57 -08001987 uint64_t s_mask, z_mask;
Richard Hendersona9621922024-12-08 20:08:46 -06001988 TempOptInfo *t1;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001989
1990 if (fold_const1(ctx, op)) {
1991 return true;
1992 }
1993
Richard Hendersona9621922024-12-08 20:08:46 -06001994 t1 = arg_info(op->args[1]);
1995 z_mask = t1->z_mask;
1996 s_mask = t1->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001997
1998 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07001999 case INDEX_op_ext_i32_i64:
Richard Hendersona9621922024-12-08 20:08:46 -06002000 s_mask |= INT32_MIN;
2001 z_mask = (int32_t)z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002002 break;
2003 default:
2004 g_assert_not_reached();
2005 }
Richard Hendersona9621922024-12-08 20:08:46 -06002006 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002007}
2008
2009static bool fold_extu(OptContext *ctx, TCGOp *op)
2010{
Richard Henderson48e8de62024-12-26 12:01:57 -08002011 uint64_t z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002012
2013 if (fold_const1(ctx, op)) {
2014 return true;
2015 }
2016
Richard Henderson48e8de62024-12-26 12:01:57 -08002017 z_mask = arg_info(op->args[1])->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002018 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07002019 case INDEX_op_extrl_i64_i32:
2020 case INDEX_op_extu_i32_i64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002021 z_mask = (uint32_t)z_mask;
2022 break;
2023 case INDEX_op_extrh_i64_i32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002024 z_mask >>= 32;
2025 break;
2026 default:
2027 g_assert_not_reached();
2028 }
Richard Henderson08abe292024-12-08 20:11:44 -06002029 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002030}
2031
Richard Henderson3eefdf22021-08-25 11:06:43 -07002032static bool fold_mb(OptContext *ctx, TCGOp *op)
2033{
2034 /* Eliminate duplicate and redundant fence instructions. */
2035 if (ctx->prev_mb) {
2036 /*
2037 * Merge two barriers of the same type into one,
2038 * or a weaker barrier into a stronger one,
2039 * or two weaker barriers into a stronger one.
2040 * mb X; mb Y => mb X|Y
2041 * mb; strl => mb; st
2042 * ldaq; mb => ld; mb
2043 * ldaq; strl => ld; mb; st
2044 * Other combinations are also merged into a strong
2045 * barrier. This is stricter than specified but for
2046 * the purposes of TCG is better than not optimizing.
2047 */
2048 ctx->prev_mb->args[0] |= op->args[0];
2049 tcg_op_remove(ctx->tcg, op);
2050 } else {
2051 ctx->prev_mb = op;
2052 }
2053 return true;
2054}
2055
Richard Henderson2cfac7f2021-08-25 13:05:43 -07002056static bool fold_mov(OptContext *ctx, TCGOp *op)
2057{
2058 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2059}
2060
Richard Henderson0c310a32021-08-24 10:37:24 -07002061static bool fold_movcond(OptContext *ctx, TCGOp *op)
2062{
Richard Henderson32202782024-12-08 20:16:38 -06002063 uint64_t z_mask, s_mask;
2064 TempOptInfo *tt, *ft;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002065 int i;
Richard Henderson0c310a32021-08-24 10:37:24 -07002066
Richard Henderson141125e2024-09-06 21:00:10 -07002067 /* If true and false values are the same, eliminate the cmp. */
2068 if (args_are_copies(op->args[3], op->args[4])) {
2069 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2070 }
2071
Richard Henderson7a2f7082021-08-26 07:06:39 -07002072 /*
2073 * Canonicalize the "false" input reg to match the destination reg so
2074 * that the tcg backend can implement a "move if true" operation.
2075 */
2076 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
Richard Henderson246c4b72023-10-24 16:36:50 -07002077 op->args[5] = tcg_invert_cond(op->args[5]);
Richard Henderson7a2f7082021-08-26 07:06:39 -07002078 }
2079
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002080 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002081 &op->args[2], &op->args[5]);
Richard Henderson0c310a32021-08-24 10:37:24 -07002082 if (i >= 0) {
2083 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2084 }
2085
Richard Henderson32202782024-12-08 20:16:38 -06002086 tt = arg_info(op->args[3]);
2087 ft = arg_info(op->args[4]);
2088 z_mask = tt->z_mask | ft->z_mask;
2089 s_mask = tt->s_mask & ft->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002090
Richard Henderson32202782024-12-08 20:16:38 -06002091 if (ti_is_const(tt) && ti_is_const(ft)) {
2092 uint64_t tv = ti_const_val(tt);
2093 uint64_t fv = ti_const_val(ft);
Richard Henderson246c4b72023-10-24 16:36:50 -07002094 TCGCond cond = op->args[5];
Richard Henderson0c310a32021-08-24 10:37:24 -07002095
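    /*
     * With both arms constant, movcond degenerates: arms {1,0} or {0,1}
     * become setcond (with the condition inverted when swapped), and
     * arms {-1,0} or {0,-1} become negsetcond likewise.
     */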
Richard Henderson0c310a32021-08-24 10:37:24 -07002096 if (tv == 1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002097 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002098 op->args[3] = cond;
2099 } else if (fv == 1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002100 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002101 op->args[3] = tcg_invert_cond(cond);
Richard Hendersonf7914582025-01-09 12:48:21 -08002102 } else if (tv == -1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002103 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002104 op->args[3] = cond;
2105 } else if (fv == -1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002106 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002107 op->args[3] = tcg_invert_cond(cond);
Richard Henderson0c310a32021-08-24 10:37:24 -07002108 }
2109 }
Richard Henderson32202782024-12-08 20:16:38 -06002110
2111 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson0c310a32021-08-24 10:37:24 -07002112}
2113
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002114static bool fold_mul(OptContext *ctx, TCGOp *op)
2115{
Richard Hendersone8679952021-08-25 13:19:52 -07002116 if (fold_const2(ctx, op) ||
Richard Henderson5b5cf472021-10-25 11:19:14 -07002117 fold_xi_to_i(ctx, op, 0) ||
2118 fold_xi_to_x(ctx, op, 1)) {
Richard Hendersone8679952021-08-25 13:19:52 -07002119 return true;
2120 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002121 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002122}
2123
2124static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2125{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002126 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07002127 fold_xi_to_i(ctx, op, 0)) {
2128 return true;
2129 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002130 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002131}
2132
Richard Henderson407112b2021-08-26 06:33:04 -07002133static bool fold_multiply2(OptContext *ctx, TCGOp *op)
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002134{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002135 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2136
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002137 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
Richard Henderson407112b2021-08-26 06:33:04 -07002138 uint64_t a = arg_info(op->args[2])->val;
2139 uint64_t b = arg_info(op->args[3])->val;
2140 uint64_t h, l;
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002141 TCGArg rl, rh;
Richard Henderson407112b2021-08-26 06:33:04 -07002142 TCGOp *op2;
2143
2144 switch (op->opc) {
Richard Hendersond7761982025-01-09 09:11:53 -08002145 case INDEX_op_mulu2:
2146 if (ctx->type == TCG_TYPE_I32) {
2147 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2148 h = (int32_t)(l >> 32);
2149 l = (int32_t)l;
2150 } else {
2151 mulu64(&l, &h, a, b);
2152 }
Richard Henderson407112b2021-08-26 06:33:04 -07002153 break;
Richard Hendersonbfe96482025-01-09 07:24:32 -08002154 case INDEX_op_muls2:
2155 if (ctx->type == TCG_TYPE_I32) {
2156 l = (int64_t)(int32_t)a * (int32_t)b;
2157 h = l >> 32;
2158 l = (int32_t)l;
2159 } else {
2160 muls64(&l, &h, a, b);
2161 }
Richard Henderson407112b2021-08-26 06:33:04 -07002162 break;
2163 default:
2164 g_assert_not_reached();
2165 }
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002166
2167 rl = op->args[0];
2168 rh = op->args[1];
Richard Henderson407112b2021-08-26 06:33:04 -07002169
2170 /* The proper opcode is supplied by tcg_opt_gen_mov. */
Richard Hendersona3c1c572025-04-21 11:05:29 -07002171 op2 = opt_insert_before(ctx, op, 0, 2);
Richard Henderson407112b2021-08-26 06:33:04 -07002172
2173 tcg_opt_gen_movi(ctx, op, rl, l);
2174 tcg_opt_gen_movi(ctx, op2, rh, h);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002175 return true;
2176 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002177 return finish_folding(ctx, op);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002178}
2179
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002180static bool fold_nand(OptContext *ctx, TCGOp *op)
2181{
Richard Hendersonfa3168e2024-12-08 20:20:40 -06002182 uint64_t s_mask;
2183
Richard Henderson7a2f7082021-08-26 07:06:39 -07002184 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002185 fold_xi_to_not(ctx, op, -1)) {
2186 return true;
2187 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002188
Richard Hendersonfa3168e2024-12-08 20:20:40 -06002189 s_mask = arg_info(op->args[1])->s_mask
2190 & arg_info(op->args[2])->s_mask;
2191 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002192}
2193
Richard Hendersone25fe882024-04-04 20:53:50 +00002194static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002195{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002196 /* Set to 1 all bits to the left of the rightmost. */
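    /*
     * Illustrative example: if the input may be 0, 2, 4 or 6 (z_mask 0x6),
     * then z_mask & -z_mask isolates bit 1, and the negated result can
     * only have bits 1 and up set, i.e. z_mask ...11110.
     */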
Richard Hendersone25fe882024-04-04 20:53:50 +00002197 uint64_t z_mask = arg_info(op->args[1])->z_mask;
Richard Hendersond151fd32024-12-08 20:23:11 -06002198 z_mask = -(z_mask & -z_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002199
Richard Hendersond151fd32024-12-08 20:23:11 -06002200 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002201}
2202
Richard Hendersone25fe882024-04-04 20:53:50 +00002203static bool fold_neg(OptContext *ctx, TCGOp *op)
2204{
2205 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2206}
2207
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002208static bool fold_nor(OptContext *ctx, TCGOp *op)
2209{
Richard Henderson2b7b6952024-12-08 20:25:21 -06002210 uint64_t s_mask;
2211
Richard Henderson7a2f7082021-08-26 07:06:39 -07002212 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002213 fold_xi_to_not(ctx, op, 0)) {
2214 return true;
2215 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002216
Richard Henderson2b7b6952024-12-08 20:25:21 -06002217 s_mask = arg_info(op->args[1])->s_mask
2218 & arg_info(op->args[2])->s_mask;
2219 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002220}
2221
2222static bool fold_not(OptContext *ctx, TCGOp *op)
2223{
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002224 if (fold_const1(ctx, op)) {
2225 return true;
2226 }
Richard Henderson608e75f2024-12-08 20:27:02 -06002227 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002228}
2229
2230static bool fold_or(OptContext *ctx, TCGOp *op)
2231{
Richard Henderson83b1ba32024-12-08 20:28:59 -06002232 uint64_t z_mask, s_mask;
2233 TempOptInfo *t1, *t2;
2234
Richard Henderson7a2f7082021-08-26 07:06:39 -07002235 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002236 fold_xi_to_x(ctx, op, 0) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07002237 fold_xx_to_x(ctx, op)) {
2238 return true;
2239 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002240
Richard Henderson83b1ba32024-12-08 20:28:59 -06002241 t1 = arg_info(op->args[1]);
2242 t2 = arg_info(op->args[2]);
2243 z_mask = t1->z_mask | t2->z_mask;
2244 s_mask = t1->s_mask & t2->s_mask;
2245 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002246}
2247
2248static bool fold_orc(OptContext *ctx, TCGOp *op)
2249{
Richard Henderson54e26b22024-12-08 20:30:20 -06002250 uint64_t s_mask;
Richard Henderson50e40ec2024-12-10 08:13:10 -06002251 TempOptInfo *t1, *t2;
Richard Henderson54e26b22024-12-08 20:30:20 -06002252
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002253 if (fold_const2(ctx, op) ||
Richard Henderson4e858d92021-08-26 07:31:13 -07002254 fold_xx_to_i(ctx, op, -1) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002255 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002256 fold_ix_to_not(ctx, op, 0)) {
2257 return true;
2258 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002259
Richard Henderson50e40ec2024-12-10 08:13:10 -06002260 t2 = arg_info(op->args[2]);
2261 if (ti_is_const(t2)) {
2262 /* Fold orc r,x,i to or r,x,~i. */
2263 switch (ctx->type) {
2264 case TCG_TYPE_I32:
2265 case TCG_TYPE_I64:
2266 op->opc = INDEX_op_or;
2267 break;
2268 case TCG_TYPE_V64:
2269 case TCG_TYPE_V128:
2270 case TCG_TYPE_V256:
2271 op->opc = INDEX_op_or_vec;
2272 break;
2273 default:
2274 g_assert_not_reached();
2275 }
2276 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2277 return fold_or(ctx, op);
2278 }
2279
2280 t1 = arg_info(op->args[1]);
2281 s_mask = t1->s_mask & t2->s_mask;
Richard Henderson54e26b22024-12-08 20:30:20 -06002282 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002283}
2284
Richard Henderson6813be92024-12-08 20:33:30 -06002285static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
Richard Henderson3eefdf22021-08-25 11:06:43 -07002286{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002287 const TCGOpDef *def = &tcg_op_defs[op->opc];
2288 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2289 MemOp mop = get_memop(oi);
2290 int width = 8 * memop_size(mop);
Richard Henderson6813be92024-12-08 20:33:30 -06002291 uint64_t z_mask = -1, s_mask = 0;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002292
Richard Henderson57fe5c62021-08-26 12:04:46 -07002293 if (width < 64) {
Richard Henderson75c3bf32024-12-19 10:50:40 -08002294 if (mop & MO_SIGN) {
Richard Henderson6813be92024-12-08 20:33:30 -06002295 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
Richard Henderson75c3bf32024-12-19 10:50:40 -08002296 } else {
Richard Henderson6813be92024-12-08 20:33:30 -06002297 z_mask = MAKE_64BIT_MASK(0, width);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002298 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002299 }
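    /*
     * E.g. (illustrative): an MO_SB load has width 8, so s_mask covers
     * bits 7..63; an MO_UB load instead leaves z_mask == 0xff.
     */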
2300
Richard Henderson3eefdf22021-08-25 11:06:43 -07002301 /* Opcodes that touch guest memory stop the mb optimization. */
2302 ctx->prev_mb = NULL;
Richard Henderson6813be92024-12-08 20:33:30 -06002303
2304 return fold_masks_zs(ctx, op, z_mask, s_mask);
2305}
2306
2307static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2308{
2309 /* Opcodes that touch guest memory stop the mb optimization. */
2310 ctx->prev_mb = NULL;
2311 return finish_folding(ctx, op);
Richard Henderson3eefdf22021-08-25 11:06:43 -07002312}
2313
2314static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2315{
2316 /* Opcodes that touch guest memory stop the mb optimization. */
2317 ctx->prev_mb = NULL;
Richard Henderson082b3ef2024-12-08 20:34:57 -06002318 return true;
Richard Henderson3eefdf22021-08-25 11:06:43 -07002319}
2320
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002321static bool fold_remainder(OptContext *ctx, TCGOp *op)
2322{
Richard Henderson267c17e2021-10-25 11:30:33 -07002323 if (fold_const2(ctx, op) ||
2324 fold_xx_to_i(ctx, op, 0)) {
2325 return true;
2326 }
Richard Hendersonf9e39342024-12-08 20:36:50 -06002327 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002328}
2329
Richard Henderson95eb2292024-12-08 20:47:59 -06002330/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2331static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
Richard Henderson8d65cda2024-03-26 16:00:40 -10002332{
2333 uint64_t a_zmask, b_val;
2334 TCGCond cond;
2335
2336 if (!arg_is_const(op->args[2])) {
2337 return false;
2338 }
2339
2340 a_zmask = arg_info(op->args[1])->z_mask;
2341 b_val = arg_info(op->args[2])->val;
2342 cond = op->args[3];
2343
2344 if (ctx->type == TCG_TYPE_I32) {
2345 a_zmask = (uint32_t)a_zmask;
2346 b_val = (uint32_t)b_val;
2347 }
2348
2349 /*
2350 * A with only low bits set vs B with high bits set means that A < B.
2351 */
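    /*
     * E.g. (illustrative): a_zmask == 0xff and b_val == 0x100 means A < B
     * always holds, so LTU/LEU/NE fold to true and GEU/GTU/EQ to false.
     */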
2352 if (a_zmask < b_val) {
2353 bool inv = false;
2354
2355 switch (cond) {
2356 case TCG_COND_NE:
2357 case TCG_COND_LEU:
2358 case TCG_COND_LTU:
2359 inv = true;
2360 /* fall through */
2361 case TCG_COND_GTU:
2362 case TCG_COND_GEU:
2363 case TCG_COND_EQ:
2364 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2365 default:
2366 break;
2367 }
2368 }
2369
2370 /*
2371 * A with only lsb set is already boolean.
2372 */
2373 if (a_zmask <= 1) {
2374 bool convert = false;
2375 bool inv = false;
2376
2377 switch (cond) {
2378 case TCG_COND_EQ:
2379 inv = true;
2380 /* fall through */
2381 case TCG_COND_NE:
2382 convert = (b_val == 0);
2383 break;
2384 case TCG_COND_LTU:
2385 case TCG_COND_TSTEQ:
2386 inv = true;
2387 /* fall through */
2388 case TCG_COND_GEU:
2389 case TCG_COND_TSTNE:
2390 convert = (b_val == 1);
2391 break;
2392 default:
2393 break;
2394 }
2395 if (convert) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002396 if (!inv && !neg) {
2397 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2398 }
2399
Richard Henderson8d65cda2024-03-26 16:00:40 -10002400 if (!inv) {
Richard Henderson69713582025-01-06 22:48:57 -08002401 op->opc = INDEX_op_neg;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002402 } else if (neg) {
Richard Henderson79602f62025-01-06 09:11:39 -08002403 op->opc = INDEX_op_add;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002404 op->args[2] = arg_new_constant(ctx, -1);
2405 } else {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002406 op->opc = INDEX_op_xor;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002407 op->args[2] = arg_new_constant(ctx, 1);
2408 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002409 return -1;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002410 }
2411 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002412 return 0;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002413}
2414
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002415static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2416{
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002417 TCGCond cond = op->args[3];
2418 TCGArg ret, src1, src2;
2419 TCGOp *op2;
2420 uint64_t val;
2421 int sh;
2422 bool inv;
2423
2424 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2425 return;
2426 }
2427
2428 src2 = op->args[2];
2429 val = arg_info(src2)->val;
2430 if (!is_power_of_2(val)) {
2431 return;
2432 }
2433 sh = ctz64(val);
2434
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002435 ret = op->args[0];
2436 src1 = op->args[1];
2437 inv = cond == TCG_COND_TSTEQ;
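    /*
     * Testing a single bit can be done by extracting it: e.g.
     * (illustrative) setcond TSTNE d,x,0x10 becomes extract d,x,4,1 when
     * the target supports it, with TSTEQ adding an inversion and neg
     * turning the {0,1} result into {0,-1}; otherwise a shr/and sequence
     * is emitted below.
     */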
2438
Richard Hendersonfa361ee2025-01-12 11:50:09 -08002439 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2440 op->opc = INDEX_op_sextract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002441 op->args[1] = src1;
2442 op->args[2] = sh;
2443 op->args[3] = 1;
2444 return;
Richard Henderson07d5d502025-01-11 09:01:46 -08002445 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2446 op->opc = INDEX_op_extract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002447 op->args[1] = src1;
2448 op->args[2] = sh;
2449 op->args[3] = 1;
2450 } else {
2451 if (sh) {
Richard Henderson74dbd362025-01-07 22:52:10 -08002452 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002453 op2->args[0] = ret;
2454 op2->args[1] = src1;
2455 op2->args[2] = arg_new_constant(ctx, sh);
2456 src1 = ret;
2457 }
Richard Hendersonc3b920b2025-01-06 10:32:44 -08002458 op->opc = INDEX_op_and;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002459 op->args[1] = src1;
2460 op->args[2] = arg_new_constant(ctx, 1);
2461 }
2462
2463 if (neg && inv) {
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002464 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002465 op2->args[0] = ret;
2466 op2->args[1] = ret;
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002467 op2->args[2] = arg_new_constant(ctx, -1);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002468 } else if (inv) {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002469 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002470 op2->args[0] = ret;
2471 op2->args[1] = ret;
2472 op2->args[2] = arg_new_constant(ctx, 1);
2473 } else if (neg) {
Richard Henderson69713582025-01-06 22:48:57 -08002474 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002475 op2->args[0] = ret;
2476 op2->args[1] = ret;
2477 }
2478}
2479
Richard Hendersonc63ff552021-08-24 09:35:30 -07002480static bool fold_setcond(OptContext *ctx, TCGOp *op)
2481{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002482 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002483 &op->args[2], &op->args[3]);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002484 if (i >= 0) {
2485 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2486 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002487
Richard Henderson95eb2292024-12-08 20:47:59 -06002488 i = fold_setcond_zmask(ctx, op, false);
2489 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002490 return true;
2491 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002492 if (i == 0) {
2493 fold_setcond_tst_pow2(ctx, op, false);
2494 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002495
Richard Henderson2c8a2832024-12-08 20:50:37 -06002496 return fold_masks_z(ctx, op, 1);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002497}
2498
Richard Henderson36355022023-08-04 23:24:04 +00002499static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2500{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002501 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002502 &op->args[2], &op->args[3]);
Richard Henderson36355022023-08-04 23:24:04 +00002503 if (i >= 0) {
2504 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2505 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002506
Richard Henderson95eb2292024-12-08 20:47:59 -06002507 i = fold_setcond_zmask(ctx, op, true);
2508 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002509 return true;
2510 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002511 if (i == 0) {
2512 fold_setcond_tst_pow2(ctx, op, true);
2513 }
Richard Henderson36355022023-08-04 23:24:04 +00002514
2515 /* Value is {0,-1} so all bits are repetitions of the sign. */
Richard Henderson081cf082024-12-08 20:50:58 -06002516 return fold_masks_s(ctx, op, -1);
Richard Henderson36355022023-08-04 23:24:04 +00002517}
2518
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002519static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2520{
Richard Henderson7e64b112023-10-24 16:53:56 -07002521 TCGCond cond;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002522 int i, inv = 0;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002523
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002524 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
Richard Henderson7e64b112023-10-24 16:53:56 -07002525 cond = op->args[5];
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002526 if (i >= 0) {
2527 goto do_setcond_const;
2528 }
2529
2530 switch (cond) {
2531 case TCG_COND_LT:
2532 case TCG_COND_GE:
2533 /*
2534 * Simplify LT/GE comparisons vs zero to a single compare
2535 * vs the high word of the input.
2536 */
Richard Henderson27cdb852023-10-23 11:38:00 -07002537 if (arg_is_const_val(op->args[3], 0) &&
2538 arg_is_const_val(op->args[4], 0)) {
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002539 goto do_setcond_high;
2540 }
2541 break;
2542
2543 case TCG_COND_NE:
2544 inv = 1;
2545 QEMU_FALLTHROUGH;
2546 case TCG_COND_EQ:
2547 /*
2548 * Simplify EQ/NE comparisons where one of the pairs
2549 * can be simplified.
2550 */
Richard Henderson67f84c92021-08-25 08:00:20 -07002551 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002552 op->args[3], cond);
2553 switch (i ^ inv) {
2554 case 0:
2555 goto do_setcond_const;
2556 case 1:
2557 goto do_setcond_high;
2558 }
2559
Richard Henderson67f84c92021-08-25 08:00:20 -07002560 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002561 op->args[4], cond);
2562 switch (i ^ inv) {
2563 case 0:
2564 goto do_setcond_const;
2565 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002566 goto do_setcond_low;
2567 }
2568 break;
2569
2570 case TCG_COND_TSTEQ:
2571 case TCG_COND_TSTNE:
Richard Hendersona71d9df2024-06-30 19:46:23 -07002572 if (arg_is_const_val(op->args[3], 0)) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002573 goto do_setcond_high;
2574 }
2575 if (arg_is_const_val(op->args[4], 0)) {
2576 goto do_setcond_low;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002577 }
2578 break;
2579
2580 default:
2581 break;
2582
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002583 do_setcond_low:
2584 op->args[2] = op->args[3];
2585 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002586 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002587 return fold_setcond(ctx, op);
2588
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002589 do_setcond_high:
2590 op->args[1] = op->args[2];
2591 op->args[2] = op->args[4];
2592 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002593 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002594 return fold_setcond(ctx, op);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002595 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002596
Richard Hendersona53502c2024-12-08 20:56:36 -06002597 return fold_masks_z(ctx, op, 1);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002598
2599 do_setcond_const:
2600 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2601}
2602
Richard Hendersonb6617c82021-08-24 10:44:53 -07002603static bool fold_sextract(OptContext *ctx, TCGOp *op)
2604{
Richard Henderson57fe5c62021-08-26 12:04:46 -07002605 uint64_t z_mask, s_mask, s_mask_old;
Richard Hendersonbaff5072024-12-08 21:09:30 -06002606 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002607 int pos = op->args[2];
2608 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07002609
Richard Hendersonbaff5072024-12-08 21:09:30 -06002610 if (ti_is_const(t1)) {
2611 return tcg_opt_gen_movi(ctx, op, op->args[0],
2612 sextract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07002613 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002614
Richard Hendersonbaff5072024-12-08 21:09:30 -06002615 s_mask_old = t1->s_mask;
2616 s_mask = s_mask_old >> pos;
2617 s_mask |= -1ull << (len - 1);
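    /*
     * E.g. (illustrative): extracting 8 bits from position 0 produces a
     * sign-extended byte, so at least bits 7..63 of the result repeat the
     * sign, hence the -1ull << (len - 1) term.
     */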
Richard Henderson57fe5c62021-08-26 12:04:46 -07002618
Richard Hendersonaa9e0502024-12-21 22:03:53 -08002619 if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
Richard Henderson045ace32024-12-19 10:33:51 -08002620 return true;
Richard Henderson57fe5c62021-08-26 12:04:46 -07002621 }
2622
Richard Hendersonbaff5072024-12-08 21:09:30 -06002623 z_mask = sextract64(t1->z_mask, pos, len);
2624 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07002625}
2626
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002627static bool fold_shift(OptContext *ctx, TCGOp *op)
2628{
Richard Henderson4ed2ba32024-12-19 19:38:54 -08002629 uint64_t s_mask, z_mask;
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002630 TempOptInfo *t1, *t2;
Richard Henderson93a967f2021-08-26 13:24:59 -07002631
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002632 if (fold_const2(ctx, op) ||
Richard Hendersonda48e272021-08-25 20:42:04 -07002633 fold_ix_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002634 fold_xi_to_x(ctx, op, 0)) {
2635 return true;
2636 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002637
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002638 t1 = arg_info(op->args[1]);
2639 t2 = arg_info(op->args[2]);
2640 s_mask = t1->s_mask;
2641 z_mask = t1->z_mask;
Richard Henderson93a967f2021-08-26 13:24:59 -07002642
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002643 if (ti_is_const(t2)) {
2644 int sh = ti_const_val(t2);
Richard Henderson93a967f2021-08-26 13:24:59 -07002645
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002646 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002647 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002648
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002649 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002650 }
Richard Henderson93a967f2021-08-26 13:24:59 -07002651
2652 switch (op->opc) {
Richard Henderson3949f362025-01-08 08:05:18 -08002653 case INDEX_op_sar:
Richard Henderson93a967f2021-08-26 13:24:59 -07002654 /*
2655 * Arithmetic right shift will not reduce the number of
2656 * input sign repetitions.
2657 */
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002658 return fold_masks_s(ctx, op, s_mask);
Richard Henderson74dbd362025-01-07 22:52:10 -08002659 case INDEX_op_shr:
Richard Henderson93a967f2021-08-26 13:24:59 -07002660 /*
2661 * If the sign bit is known zero, then logical right shift
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002662 * will not reduce the number of input sign repetitions.
Richard Henderson93a967f2021-08-26 13:24:59 -07002663 */
Richard Henderson4ed2ba32024-12-19 19:38:54 -08002664 if (~z_mask & -s_mask) {
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002665 return fold_masks_s(ctx, op, s_mask);
Richard Henderson93a967f2021-08-26 13:24:59 -07002666 }
2667 break;
2668 default:
2669 break;
2670 }
2671
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002672 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002673}
2674
Richard Henderson9caca882021-08-24 13:30:32 -07002675static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2676{
2677 TCGOpcode neg_op;
2678 bool have_neg;
2679
2680 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2681 return false;
2682 }
2683
2684 switch (ctx->type) {
2685 case TCG_TYPE_I32:
Richard Henderson9caca882021-08-24 13:30:32 -07002686 case TCG_TYPE_I64:
Richard Henderson69713582025-01-06 22:48:57 -08002687 neg_op = INDEX_op_neg;
Richard Hendersonb701f192023-10-25 21:14:04 -07002688 have_neg = true;
Richard Henderson9caca882021-08-24 13:30:32 -07002689 break;
2690 case TCG_TYPE_V64:
2691 case TCG_TYPE_V128:
2692 case TCG_TYPE_V256:
2693 neg_op = INDEX_op_neg_vec;
2694 have_neg = (TCG_TARGET_HAS_neg_vec &&
2695 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2696 break;
2697 default:
2698 g_assert_not_reached();
2699 }
2700 if (have_neg) {
2701 op->opc = neg_op;
2702 op->args[1] = op->args[2];
Richard Hendersone25fe882024-04-04 20:53:50 +00002703 return fold_neg_no_const(ctx, op);
Richard Henderson9caca882021-08-24 13:30:32 -07002704 }
2705 return false;
2706}
2707
Richard Hendersonc578ff12021-12-16 06:07:25 -08002708/* We cannot as yet do_constant_folding with vectors. */
2709static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002710{
Richard Hendersonc578ff12021-12-16 06:07:25 -08002711 if (fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002712 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson9caca882021-08-24 13:30:32 -07002713 fold_sub_to_neg(ctx, op)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07002714 return true;
2715 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002716 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002717}
2718
Richard Hendersonc578ff12021-12-16 06:07:25 -08002719static bool fold_sub(OptContext *ctx, TCGOp *op)
2720{
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002721 if (fold_const2(ctx, op) ||
2722 fold_xx_to_i(ctx, op, 0) ||
2723 fold_xi_to_x(ctx, op, 0) ||
2724 fold_sub_to_neg(ctx, op)) {
Richard Henderson6334a962023-10-25 18:39:43 -07002725 return true;
2726 }
2727
2728 /* Fold sub r,x,i to add r,x,-i */
2729 if (arg_is_const(op->args[2])) {
2730 uint64_t val = arg_info(op->args[2])->val;
2731
Richard Henderson79602f62025-01-06 09:11:39 -08002732 op->opc = INDEX_op_add;
Richard Henderson6334a962023-10-25 18:39:43 -07002733 op->args[2] = arg_new_constant(ctx, -val);
2734 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002735 return finish_folding(ctx, op);
Richard Hendersonc578ff12021-12-16 06:07:25 -08002736}
2737
Richard Hendersonaeb35142025-01-14 18:28:15 -08002738static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2739{
2740 TempOptInfo *t2;
2741
2742 op = QTAILQ_PREV(op, link);
2743 switch (op->opc) {
2744 case INDEX_op_subbo:
2745 op->opc = INDEX_op_sub;
2746 fold_sub(ctx, op);
2747 break;
2748 case INDEX_op_subbio:
2749 op->opc = INDEX_op_subbi;
2750 break;
2751 case INDEX_op_subb1o:
2752 t2 = arg_info(op->args[2]);
2753 if (ti_is_const(t2)) {
2754 op->opc = INDEX_op_add;
2755 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2756 /* Perform other constant folding, if needed. */
2757 fold_add(ctx, op);
2758 } else {
2759 TCGArg ret = op->args[0];
2760 op->opc = INDEX_op_sub;
2761 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2762 op->args[0] = ret;
2763 op->args[1] = ret;
2764 op->args[2] = arg_new_constant(ctx, -1);
2765 }
2766 break;
2767 default:
2768 g_assert_not_reached();
2769 }
2770}
2771
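/*
 * For illustration: with a borrow-in known to be 1 and a constant
 * second input, the identity  x - c - 1 == x + -(c + 1)  turns the
 * whole op into a single add,
 *     subbi dest, x, c   ->   add  dest, x, -(c + 1)
 * Otherwise a plain sub is emitted first and the borrow is applied
 * with  add dest, dest, -1.
 */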
2772static bool fold_subbi(OptContext *ctx, TCGOp *op)
2773{
2774 TempOptInfo *t2;
2775 int borrow_in = ctx->carry_state;
2776
2777 if (borrow_in < 0) {
2778 return finish_folding(ctx, op);
2779 }
2780 ctx->carry_state = -1;
2781
2782 squash_prev_borrowout(ctx, op);
2783 if (borrow_in == 0) {
2784 op->opc = INDEX_op_sub;
2785 return fold_sub(ctx, op);
2786 }
2787
2788 /*
2789     * Propagate the known borrow-in into any constant, then negate to
2790 * transform from sub to add. If there is no constant, emit a
2791 * separate add -1.
2792 */
2793 t2 = arg_info(op->args[2]);
2794 if (ti_is_const(t2)) {
2795 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2796 } else {
2797 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2798
2799 op2->args[0] = op->args[0];
2800 op2->args[1] = op->args[1];
2801 op2->args[2] = op->args[2];
2802 fold_sub(ctx, op2);
2803
2804 op->args[1] = op->args[0];
2805 op->args[2] = arg_new_constant(ctx, -1);
2806 }
2807 op->opc = INDEX_op_add;
2808 return fold_add(ctx, op);
2809}
2810
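/*
 * For illustration: with a borrow-in known to be 1, the borrow can
 * usually be folded into a constant operand,
 *     subbio dest, x, 7   ->   subbo dest, x, 8
 * If the constant is already the type's maximum, the borrow-out is
 * known to be 1 instead; a constant, non-zero first operand can be
 * decremented in the same way.
 */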
2811static bool fold_subbio(OptContext *ctx, TCGOp *op)
2812{
2813 TempOptInfo *t1, *t2;
2814 int borrow_out = -1;
2815
2816 if (ctx->carry_state < 0) {
2817 return finish_folding(ctx, op);
2818 }
2819
2820 squash_prev_borrowout(ctx, op);
2821 if (ctx->carry_state == 0) {
2822 goto do_subbo;
2823 }
2824
2825 t1 = arg_info(op->args[1]);
2826 t2 = arg_info(op->args[2]);
2827
2828 /* Propagate the known borrow-in into a constant, if possible. */
2829 if (ti_is_const(t2)) {
2830 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2831 uint64_t v = ti_const_val(t2) & max;
2832
2833 if (v < max) {
2834 op->args[2] = arg_new_constant(ctx, v + 1);
2835 goto do_subbo;
2836 }
2837        /* Subtracting max + 1 produces a known borrow out. */
2838 borrow_out = 1;
2839 }
2840 if (ti_is_const(t1)) {
2841 uint64_t v = ti_const_val(t1);
2842 if (v != 0) {
2843            op->args[1] = arg_new_constant(ctx, v - 1);
2844 goto do_subbo;
2845 }
2846 }
2847
2848    /* Adjust the opcode to remember the known borrow-in. */
2849 op->opc = INDEX_op_subb1o;
2850 ctx->carry_state = borrow_out;
2851 return finish_folding(ctx, op);
2852
2853 do_subbo:
2854 op->opc = INDEX_op_subbo;
2855 return fold_subbo(ctx, op);
2856}
2857
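/*
 * For illustration: the borrow-out of  x - y  is known when both
 * inputs are constant (it is x < y, unsigned), or when y is the
 * constant 0 (no borrow is possible).
 */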
2858static bool fold_subbo(OptContext *ctx, TCGOp *op)
2859{
2860 TempOptInfo *t1 = arg_info(op->args[1]);
2861 TempOptInfo *t2 = arg_info(op->args[2]);
2862 int borrow_out = -1;
2863
2864 if (ti_is_const(t2)) {
2865 uint64_t v2 = ti_const_val(t2);
2866 if (v2 == 0) {
2867 borrow_out = 0;
2868 } else if (ti_is_const(t1)) {
2869 uint64_t v1 = ti_const_val(t1);
2870 borrow_out = v1 < v2;
2871 }
2872 }
2873 ctx->carry_state = borrow_out;
2874 return finish_folding(ctx, op);
2875}
2876
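/*
 * For illustration: after
 *     ld8u  dest, env, $offset
 * dest is known to fit in 8 bits (z_mask == 0xff), so a later
 *     and   dest, dest, $0xff
 * can typically be simplified away by the mask tracking elsewhere
 * in this file.
 */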
Richard Hendersonfae450b2021-08-25 22:42:19 -07002877static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2878{
Richard Hendersond33e0f02024-12-09 08:53:20 -06002879 uint64_t z_mask = -1, s_mask = 0;
2880
Richard Hendersonfae450b2021-08-25 22:42:19 -07002881 /* We can't do any folding with a load, but we can record bits. */
2882 switch (op->opc) {
Richard Hendersone9968042025-01-21 21:47:16 -08002883 case INDEX_op_ld8s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002884 s_mask = INT8_MIN;
Richard Henderson57fe5c62021-08-26 12:04:46 -07002885 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002886 case INDEX_op_ld8u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002887 z_mask = MAKE_64BIT_MASK(0, 8);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002888 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002889 case INDEX_op_ld16s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002890 s_mask = INT16_MIN;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002891 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002892 case INDEX_op_ld16u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002893 z_mask = MAKE_64BIT_MASK(0, 16);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002894 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002895 case INDEX_op_ld32s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002896 s_mask = INT32_MIN;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002897 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002898 case INDEX_op_ld32u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002899 z_mask = MAKE_64BIT_MASK(0, 32);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002900 break;
2901 default:
2902 g_assert_not_reached();
2903 }
Richard Hendersond33e0f02024-12-09 08:53:20 -06002904 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002905}
2906
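/*
 * For illustration: if a temp was recently stored at env + $offset
 * and that store is still tracked, a subsequent
 *     ld  dest, env, $offset
 * is replaced by a copy of the stored temp rather than a real load.
 */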
Richard Hendersonab84dc32023-08-23 23:04:24 -07002907static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2908{
2909 TCGTemp *dst, *src;
2910 intptr_t ofs;
2911 TCGType type;
2912
2913 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
Richard Henderson0fb5b752024-12-09 09:44:40 -06002914 return finish_folding(ctx, op);
Richard Hendersonab84dc32023-08-23 23:04:24 -07002915 }
2916
2917 type = ctx->type;
2918 ofs = op->args[2];
2919 dst = arg_temp(op->args[0]);
2920 src = find_mem_copy_for(ctx, type, ofs);
2921 if (src && src->base_type == type) {
2922 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2923 }
2924
2925 reset_ts(ctx, dst);
2926 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2927 return true;
2928}
2929
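/*
 * For illustration: a store through env invalidates any tracked copy
 * overlapping the written bytes, e.g. an st16 at offset 8 clobbers
 * the byte range [8, 9] (lm1 == 1 below).
 */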
2930static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2931{
2932 intptr_t ofs = op->args[2];
2933 intptr_t lm1;
2934
2935 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2936 remove_mem_copy_all(ctx);
Richard Henderson082b3ef2024-12-08 20:34:57 -06002937 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07002938 }
2939
2940 switch (op->opc) {
2941 CASE_OP_32_64(st8):
2942 lm1 = 0;
2943 break;
2944 CASE_OP_32_64(st16):
2945 lm1 = 1;
2946 break;
2947 case INDEX_op_st32_i64:
2948 case INDEX_op_st_i32:
2949 lm1 = 3;
2950 break;
2951 case INDEX_op_st_i64:
2952 lm1 = 7;
2953 break;
2954 case INDEX_op_st_vec:
2955 lm1 = tcg_type_size(ctx->type) - 1;
2956 break;
2957 default:
2958 g_assert_not_reached();
2959 }
2960 remove_mem_copy_in(ctx, ofs, ofs + lm1);
Richard Henderson082b3ef2024-12-08 20:34:57 -06002961 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07002962}
2963
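/*
 * For illustration: repeated stores of the same constant to the same
 * env offset, with no intervening clobber,
 *     st  $0, env, $offset
 *     st  $0, env, $offset
 * leave the later store redundant; it is removed once the first has
 * been recorded as a memory copy.
 */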
2964static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2965{
2966 TCGTemp *src;
2967 intptr_t ofs, last;
2968 TCGType type;
2969
2970 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
Richard Henderson082b3ef2024-12-08 20:34:57 -06002971 return fold_tcg_st(ctx, op);
Richard Hendersonab84dc32023-08-23 23:04:24 -07002972 }
2973
2974 src = arg_temp(op->args[0]);
2975 ofs = op->args[2];
2976 type = ctx->type;
Richard Henderson3eaadae2023-08-23 23:13:06 -07002977
2978 /*
2979 * Eliminate duplicate stores of a constant.
2980 * This happens frequently when the target ISA zero-extends.
2981 */
2982 if (ts_is_const(src)) {
2983 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2984 if (src == prev) {
2985 tcg_op_remove(ctx->tcg, op);
2986 return true;
2987 }
2988 }
2989
Richard Hendersonab84dc32023-08-23 23:04:24 -07002990 last = ofs + tcg_type_size(type) - 1;
2991 remove_mem_copy_in(ctx, ofs, last);
2992 record_mem_copy(ctx, type, src, ofs, last);
Richard Henderson082b3ef2024-12-08 20:34:57 -06002993 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07002994}
2995
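/*
 * For illustration: if one xor input fits in 8 bits and the other in
 * 4 bits, the result fits in 8 bits (z_mask 0xff | 0x0f == 0xff);
 * the known sign-repetition mask is the intersection of the inputs'.
 */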
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002996static bool fold_xor(OptContext *ctx, TCGOp *op)
2997{
Richard Hendersonc890fd72024-12-08 21:39:01 -06002998 uint64_t z_mask, s_mask;
2999 TempOptInfo *t1, *t2;
3000
Richard Henderson7a2f7082021-08-26 07:06:39 -07003001 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07003002 fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07003003 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07003004 fold_xi_to_not(ctx, op, -1)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07003005 return true;
3006 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07003007
Richard Hendersonc890fd72024-12-08 21:39:01 -06003008 t1 = arg_info(op->args[1]);
3009 t2 = arg_info(op->args[2]);
3010 z_mask = t1->z_mask | t2->z_mask;
3011 s_mask = t1->s_mask & t2->s_mask;
3012 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003013}
3014
Kirill Batuzov22613af2011-07-07 16:37:13 +04003015/* Propagate constants and copies, fold constant expressions. */
Aurelien Jarno36e60ef2015-06-04 21:53:27 +02003016void tcg_optimize(TCGContext *s)
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003017{
Richard Henderson5cf32be2021-08-24 08:17:08 -07003018 int nb_temps, i;
Richard Hendersond0ed5152021-08-24 07:38:39 -07003019 TCGOp *op, *op_next;
Richard Hendersondc849882021-08-24 07:13:45 -07003020 OptContext ctx = { .tcg = s };
Richard Henderson5d8f5362012-09-21 10:13:38 -07003021
Richard Hendersonab84dc32023-08-23 23:04:24 -07003022 QSIMPLEQ_INIT(&ctx.mem_free);
3023
Kirill Batuzov22613af2011-07-07 16:37:13 +04003024    /* Each temp has a TempOptInfo reached through its state_ptr.
3025       If the temp holds a constant, its value is kept there.
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +02003026       If the temp is a copy of other temps, the other copies are
3027       available through a doubly linked circular list. */
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003028
3029 nb_temps = s->nb_temps;
Richard Henderson8f17a972020-03-30 19:52:02 -07003030 for (i = 0; i < nb_temps; ++i) {
3031 s->temps[i].state_ptr = NULL;
3032 }
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003033
Richard Henderson15fa08f2017-11-02 15:19:14 +01003034 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003035 TCGOpcode opc = op->opc;
Richard Henderson5cf32be2021-08-24 08:17:08 -07003036 const TCGOpDef *def;
Richard Henderson404a1482021-08-24 11:08:21 -07003037 bool done = false;
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003038
Richard Henderson5cf32be2021-08-24 08:17:08 -07003039 /* Calls are special. */
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003040 if (opc == INDEX_op_call) {
Richard Henderson5cf32be2021-08-24 08:17:08 -07003041 fold_call(&ctx, op);
3042 continue;
Richard Hendersoncf066672014-03-22 20:06:52 -07003043 }
Richard Henderson5cf32be2021-08-24 08:17:08 -07003044
3045 def = &tcg_op_defs[opc];
Richard Hendersonec5d4cb2021-08-24 08:20:27 -07003046 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3047 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
Kirill Batuzov22613af2011-07-07 16:37:13 +04003048
Richard Henderson67f84c92021-08-25 08:00:20 -07003049 /* Pre-compute the type of the operation. */
Richard Henderson4d872212025-01-02 19:43:06 -08003050 ctx.type = TCGOP_TYPE(op);
Richard Henderson67f84c92021-08-25 08:00:20 -07003051
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003052 /*
3053 * Process each opcode.
3054 * Sorted alphabetically by opcode as much as possible.
3055 */
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003056 switch (opc) {
Richard Henderson79602f62025-01-06 09:11:39 -08003057 case INDEX_op_add:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003058 done = fold_add(&ctx, op);
3059 break;
Richard Hendersonc578ff12021-12-16 06:07:25 -08003060 case INDEX_op_add_vec:
3061 done = fold_add_vec(&ctx, op);
3062 break;
Richard Henderson76f42782025-01-14 13:58:39 -08003063 case INDEX_op_addci:
Richard Hendersonaeb35142025-01-14 18:28:15 -08003064 done = fold_addci(&ctx, op);
3065 break;
Richard Henderson76f42782025-01-14 13:58:39 -08003066 case INDEX_op_addcio:
Richard Hendersonaeb35142025-01-14 18:28:15 -08003067 done = fold_addcio(&ctx, op);
3068 break;
3069 case INDEX_op_addco:
3070 done = fold_addco(&ctx, op);
Richard Henderson76f42782025-01-14 13:58:39 -08003071 break;
Richard Hendersonc3b920b2025-01-06 10:32:44 -08003072 case INDEX_op_and:
3073 case INDEX_op_and_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003074 done = fold_and(&ctx, op);
3075 break;
Richard Henderson46f96bf2025-01-06 12:37:02 -08003076 case INDEX_op_andc:
3077 case INDEX_op_andc_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003078 done = fold_andc(&ctx, op);
3079 break;
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08003080 case INDEX_op_brcond:
Richard Henderson079b0802021-08-24 09:30:59 -07003081 done = fold_brcond(&ctx, op);
3082 break;
Richard Henderson764d2ab2021-08-24 09:22:11 -07003083 case INDEX_op_brcond2_i32:
3084 done = fold_brcond2(&ctx, op);
3085 break;
Richard Henderson0dd07ee2025-01-10 18:51:16 -08003086 case INDEX_op_bswap16:
Richard Henderson7498d882025-01-10 19:53:51 -08003087 case INDEX_op_bswap32:
Richard Henderson3ad5d4c2025-01-10 21:54:44 -08003088 case INDEX_op_bswap64:
Richard Henderson09bacdc2021-08-24 11:58:12 -07003089 done = fold_bswap(&ctx, op);
3090 break;
Richard Henderson5a5bb0a2025-01-08 16:12:46 -08003091 case INDEX_op_clz:
Richard Hendersonc96447d2025-01-08 17:07:01 -08003092 case INDEX_op_ctz:
Richard Henderson30dd0bf2021-08-24 10:51:34 -07003093 done = fold_count_zeros(&ctx, op);
3094 break;
Richard Henderson97218ae2025-01-08 18:37:43 -08003095 case INDEX_op_ctpop:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003096 done = fold_ctpop(&ctx, op);
3097 break;
Richard Henderson4d137ff2025-01-12 20:48:57 -08003098 case INDEX_op_deposit:
Richard Henderson1b1907b2021-08-24 10:47:04 -07003099 done = fold_deposit(&ctx, op);
3100 break;
Richard Hendersonb2c514f2025-01-07 13:22:56 -08003101 case INDEX_op_divs:
Richard Henderson961b80a2025-01-07 14:27:19 -08003102 case INDEX_op_divu:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003103 done = fold_divide(&ctx, op);
3104 break;
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07003105 case INDEX_op_dup_vec:
3106 done = fold_dup(&ctx, op);
3107 break;
3108 case INDEX_op_dup2_vec:
3109 done = fold_dup2(&ctx, op);
3110 break;
Richard Henderson5c0968a2025-01-06 15:47:53 -08003111 case INDEX_op_eqv:
3112 case INDEX_op_eqv_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003113 done = fold_eqv(&ctx, op);
3114 break;
Richard Henderson07d5d502025-01-11 09:01:46 -08003115 case INDEX_op_extract:
Richard Hendersonb6617c82021-08-24 10:44:53 -07003116 done = fold_extract(&ctx, op);
3117 break;
Richard Henderson61d6a872025-01-12 21:40:43 -08003118 case INDEX_op_extract2:
Richard Hendersondcd08992021-08-24 10:41:39 -07003119 done = fold_extract2(&ctx, op);
3120 break;
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003121 case INDEX_op_ext_i32_i64:
3122 done = fold_exts(&ctx, op);
3123 break;
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003124 case INDEX_op_extu_i32_i64:
3125 case INDEX_op_extrl_i64_i32:
3126 case INDEX_op_extrh_i64_i32:
3127 done = fold_extu(&ctx, op);
3128 break;
Richard Hendersone9968042025-01-21 21:47:16 -08003129 case INDEX_op_ld8s:
3130 case INDEX_op_ld8u:
3131 case INDEX_op_ld16s:
3132 case INDEX_op_ld16u:
3133 case INDEX_op_ld32s:
3134 case INDEX_op_ld32u:
Richard Hendersonfae450b2021-08-25 22:42:19 -07003135 done = fold_tcg_ld(&ctx, op);
3136 break;
Richard Hendersone9968042025-01-21 21:47:16 -08003137 case INDEX_op_ld:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003138 case INDEX_op_ld_vec:
3139 done = fold_tcg_ld_memcopy(&ctx, op);
3140 break;
3141 CASE_OP_32_64(st8):
3142 CASE_OP_32_64(st16):
3143 case INDEX_op_st32_i64:
3144 done = fold_tcg_st(&ctx, op);
3145 break;
3146 case INDEX_op_st_i32:
3147 case INDEX_op_st_i64:
3148 case INDEX_op_st_vec:
3149 done = fold_tcg_st_memcopy(&ctx, op);
3150 break;
Richard Henderson3eefdf22021-08-25 11:06:43 -07003151 case INDEX_op_mb:
3152 done = fold_mb(&ctx, op);
3153 break;
Richard Hendersonb5701262024-12-28 15:58:24 -08003154 case INDEX_op_mov:
3155 case INDEX_op_mov_vec:
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003156 done = fold_mov(&ctx, op);
3157 break;
Richard Hendersonea46c4b2025-01-10 13:41:25 -08003158 case INDEX_op_movcond:
Richard Henderson0c310a32021-08-24 10:37:24 -07003159 done = fold_movcond(&ctx, op);
3160 break;
Richard Hendersond2c3eca2025-01-07 09:32:18 -08003161 case INDEX_op_mul:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003162 done = fold_mul(&ctx, op);
3163 break;
Richard Hendersonc7428242025-01-07 11:19:29 -08003164 case INDEX_op_mulsh:
Richard Hendersonaa28c9e2025-01-07 10:36:24 -08003165 case INDEX_op_muluh:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003166 done = fold_mul_highpart(&ctx, op);
3167 break;
Richard Hendersonbfe96482025-01-09 07:24:32 -08003168 case INDEX_op_muls2:
Richard Hendersond7761982025-01-09 09:11:53 -08003169 case INDEX_op_mulu2:
Richard Henderson407112b2021-08-26 06:33:04 -07003170 done = fold_multiply2(&ctx, op);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07003171 break;
Richard Henderson59379a42025-01-06 20:32:54 -08003172 case INDEX_op_nand:
3173 case INDEX_op_nand_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003174 done = fold_nand(&ctx, op);
3175 break;
Richard Henderson69713582025-01-06 22:48:57 -08003176 case INDEX_op_neg:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003177 done = fold_neg(&ctx, op);
3178 break;
Richard Henderson3a8c4e92025-01-06 21:02:17 -08003179 case INDEX_op_nor:
3180 case INDEX_op_nor_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003181 done = fold_nor(&ctx, op);
3182 break;
Richard Henderson5c62d372025-01-06 23:46:47 -08003183 case INDEX_op_not:
3184 case INDEX_op_not_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003185 done = fold_not(&ctx, op);
3186 break;
Richard Henderson49bd7512025-01-06 14:00:40 -08003187 case INDEX_op_or:
3188 case INDEX_op_or_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003189 done = fold_or(&ctx, op);
3190 break;
Richard Henderson6aba25e2025-01-06 14:46:26 -08003191 case INDEX_op_orc:
3192 case INDEX_op_orc_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003193 done = fold_orc(&ctx, op);
3194 break;
Richard Henderson50b7a192025-02-04 13:46:09 -08003195 case INDEX_op_qemu_ld_i32:
Richard Henderson6813be92024-12-08 20:33:30 -06003196 done = fold_qemu_ld_1reg(&ctx, op);
3197 break;
Richard Henderson50b7a192025-02-04 13:46:09 -08003198 case INDEX_op_qemu_ld_i64:
Richard Henderson6813be92024-12-08 20:33:30 -06003199 if (TCG_TARGET_REG_BITS == 64) {
3200 done = fold_qemu_ld_1reg(&ctx, op);
3201 break;
3202 }
3203 QEMU_FALLTHROUGH;
Richard Henderson50b7a192025-02-04 13:46:09 -08003204 case INDEX_op_qemu_ld_i128:
Richard Henderson6813be92024-12-08 20:33:30 -06003205 done = fold_qemu_ld_2reg(&ctx, op);
Richard Henderson3eefdf22021-08-25 11:06:43 -07003206 break;
Richard Henderson50b7a192025-02-04 13:46:09 -08003207 case INDEX_op_qemu_st8_i32:
3208 case INDEX_op_qemu_st_i32:
3209 case INDEX_op_qemu_st_i64:
3210 case INDEX_op_qemu_st_i128:
Richard Henderson3eefdf22021-08-25 11:06:43 -07003211 done = fold_qemu_st(&ctx, op);
3212 break;
Richard Henderson9a6bc182025-01-07 19:00:51 -08003213 case INDEX_op_rems:
Richard Hendersoncd9acd22025-01-07 20:25:14 -08003214 case INDEX_op_remu:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003215 done = fold_remainder(&ctx, op);
3216 break;
Richard Henderson005a87e2025-01-08 10:42:16 -08003217 case INDEX_op_rotl:
3218 case INDEX_op_rotr:
Richard Henderson3949f362025-01-08 08:05:18 -08003219 case INDEX_op_sar:
Richard Henderson6ca59452025-01-07 21:50:04 -08003220 case INDEX_op_shl:
Richard Henderson74dbd362025-01-07 22:52:10 -08003221 case INDEX_op_shr:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003222 done = fold_shift(&ctx, op);
3223 break;
Richard Hendersona363e1e2025-01-10 09:26:44 -08003224 case INDEX_op_setcond:
Richard Hendersonc63ff552021-08-24 09:35:30 -07003225 done = fold_setcond(&ctx, op);
3226 break;
Richard Hendersona363e1e2025-01-10 09:26:44 -08003227 case INDEX_op_negsetcond:
Richard Henderson36355022023-08-04 23:24:04 +00003228 done = fold_negsetcond(&ctx, op);
3229 break;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07003230 case INDEX_op_setcond2_i32:
3231 done = fold_setcond2(&ctx, op);
3232 break;
Richard Henderson1f106542024-09-06 12:22:41 -07003233 case INDEX_op_cmp_vec:
3234 done = fold_cmp_vec(&ctx, op);
3235 break;
3236 case INDEX_op_cmpsel_vec:
3237 done = fold_cmpsel_vec(&ctx, op);
3238 break;
Richard Hendersone58b9772024-09-06 22:30:01 -07003239 case INDEX_op_bitsel_vec:
3240 done = fold_bitsel_vec(&ctx, op);
3241 break;
Richard Hendersonfa361ee2025-01-12 11:50:09 -08003242 case INDEX_op_sextract:
Richard Hendersonb6617c82021-08-24 10:44:53 -07003243 done = fold_sextract(&ctx, op);
3244 break;
Richard Henderson60f34f52025-01-06 22:06:32 -08003245 case INDEX_op_sub:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003246 done = fold_sub(&ctx, op);
3247 break;
Richard Hendersonaeb35142025-01-14 18:28:15 -08003248 case INDEX_op_subbi:
3249 done = fold_subbi(&ctx, op);
3250 break;
3251 case INDEX_op_subbio:
3252 done = fold_subbio(&ctx, op);
3253 break;
3254 case INDEX_op_subbo:
3255 done = fold_subbo(&ctx, op);
3256 break;
Richard Hendersonc578ff12021-12-16 06:07:25 -08003257 case INDEX_op_sub_vec:
3258 done = fold_sub_vec(&ctx, op);
3259 break;
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08003260 case INDEX_op_xor:
3261 case INDEX_op_xor_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003262 done = fold_xor(&ctx, op);
Richard Hendersonb10f3832021-08-23 22:30:17 -07003263 break;
Richard Henderson15268552024-12-08 07:45:11 -06003264 case INDEX_op_set_label:
3265 case INDEX_op_br:
3266 case INDEX_op_exit_tb:
3267 case INDEX_op_goto_tb:
3268 case INDEX_op_goto_ptr:
3269 finish_ebb(&ctx);
3270 done = true;
3271 break;
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003272 default:
Richard Henderson0ae56422024-12-08 21:42:53 -06003273 done = finish_folding(&ctx, op);
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003274 break;
Richard Hendersonb10f3832021-08-23 22:30:17 -07003275 }
Richard Henderson0ae56422024-12-08 21:42:53 -06003276 tcg_debug_assert(done);
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003277 }
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003278}