Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04001/*
2 * Optimizations for Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2010 Samsung Electronics.
5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
Peter Maydell757e7252016-01-26 18:17:08 +000026#include "qemu/osdep.h"
Richard Henderson9531c072021-08-26 06:51:39 -070027#include "qemu/int128.h"
Richard Hendersonab84dc32023-08-23 23:04:24 -070028#include "qemu/interval-tree.h"
Richard Hendersonad3d0e42023-03-28 18:17:24 -070029#include "tcg/tcg-op-common.h"
Richard Henderson90163902021-03-18 10:21:45 -060030#include "tcg-internal.h"
Richard Henderson93280b62025-01-08 22:51:55 +010031#include "tcg-has.h"
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +040032
Richard Henderson170ba882017-11-22 09:07:11 +010033
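/*
 * A MemCopyInfo records that the memory range [itree.start, itree.last]
 * currently holds a copy of temp @ts, accessed with TCG type @type.
 * Live entries are indexed by address range in OptContext.mem_copy and
 * linked on the owning temp's mem_copy list; retired entries are kept
 * on OptContext.mem_free for reuse.
 */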
Richard Hendersonab84dc32023-08-23 23:04:24 -070034typedef struct MemCopyInfo {
35 IntervalTreeNode itree;
36 QSIMPLEQ_ENTRY (MemCopyInfo) next;
37 TCGTemp *ts;
38 TCGType type;
39} MemCopyInfo;
40
Richard Henderson6fcb98e2020-03-30 17:44:30 -070041typedef struct TempOptInfo {
Aurelien Jarnob41059d2015-07-27 12:41:44 +020042 bool is_const;
Richard Henderson63490392017-06-20 13:43:15 -070043 TCGTemp *prev_copy;
44 TCGTemp *next_copy;
Richard Hendersonab84dc32023-08-23 23:04:24 -070045 QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
Richard Henderson54795542020-09-06 16:21:32 -070046 uint64_t val;
Richard Hendersonb1fde412021-08-23 13:07:49 -070047 uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
Richard Henderson6d70ddc2024-12-21 21:08:10 -080048 uint64_t s_mask; /* mask bit is 1 if value bit matches msb */
Richard Henderson6fcb98e2020-03-30 17:44:30 -070049} TempOptInfo;
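/*
 * For example, a temp known to hold the constant 0x00ff has
 * z_mask == 0x00ff (only the low 8 bits can be 1) and
 * s_mask == 0xffffffffffffff00 (bits 8..63 all match the sign bit,
 * which is 0), matching the INT64_MIN >> clrsb64(val) computation
 * in init_ts_info() below.
 */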
Kirill Batuzov22613af2011-07-07 16:37:13 +040050
Richard Henderson3b3f8472021-08-23 22:06:31 -070051typedef struct OptContext {
Richard Hendersondc849882021-08-24 07:13:45 -070052 TCGContext *tcg;
Richard Hendersond0ed5152021-08-24 07:38:39 -070053 TCGOp *prev_mb;
Richard Henderson3b3f8472021-08-23 22:06:31 -070054 TCGTempSet temps_used;
Richard Henderson137f1f42021-08-24 08:49:25 -070055
Richard Hendersonab84dc32023-08-23 23:04:24 -070056 IntervalTreeRoot mem_copy;
57 QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
58
Richard Henderson137f1f42021-08-24 08:49:25 -070059 /* In flight values from optimization. */
Richard Henderson67f84c92021-08-25 08:00:20 -070060 TCGType type;
Richard Hendersonaeb35142025-01-14 18:28:15 -080061 int carry_state; /* -1 = non-constant, {0,1} = constant carry-in */
Richard Henderson3b3f8472021-08-23 22:06:31 -070062} OptContext;
63
Richard Henderson6fcb98e2020-03-30 17:44:30 -070064static inline TempOptInfo *ts_info(TCGTemp *ts)
Aurelien Jarnod9c769c2015-07-27 12:41:44 +020065{
Richard Henderson63490392017-06-20 13:43:15 -070066 return ts->state_ptr;
Aurelien Jarnod9c769c2015-07-27 12:41:44 +020067}
68
Richard Henderson6fcb98e2020-03-30 17:44:30 -070069static inline TempOptInfo *arg_info(TCGArg arg)
Aurelien Jarnod9c769c2015-07-27 12:41:44 +020070{
Richard Henderson63490392017-06-20 13:43:15 -070071 return ts_info(arg_temp(arg));
72}
73
Richard Hendersone1b6c142024-12-22 10:26:14 -080074static inline bool ti_is_const(TempOptInfo *ti)
75{
76 return ti->is_const;
77}
78
79static inline uint64_t ti_const_val(TempOptInfo *ti)
80{
81 return ti->val;
82}
83
84static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
85{
86 return ti_is_const(ti) && ti_const_val(ti) == val;
87}
88
Richard Henderson63490392017-06-20 13:43:15 -070089static inline bool ts_is_const(TCGTemp *ts)
90{
Richard Hendersone1b6c142024-12-22 10:26:14 -080091 return ti_is_const(ts_info(ts));
Richard Henderson63490392017-06-20 13:43:15 -070092}
93
Richard Henderson27cdb852023-10-23 11:38:00 -070094static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
95{
Richard Hendersone1b6c142024-12-22 10:26:14 -080096 return ti_is_const_val(ts_info(ts), val);
Richard Henderson27cdb852023-10-23 11:38:00 -070097}
98
Richard Henderson63490392017-06-20 13:43:15 -070099static inline bool arg_is_const(TCGArg arg)
100{
101 return ts_is_const(arg_temp(arg));
102}
103
Richard Henderson27cdb852023-10-23 11:38:00 -0700104static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
105{
106 return ts_is_const_val(arg_temp(arg), val);
107}
108
Richard Henderson63490392017-06-20 13:43:15 -0700109static inline bool ts_is_copy(TCGTemp *ts)
110{
111 return ts_info(ts)->next_copy != ts;
Aurelien Jarnod9c769c2015-07-27 12:41:44 +0200112}
113
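/*
 * Between two copies of the same value, prefer the temp whose kind sorts
 * higher; assuming the usual TCGTempKind ordering (constants and fixed
 * globals above TB/EBB temps), this makes find_better_copy() converge on
 * the longest-lived representative of the copy list.
 */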
Richard Henderson9f75e522023-11-02 13:37:46 -0700114static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
115{
116 return a->kind < b->kind ? b : a;
117}
118
Aurelien Jarno1208d7d2015-07-27 12:41:44 +0200119/* Initialize and activate a temporary. */
Richard Henderson3b3f8472021-08-23 22:06:31 -0700120static void init_ts_info(OptContext *ctx, TCGTemp *ts)
Aurelien Jarno1208d7d2015-07-27 12:41:44 +0200121{
Richard Henderson63490392017-06-20 13:43:15 -0700122 size_t idx = temp_idx(ts);
Richard Henderson8f17a972020-03-30 19:52:02 -0700123 TempOptInfo *ti;
Richard Henderson63490392017-06-20 13:43:15 -0700124
Richard Henderson3b3f8472021-08-23 22:06:31 -0700125 if (test_bit(idx, ctx->temps_used.l)) {
Richard Henderson8f17a972020-03-30 19:52:02 -0700126 return;
127 }
Richard Henderson3b3f8472021-08-23 22:06:31 -0700128 set_bit(idx, ctx->temps_used.l);
Richard Henderson8f17a972020-03-30 19:52:02 -0700129
130 ti = ts->state_ptr;
131 if (ti == NULL) {
132 ti = tcg_malloc(sizeof(TempOptInfo));
Richard Henderson63490392017-06-20 13:43:15 -0700133 ts->state_ptr = ti;
Richard Henderson8f17a972020-03-30 19:52:02 -0700134 }
135
136 ti->next_copy = ts;
137 ti->prev_copy = ts;
Richard Hendersonab84dc32023-08-23 23:04:24 -0700138 QSIMPLEQ_INIT(&ti->mem_copy);
Richard Henderson8f17a972020-03-30 19:52:02 -0700139 if (ts->kind == TEMP_CONST) {
140 ti->is_const = true;
141 ti->val = ts->val;
Richard Hendersonb1fde412021-08-23 13:07:49 -0700142 ti->z_mask = ts->val;
Richard Henderson6d70ddc2024-12-21 21:08:10 -0800143 ti->s_mask = INT64_MIN >> clrsb64(ts->val);
Richard Henderson8f17a972020-03-30 19:52:02 -0700144 } else {
145 ti->is_const = false;
Richard Hendersonb1fde412021-08-23 13:07:49 -0700146 ti->z_mask = -1;
Richard Henderson57fe5c62021-08-26 12:04:46 -0700147 ti->s_mask = 0;
Aurelien Jarno1208d7d2015-07-27 12:41:44 +0200148 }
149}
150
Richard Hendersonab84dc32023-08-23 23:04:24 -0700151static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
152{
153 IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
154 return r ? container_of(r, MemCopyInfo, itree) : NULL;
155}
156
157static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
158{
159 IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
160 return r ? container_of(r, MemCopyInfo, itree) : NULL;
161}
162
163static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
164{
165 TCGTemp *ts = mc->ts;
166 TempOptInfo *ti = ts_info(ts);
167
168 interval_tree_remove(&mc->itree, &ctx->mem_copy);
169 QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
170 QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
171}
172
173static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
174{
175 while (true) {
176 MemCopyInfo *mc = mem_copy_first(ctx, s, l);
177 if (!mc) {
178 break;
179 }
180 remove_mem_copy(ctx, mc);
181 }
182}
183
184static void remove_mem_copy_all(OptContext *ctx)
185{
186 remove_mem_copy_in(ctx, 0, -1);
187 tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
188}
189
Richard Henderson9f75e522023-11-02 13:37:46 -0700190static TCGTemp *find_better_copy(TCGTemp *ts)
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200191{
Richard Henderson9f75e522023-11-02 13:37:46 -0700192 TCGTemp *i, *ret;
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200193
Richard Henderson4c868ce2020-04-23 09:02:23 -0700194 /* If this is already readonly, we can't do better. */
195 if (temp_readonly(ts)) {
Richard Henderson63490392017-06-20 13:43:15 -0700196 return ts;
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200197 }
198
Richard Henderson9f75e522023-11-02 13:37:46 -0700199 ret = ts;
Richard Henderson63490392017-06-20 13:43:15 -0700200 for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
Richard Henderson9f75e522023-11-02 13:37:46 -0700201 ret = cmp_better_copy(ret, i);
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200202 }
Richard Henderson9f75e522023-11-02 13:37:46 -0700203 return ret;
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200204}
205
Richard Hendersonab84dc32023-08-23 23:04:24 -0700206static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
207{
208 TempOptInfo *si = ts_info(src_ts);
209 TempOptInfo *di = ts_info(dst_ts);
210 MemCopyInfo *mc;
211
212 QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
213 tcg_debug_assert(mc->ts == src_ts);
214 mc->ts = dst_ts;
215 }
216 QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
217}
218
219/* Reset TEMP's state, possibly removing the temp from the list of copies. */
220static void reset_ts(OptContext *ctx, TCGTemp *ts)
221{
222 TempOptInfo *ti = ts_info(ts);
223 TCGTemp *pts = ti->prev_copy;
224 TCGTemp *nts = ti->next_copy;
225 TempOptInfo *pi = ts_info(pts);
226 TempOptInfo *ni = ts_info(nts);
227
228 ni->prev_copy = ti->prev_copy;
229 pi->next_copy = ti->next_copy;
230 ti->next_copy = ts;
231 ti->prev_copy = ts;
232 ti->is_const = false;
233 ti->z_mask = -1;
234 ti->s_mask = 0;
235
236 if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
237 if (ts == nts) {
238 /* Last temp copy being removed, the mem copies die. */
239 MemCopyInfo *mc;
240 QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
241 interval_tree_remove(&mc->itree, &ctx->mem_copy);
242 }
243 QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
244 } else {
245 move_mem_copies(find_better_copy(nts), ts);
246 }
247 }
248}
249
250static void reset_temp(OptContext *ctx, TCGArg arg)
251{
252 reset_ts(ctx, arg_temp(arg));
253}
254
255static void record_mem_copy(OptContext *ctx, TCGType type,
256 TCGTemp *ts, intptr_t start, intptr_t last)
257{
258 MemCopyInfo *mc;
259 TempOptInfo *ti;
260
261 mc = QSIMPLEQ_FIRST(&ctx->mem_free);
262 if (mc) {
263 QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
264 } else {
265 mc = tcg_malloc(sizeof(*mc));
266 }
267
268 memset(mc, 0, sizeof(*mc));
269 mc->itree.start = start;
270 mc->itree.last = last;
271 mc->type = type;
272 interval_tree_insert(&mc->itree, &ctx->mem_copy);
273
274 ts = find_better_copy(ts);
275 ti = ts_info(ts);
276 mc->ts = ts;
277 QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
278}
279
Richard Henderson63490392017-06-20 13:43:15 -0700280static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200281{
Richard Henderson63490392017-06-20 13:43:15 -0700282 TCGTemp *i;
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200283
Richard Henderson63490392017-06-20 13:43:15 -0700284 if (ts1 == ts2) {
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200285 return true;
286 }
287
Richard Henderson63490392017-06-20 13:43:15 -0700288 if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200289 return false;
290 }
291
Richard Henderson63490392017-06-20 13:43:15 -0700292 for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
293 if (i == ts2) {
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +0200294 return true;
295 }
296 }
297
298 return false;
299}
300
Richard Henderson63490392017-06-20 13:43:15 -0700301static bool args_are_copies(TCGArg arg1, TCGArg arg2)
302{
303 return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
304}
305
Richard Hendersonab84dc32023-08-23 23:04:24 -0700306static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
307{
308 MemCopyInfo *mc;
309
310 for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
311 if (mc->itree.start == s && mc->type == type) {
312 return find_better_copy(mc->ts);
313 }
314 }
315 return NULL;
316}
317
Richard Henderson26aac972023-10-23 12:31:57 -0700318static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
319{
320 TCGType type = ctx->type;
321 TCGTemp *ts;
322
323 if (type == TCG_TYPE_I32) {
324 val = (int32_t)val;
325 }
326
327 ts = tcg_constant_internal(type, val);
328 init_ts_info(ctx, ts);
329
330 return temp_arg(ts);
331}
332
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100333static TCGArg arg_new_temp(OptContext *ctx)
334{
335 TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
336 init_ts_info(ctx, ts);
337 return temp_arg(ts);
338}
339
Richard Hendersona3c1c572025-04-21 11:05:29 -0700340static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
341 TCGOpcode opc, unsigned narg)
342{
Richard Hendersoncf5c9f62025-01-21 20:34:41 -0800343 return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
Richard Hendersona3c1c572025-04-21 11:05:29 -0700344}
345
346static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
347 TCGOpcode opc, unsigned narg)
348{
Richard Hendersoncf5c9f62025-01-21 20:34:41 -0800349 return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
Richard Hendersona3c1c572025-04-21 11:05:29 -0700350}
351
Richard Henderson6b99d5b2021-08-24 10:57:56 -0700352static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
Kirill Batuzov22613af2011-07-07 16:37:13 +0400353{
Richard Henderson63490392017-06-20 13:43:15 -0700354 TCGTemp *dst_ts = arg_temp(dst);
355 TCGTemp *src_ts = arg_temp(src);
Richard Henderson6fcb98e2020-03-30 17:44:30 -0700356 TempOptInfo *di;
357 TempOptInfo *si;
Richard Henderson63490392017-06-20 13:43:15 -0700358 TCGOpcode new_op;
359
360 if (ts_are_copies(dst_ts, src_ts)) {
Richard Hendersondc849882021-08-24 07:13:45 -0700361 tcg_op_remove(ctx->tcg, op);
Richard Henderson6b99d5b2021-08-24 10:57:56 -0700362 return true;
Aurelien Jarno53657182015-06-04 21:53:25 +0200363 }
364
Richard Henderson986cac12023-01-09 13:59:35 -0800365 reset_ts(ctx, dst_ts);
Richard Henderson63490392017-06-20 13:43:15 -0700366 di = ts_info(dst_ts);
367 si = ts_info(src_ts);
Richard Henderson67f84c92021-08-25 08:00:20 -0700368
369 switch (ctx->type) {
370 case TCG_TYPE_I32:
Richard Henderson67f84c92021-08-25 08:00:20 -0700371 case TCG_TYPE_I64:
Richard Hendersonb5701262024-12-28 15:58:24 -0800372 new_op = INDEX_op_mov;
Richard Henderson67f84c92021-08-25 08:00:20 -0700373 break;
374 case TCG_TYPE_V64:
375 case TCG_TYPE_V128:
376 case TCG_TYPE_V256:
Richard Henderson4d872212025-01-02 19:43:06 -0800377 /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
Richard Henderson67f84c92021-08-25 08:00:20 -0700378 new_op = INDEX_op_mov_vec;
379 break;
380 default:
381 g_assert_not_reached();
Richard Henderson170ba882017-11-22 09:07:11 +0100382 }
Richard Hendersonc45cb8b2014-09-19 13:49:15 -0700383 op->opc = new_op;
Richard Henderson63490392017-06-20 13:43:15 -0700384 op->args[0] = dst;
385 op->args[1] = src;
Richard Hendersona62f6f52014-05-22 10:59:12 -0700386
Richard Hendersonfaa2e102021-08-26 09:03:59 -0700387 di->z_mask = si->z_mask;
Richard Henderson57fe5c62021-08-26 12:04:46 -0700388 di->s_mask = si->s_mask;
Richard Henderson24666ba2014-05-22 11:14:10 -0700389
Richard Henderson63490392017-06-20 13:43:15 -0700390 if (src_ts->type == dst_ts->type) {
Richard Henderson6fcb98e2020-03-30 17:44:30 -0700391 TempOptInfo *ni = ts_info(si->next_copy);
Richard Henderson63490392017-06-20 13:43:15 -0700392
393 di->next_copy = si->next_copy;
394 di->prev_copy = src_ts;
395 ni->prev_copy = dst_ts;
396 si->next_copy = dst_ts;
397 di->is_const = si->is_const;
398 di->val = si->val;
Richard Hendersonab84dc32023-08-23 23:04:24 -0700399
400 if (!QSIMPLEQ_EMPTY(&si->mem_copy)
401 && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
402 move_mem_copies(dst_ts, src_ts);
403 }
Paolo Bonzini3a9d8b12013-01-11 15:42:52 -0800404 }
Richard Henderson6b99d5b2021-08-24 10:57:56 -0700405 return true;
Kirill Batuzov22613af2011-07-07 16:37:13 +0400406}
407
Richard Henderson6b99d5b2021-08-24 10:57:56 -0700408static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
Richard Hendersondc849882021-08-24 07:13:45 -0700409 TCGArg dst, uint64_t val)
Richard Henderson8fe35e02020-03-30 20:42:43 -0700410{
Richard Hendersonfaa2e102021-08-26 09:03:59 -0700411 /* Convert movi to mov with constant temp. */
Richard Henderson26aac972023-10-23 12:31:57 -0700412 return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
Richard Henderson8fe35e02020-03-30 20:42:43 -0700413}
414
Richard Hendersonaa28c9e2025-01-07 10:36:24 -0800415static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
416 uint64_t x, uint64_t y)
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400417{
Richard Henderson03271522013-08-14 14:35:56 -0700418 uint64_t l64, h64;
419
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400420 switch (op) {
Richard Henderson79602f62025-01-06 09:11:39 -0800421 case INDEX_op_add:
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400422 return x + y;
423
Richard Henderson60f34f52025-01-06 22:06:32 -0800424 case INDEX_op_sub:
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400425 return x - y;
426
Richard Hendersond2c3eca2025-01-07 09:32:18 -0800427 case INDEX_op_mul:
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400428 return x * y;
429
Richard Hendersonc3b920b2025-01-06 10:32:44 -0800430 case INDEX_op_and:
431 case INDEX_op_and_vec:
Kirill Batuzov9a810902011-07-07 16:37:15 +0400432 return x & y;
433
Richard Henderson49bd7512025-01-06 14:00:40 -0800434 case INDEX_op_or:
435 case INDEX_op_or_vec:
Kirill Batuzov9a810902011-07-07 16:37:15 +0400436 return x | y;
437
Richard Hendersonfffd3dc2025-01-06 15:18:35 -0800438 case INDEX_op_xor:
439 case INDEX_op_xor_vec:
Kirill Batuzov9a810902011-07-07 16:37:15 +0400440 return x ^ y;
441
Richard Henderson6ca59452025-01-07 21:50:04 -0800442 case INDEX_op_shl:
443 if (type == TCG_TYPE_I32) {
444 return (uint32_t)x << (y & 31);
445 }
Richard Henderson50c5c4d2014-03-18 07:45:39 -0700446 return (uint64_t)x << (y & 63);
Kirill Batuzov55c09752011-07-07 16:37:16 +0400447
Richard Henderson74dbd362025-01-07 22:52:10 -0800448 case INDEX_op_shr:
449 if (type == TCG_TYPE_I32) {
450 return (uint32_t)x >> (y & 31);
451 }
Richard Henderson50c5c4d2014-03-18 07:45:39 -0700452 return (uint64_t)x >> (y & 63);
Kirill Batuzov55c09752011-07-07 16:37:16 +0400453
Richard Henderson3949f362025-01-08 08:05:18 -0800454 case INDEX_op_sar:
455 if (type == TCG_TYPE_I32) {
456 return (int32_t)x >> (y & 31);
457 }
Richard Henderson50c5c4d2014-03-18 07:45:39 -0700458 return (int64_t)x >> (y & 63);
Kirill Batuzov55c09752011-07-07 16:37:16 +0400459
Richard Henderson005a87e2025-01-08 10:42:16 -0800460 case INDEX_op_rotr:
461 if (type == TCG_TYPE_I32) {
462 return ror32(x, y & 31);
463 }
Richard Henderson50c5c4d2014-03-18 07:45:39 -0700464 return ror64(x, y & 63);
Kirill Batuzov55c09752011-07-07 16:37:16 +0400465
Richard Henderson005a87e2025-01-08 10:42:16 -0800466 case INDEX_op_rotl:
467 if (type == TCG_TYPE_I32) {
468 return rol32(x, y & 31);
469 }
Richard Henderson50c5c4d2014-03-18 07:45:39 -0700470 return rol64(x, y & 63);
Kirill Batuzov55c09752011-07-07 16:37:16 +0400471
Richard Henderson5c62d372025-01-06 23:46:47 -0800472 case INDEX_op_not:
473 case INDEX_op_not_vec:
Kirill Batuzova640f032011-07-07 16:37:17 +0400474 return ~x;
475
Richard Henderson69713582025-01-06 22:48:57 -0800476 case INDEX_op_neg:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700477 return -x;
478
Richard Henderson46f96bf2025-01-06 12:37:02 -0800479 case INDEX_op_andc:
480 case INDEX_op_andc_vec:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700481 return x & ~y;
482
Richard Henderson6aba25e2025-01-06 14:46:26 -0800483 case INDEX_op_orc:
484 case INDEX_op_orc_vec:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700485 return x | ~y;
486
Richard Henderson5c0968a2025-01-06 15:47:53 -0800487 case INDEX_op_eqv:
488 case INDEX_op_eqv_vec:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700489 return ~(x ^ y);
490
Richard Henderson59379a42025-01-06 20:32:54 -0800491 case INDEX_op_nand:
492 case INDEX_op_nand_vec:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700493 return ~(x & y);
494
Richard Henderson3a8c4e92025-01-06 21:02:17 -0800495 case INDEX_op_nor:
496 case INDEX_op_nor_vec:
Richard Hendersoncb25c802011-08-17 14:11:47 -0700497 return ~(x | y);
498
Richard Henderson5a5bb0a2025-01-08 16:12:46 -0800499 case INDEX_op_clz:
500 if (type == TCG_TYPE_I32) {
501 return (uint32_t)x ? clz32(x) : y;
502 }
Richard Henderson0e28d002016-11-16 09:23:28 +0100503 return x ? clz64(x) : y;
504
Richard Hendersonc96447d2025-01-08 17:07:01 -0800505 case INDEX_op_ctz:
506 if (type == TCG_TYPE_I32) {
507 return (uint32_t)x ? ctz32(x) : y;
508 }
Richard Henderson0e28d002016-11-16 09:23:28 +0100509 return x ? ctz64(x) : y;
510
Richard Henderson97218ae2025-01-08 18:37:43 -0800511 case INDEX_op_ctpop:
512 return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);
Richard Hendersona768e4e2016-11-21 11:13:39 +0100513
Richard Henderson0dd07ee2025-01-10 18:51:16 -0800514 case INDEX_op_bswap16:
Richard Henderson0b76ff82021-06-13 13:04:00 -0700515 x = bswap16(x);
516 return y & TCG_BSWAP_OS ? (int16_t)x : x;
Richard Henderson64985942018-11-20 08:53:34 +0100517
Richard Henderson7498d882025-01-10 19:53:51 -0800518 case INDEX_op_bswap32:
Richard Henderson0b76ff82021-06-13 13:04:00 -0700519 x = bswap32(x);
520 return y & TCG_BSWAP_OS ? (int32_t)x : x;
Richard Henderson64985942018-11-20 08:53:34 +0100521
Richard Henderson3ad5d4c2025-01-10 21:54:44 -0800522 case INDEX_op_bswap64:
Richard Henderson64985942018-11-20 08:53:34 +0100523 return bswap64(x);
524
Aurelien Jarno8bcb5c82015-07-27 12:41:45 +0200525 case INDEX_op_ext_i32_i64:
Kirill Batuzova640f032011-07-07 16:37:17 +0400526 return (int32_t)x;
527
Aurelien Jarno8bcb5c82015-07-27 12:41:45 +0200528 case INDEX_op_extu_i32_i64:
Richard Henderson609ad702015-07-24 07:16:00 -0700529 case INDEX_op_extrl_i64_i32:
Kirill Batuzova640f032011-07-07 16:37:17 +0400530 return (uint32_t)x;
Kirill Batuzova640f032011-07-07 16:37:17 +0400531
Richard Henderson609ad702015-07-24 07:16:00 -0700532 case INDEX_op_extrh_i64_i32:
533 return (uint64_t)x >> 32;
534
Richard Hendersonaa28c9e2025-01-07 10:36:24 -0800535 case INDEX_op_muluh:
536 if (type == TCG_TYPE_I32) {
537 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
538 }
539 mulu64(&l64, &h64, x, y);
540 return h64;
541
Richard Hendersonc7428242025-01-07 11:19:29 -0800542 case INDEX_op_mulsh:
543 if (type == TCG_TYPE_I32) {
544 return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
545 }
Richard Henderson03271522013-08-14 14:35:56 -0700546 muls64(&l64, &h64, x, y);
547 return h64;
548
Richard Hendersonb2c514f2025-01-07 13:22:56 -0800549 case INDEX_op_divs:
Richard Henderson01547f72013-08-14 15:22:46 -0700550 /* Avoid crashing on divide by zero, otherwise undefined. */
Richard Hendersonb2c514f2025-01-07 13:22:56 -0800551 if (type == TCG_TYPE_I32) {
552 return (int32_t)x / ((int32_t)y ? : 1);
553 }
554 return (int64_t)x / ((int64_t)y ? : 1);
555
Richard Henderson961b80a2025-01-07 14:27:19 -0800556 case INDEX_op_divu:
557 if (type == TCG_TYPE_I32) {
558 return (uint32_t)x / ((uint32_t)y ? : 1);
559 }
Richard Henderson01547f72013-08-14 15:22:46 -0700560 return (uint64_t)x / ((uint64_t)y ? : 1);
561
Richard Henderson9a6bc182025-01-07 19:00:51 -0800562 case INDEX_op_rems:
563 if (type == TCG_TYPE_I32) {
564 return (int32_t)x % ((int32_t)y ? : 1);
565 }
566 return (int64_t)x % ((int64_t)y ? : 1);
567
Richard Hendersoncd9acd22025-01-07 20:25:14 -0800568 case INDEX_op_remu:
569 if (type == TCG_TYPE_I32) {
570 return (uint32_t)x % ((uint32_t)y ? : 1);
571 }
Richard Henderson01547f72013-08-14 15:22:46 -0700572 return (uint64_t)x % ((uint64_t)y ? : 1);
573
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400574 default:
Richard Henderson732e89f2023-04-05 12:09:14 -0700575 g_assert_not_reached();
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400576 }
577}
578
Richard Henderson67f84c92021-08-25 08:00:20 -0700579static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
580 uint64_t x, uint64_t y)
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400581{
Richard Hendersonaa28c9e2025-01-07 10:36:24 -0800582 uint64_t res = do_constant_folding_2(op, type, x, y);
Richard Henderson67f84c92021-08-25 08:00:20 -0700583 if (type == TCG_TYPE_I32) {
Aurelien Jarno29f3ff82015-07-10 18:03:31 +0200584 res = (int32_t)res;
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400585 }
Kirill Batuzov53108fb2011-07-07 16:37:14 +0400586 return res;
587}
588
Richard Henderson9519da72012-10-02 11:32:26 -0700589static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
590{
591 switch (c) {
592 case TCG_COND_EQ:
593 return x == y;
594 case TCG_COND_NE:
595 return x != y;
596 case TCG_COND_LT:
597 return (int32_t)x < (int32_t)y;
598 case TCG_COND_GE:
599 return (int32_t)x >= (int32_t)y;
600 case TCG_COND_LE:
601 return (int32_t)x <= (int32_t)y;
602 case TCG_COND_GT:
603 return (int32_t)x > (int32_t)y;
604 case TCG_COND_LTU:
605 return x < y;
606 case TCG_COND_GEU:
607 return x >= y;
608 case TCG_COND_LEU:
609 return x <= y;
610 case TCG_COND_GTU:
611 return x > y;
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700612 case TCG_COND_TSTEQ:
613 return (x & y) == 0;
614 case TCG_COND_TSTNE:
615 return (x & y) != 0;
616 case TCG_COND_ALWAYS:
617 case TCG_COND_NEVER:
618 break;
Richard Henderson9519da72012-10-02 11:32:26 -0700619 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700620 g_assert_not_reached();
Richard Henderson9519da72012-10-02 11:32:26 -0700621}
622
623static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
624{
625 switch (c) {
626 case TCG_COND_EQ:
627 return x == y;
628 case TCG_COND_NE:
629 return x != y;
630 case TCG_COND_LT:
631 return (int64_t)x < (int64_t)y;
632 case TCG_COND_GE:
633 return (int64_t)x >= (int64_t)y;
634 case TCG_COND_LE:
635 return (int64_t)x <= (int64_t)y;
636 case TCG_COND_GT:
637 return (int64_t)x > (int64_t)y;
638 case TCG_COND_LTU:
639 return x < y;
640 case TCG_COND_GEU:
641 return x >= y;
642 case TCG_COND_LEU:
643 return x <= y;
644 case TCG_COND_GTU:
645 return x > y;
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700646 case TCG_COND_TSTEQ:
647 return (x & y) == 0;
648 case TCG_COND_TSTNE:
649 return (x & y) != 0;
650 case TCG_COND_ALWAYS:
651 case TCG_COND_NEVER:
652 break;
Richard Henderson9519da72012-10-02 11:32:26 -0700653 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700654 g_assert_not_reached();
Richard Henderson9519da72012-10-02 11:32:26 -0700655}
656
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700657static int do_constant_folding_cond_eq(TCGCond c)
Richard Henderson9519da72012-10-02 11:32:26 -0700658{
659 switch (c) {
660 case TCG_COND_GT:
661 case TCG_COND_LTU:
662 case TCG_COND_LT:
663 case TCG_COND_GTU:
664 case TCG_COND_NE:
665 return 0;
666 case TCG_COND_GE:
667 case TCG_COND_GEU:
668 case TCG_COND_LE:
669 case TCG_COND_LEU:
670 case TCG_COND_EQ:
671 return 1;
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700672 case TCG_COND_TSTEQ:
673 case TCG_COND_TSTNE:
674 return -1;
675 case TCG_COND_ALWAYS:
676 case TCG_COND_NEVER:
677 break;
Richard Henderson9519da72012-10-02 11:32:26 -0700678 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700679 g_assert_not_reached();
Richard Henderson9519da72012-10-02 11:32:26 -0700680}
681
Richard Henderson8d57bf12021-08-24 08:34:27 -0700682/*
683 * Return -1 if the condition can't be simplified,
684 * and the result of the condition (0 or 1) if it can.
685 */
Richard Henderson67f84c92021-08-25 08:00:20 -0700686static int do_constant_folding_cond(TCGType type, TCGArg x,
Richard Henderson8d57bf12021-08-24 08:34:27 -0700687 TCGArg y, TCGCond c)
Aurelien Jarnof8dd19e2012-09-06 16:47:14 +0200688{
Richard Henderson63490392017-06-20 13:43:15 -0700689 if (arg_is_const(x) && arg_is_const(y)) {
Alex Bennée9becc362022-02-09 11:21:42 +0000690 uint64_t xv = arg_info(x)->val;
691 uint64_t yv = arg_info(y)->val;
692
Richard Henderson67f84c92021-08-25 08:00:20 -0700693 switch (type) {
694 case TCG_TYPE_I32:
Richard Henderson170ba882017-11-22 09:07:11 +0100695 return do_constant_folding_cond_32(xv, yv, c);
Richard Henderson67f84c92021-08-25 08:00:20 -0700696 case TCG_TYPE_I64:
697 return do_constant_folding_cond_64(xv, yv, c);
698 default:
699 /* Only scalar comparisons are optimizable */
700 return -1;
Aurelien Jarnof8dd19e2012-09-06 16:47:14 +0200701 }
Richard Henderson63490392017-06-20 13:43:15 -0700702 } else if (args_are_copies(x, y)) {
Richard Henderson9519da72012-10-02 11:32:26 -0700703 return do_constant_folding_cond_eq(c);
Richard Henderson27cdb852023-10-23 11:38:00 -0700704 } else if (arg_is_const_val(y, 0)) {
Aurelien Jarnob336ceb2012-09-18 19:37:00 +0200705 switch (c) {
706 case TCG_COND_LTU:
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700707 case TCG_COND_TSTNE:
Aurelien Jarnob336ceb2012-09-18 19:37:00 +0200708 return 0;
709 case TCG_COND_GEU:
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700710 case TCG_COND_TSTEQ:
Aurelien Jarnob336ceb2012-09-18 19:37:00 +0200711 return 1;
712 default:
Richard Henderson8d57bf12021-08-24 08:34:27 -0700713 return -1;
Aurelien Jarnob336ceb2012-09-18 19:37:00 +0200714 }
Aurelien Jarnof8dd19e2012-09-06 16:47:14 +0200715 }
Richard Henderson8d57bf12021-08-24 08:34:27 -0700716 return -1;
Aurelien Jarnof8dd19e2012-09-06 16:47:14 +0200717}
718
Richard Henderson7a2f7082021-08-26 07:06:39 -0700719/**
720 * swap_commutative:
721 * @dest: TCGArg of the destination argument, or NO_DEST.
722 * @p1: first paired argument
723 * @p2: second paired argument
724 *
725 * If *@p1 is a constant and *@p2 is not, swap.
726 * If *@p2 matches @dest, swap.
727 * Return true if a swap was performed.
728 */
729
730#define NO_DEST temp_arg(NULL)
731
Richard Hendersone2f5ee32025-01-14 23:08:24 -0800732static int pref_commutative(TempOptInfo *ti)
733{
734 /* Slight preference for non-zero constants second. */
735 return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
736}
737
Richard Henderson24c9ae42012-10-02 11:32:21 -0700738static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
739{
740 TCGArg a1 = *p1, a2 = *p2;
741 int sum = 0;
Richard Hendersone2f5ee32025-01-14 23:08:24 -0800742 sum += pref_commutative(arg_info(a1));
743 sum -= pref_commutative(arg_info(a2));
Richard Henderson24c9ae42012-10-02 11:32:21 -0700744
745 /* Prefer the constant in the second argument, and then the form
746 op a, a, b, which is better handled on non-RISC hosts. */
747 if (sum > 0 || (sum == 0 && dest == a2)) {
748 *p1 = a2;
749 *p2 = a1;
750 return true;
751 }
752 return false;
753}
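/*
 * For example, with dest == t0, *p1 == constant 5 and *p2 == t1, the
 * operands are swapped so the constant ends up second. Callers that
 * have no output operand (e.g. branches) pass NO_DEST, which can never
 * match *p2 and so disables the "op a, a, b" preference.
 */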
754
Richard Henderson0bfcb862012-10-02 11:32:23 -0700755static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
756{
757 int sum = 0;
Richard Hendersone2f5ee32025-01-14 23:08:24 -0800758 sum += pref_commutative(arg_info(p1[0]));
759 sum += pref_commutative(arg_info(p1[1]));
760 sum -= pref_commutative(arg_info(p2[0]));
761 sum -= pref_commutative(arg_info(p2[1]));
Richard Henderson0bfcb862012-10-02 11:32:23 -0700762 if (sum > 0) {
763 TCGArg t;
764 t = p1[0], p1[0] = p2[0], p2[0] = t;
765 t = p1[1], p1[1] = p2[1], p2[1] = t;
766 return true;
767 }
768 return false;
769}
770
Richard Henderson7e64b112023-10-24 16:53:56 -0700771/*
772 * Return -1 if the condition can't be simplified,
773 * and the result of the condition (0 or 1) if it can.
774 */
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100775static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
Richard Henderson246c4b72023-10-24 16:36:50 -0700776 TCGArg *p1, TCGArg *p2, TCGArg *pcond)
777{
778 TCGCond cond;
Paolo Bonzini35020622024-01-22 10:48:11 +0100779 TempOptInfo *i1;
Richard Henderson246c4b72023-10-24 16:36:50 -0700780 bool swap;
781 int r;
782
783 swap = swap_commutative(dest, p1, p2);
784 cond = *pcond;
785 if (swap) {
786 *pcond = cond = tcg_swap_cond(cond);
787 }
788
789 r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700790 if (r >= 0) {
791 return r;
792 }
793 if (!is_tst_cond(cond)) {
794 return -1;
795 }
796
Paolo Bonzini35020622024-01-22 10:48:11 +0100797 i1 = arg_info(*p1);
798
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700799 /*
800 * TSTNE x,x -> NE x,0
Paolo Bonzini35020622024-01-22 10:48:11 +0100801 * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700802 */
Paolo Bonzini35020622024-01-22 10:48:11 +0100803 if (args_are_copies(*p1, *p2) ||
804 (arg_is_const(*p2) && (i1->z_mask & ~arg_info(*p2)->val) == 0)) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700805 *p2 = arg_new_constant(ctx, 0);
806 *pcond = tcg_tst_eqne_cond(cond);
807 return -1;
808 }
809
Paolo Bonzini35020622024-01-22 10:48:11 +0100810 /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
811 if (arg_is_const(*p2) && (arg_info(*p2)->val & ~i1->s_mask) == 0) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700812 *p2 = arg_new_constant(ctx, 0);
813 *pcond = tcg_tst_ltge_cond(cond);
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100814 return -1;
815 }
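    /*
     * For example, if x is known to fit in 8 bits (z_mask == 0xff),
     * "tstne x,0xff" simplifies to "ne x,0"; and if the constant covers
     * only bits that s_mask marks as sign copies, e.g. i == INT64_MIN
     * with a canonicalized s_mask, the test becomes "lt x,0".
     */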
816
817 /* Expand to AND with a temporary if no backend support. */
818 if (!TCG_TARGET_HAS_tst) {
Richard Hendersonc3b920b2025-01-06 10:32:44 -0800819 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100820 TCGArg tmp = arg_new_temp(ctx);
821
822 op2->args[0] = tmp;
823 op2->args[1] = *p1;
824 op2->args[2] = *p2;
825
826 *p1 = tmp;
827 *p2 = arg_new_constant(ctx, 0);
828 *pcond = tcg_tst_eqne_cond(cond);
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700829 }
830 return -1;
Richard Henderson246c4b72023-10-24 16:36:50 -0700831}
832
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100833static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
Richard Henderson7e64b112023-10-24 16:53:56 -0700834{
835 TCGArg al, ah, bl, bh;
836 TCGCond c;
837 bool swap;
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700838 int r;
Richard Henderson7e64b112023-10-24 16:53:56 -0700839
840 swap = swap_commutative2(args, args + 2);
841 c = args[4];
842 if (swap) {
843 args[4] = c = tcg_swap_cond(c);
844 }
845
846 al = args[0];
847 ah = args[1];
848 bl = args[2];
849 bh = args[3];
850
851 if (arg_is_const(bl) && arg_is_const(bh)) {
852 tcg_target_ulong blv = arg_info(bl)->val;
853 tcg_target_ulong bhv = arg_info(bh)->val;
854 uint64_t b = deposit64(blv, 32, 32, bhv);
855
856 if (arg_is_const(al) && arg_is_const(ah)) {
857 tcg_target_ulong alv = arg_info(al)->val;
858 tcg_target_ulong ahv = arg_info(ah)->val;
859 uint64_t a = deposit64(alv, 32, 32, ahv);
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700860
861 r = do_constant_folding_cond_64(a, b, c);
862 if (r >= 0) {
863 return r;
864 }
Richard Henderson7e64b112023-10-24 16:53:56 -0700865 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700866
Richard Henderson7e64b112023-10-24 16:53:56 -0700867 if (b == 0) {
868 switch (c) {
869 case TCG_COND_LTU:
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700870 case TCG_COND_TSTNE:
Richard Henderson7e64b112023-10-24 16:53:56 -0700871 return 0;
872 case TCG_COND_GEU:
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700873 case TCG_COND_TSTEQ:
Richard Henderson7e64b112023-10-24 16:53:56 -0700874 return 1;
875 default:
876 break;
877 }
878 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700879
880 /* TSTNE x,-1 -> NE x,0 */
881 if (b == -1 && is_tst_cond(c)) {
882 args[3] = args[2] = arg_new_constant(ctx, 0);
883 args[4] = tcg_tst_eqne_cond(c);
884 return -1;
885 }
886
887 /* TSTNE x,sign -> LT x,0 */
888 if (b == INT64_MIN && is_tst_cond(c)) {
889 /* bl must be 0, so copy that to bh */
890 args[3] = bl;
891 args[4] = tcg_tst_ltge_cond(c);
892 return -1;
893 }
Richard Henderson7e64b112023-10-24 16:53:56 -0700894 }
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700895
Richard Henderson7e64b112023-10-24 16:53:56 -0700896 if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -0700897 r = do_constant_folding_cond_eq(c);
898 if (r >= 0) {
899 return r;
900 }
901
902 /* TSTNE x,x -> NE x,0 */
903 if (is_tst_cond(c)) {
904 args[3] = args[2] = arg_new_constant(ctx, 0);
905 args[4] = tcg_tst_eqne_cond(c);
906 return -1;
907 }
Richard Henderson7e64b112023-10-24 16:53:56 -0700908 }
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100909
910 /* Expand to AND with a temporary if no backend support. */
911 if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
Richard Hendersonc3b920b2025-01-06 10:32:44 -0800912 TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
913 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
Richard Hendersonfb04ab72024-01-10 18:21:58 +1100914 TCGArg t1 = arg_new_temp(ctx);
915 TCGArg t2 = arg_new_temp(ctx);
916
917 op1->args[0] = t1;
918 op1->args[1] = al;
919 op1->args[2] = bl;
920 op2->args[0] = t2;
921 op2->args[1] = ah;
922 op2->args[2] = bh;
923
924 args[0] = t1;
925 args[1] = t2;
926 args[3] = args[2] = arg_new_constant(ctx, 0);
927 args[4] = tcg_tst_eqne_cond(c);
928 }
Richard Henderson7e64b112023-10-24 16:53:56 -0700929 return -1;
930}
931
Richard Hendersone2577ea2021-08-24 08:00:48 -0700932static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
933{
934 for (int i = 0; i < nb_args; i++) {
935 TCGTemp *ts = arg_temp(op->args[i]);
Richard Henderson39004a72022-11-11 10:09:37 +1000936 init_ts_info(ctx, ts);
Richard Hendersone2577ea2021-08-24 08:00:48 -0700937 }
938}
939
Richard Henderson8774dde2021-08-24 08:04:47 -0700940static void copy_propagate(OptContext *ctx, TCGOp *op,
941 int nb_oargs, int nb_iargs)
942{
Richard Henderson8774dde2021-08-24 08:04:47 -0700943 for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
944 TCGTemp *ts = arg_temp(op->args[i]);
Richard Henderson39004a72022-11-11 10:09:37 +1000945 if (ts_is_copy(ts)) {
Richard Henderson9f75e522023-11-02 13:37:46 -0700946 op->args[i] = temp_arg(find_better_copy(ts));
Richard Henderson8774dde2021-08-24 08:04:47 -0700947 }
948 }
949}
950
Richard Henderson15268552024-12-08 07:45:11 -0600951static void finish_bb(OptContext *ctx)
952{
953 /* We only optimize memory barriers within a single basic block. */
954 ctx->prev_mb = NULL;
955}
956
957static void finish_ebb(OptContext *ctx)
958{
959 finish_bb(ctx);
960 /* We only track values within an extended basic block. */
961 memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
962 remove_mem_copy_all(ctx);
963}
964
Richard Hendersonf3ed3cf2024-12-08 18:39:47 -0600965static bool finish_folding(OptContext *ctx, TCGOp *op)
Richard Henderson137f1f42021-08-24 08:49:25 -0700966{
967 const TCGOpDef *def = &tcg_op_defs[op->opc];
968 int i, nb_oargs;
969
Richard Henderson137f1f42021-08-24 08:49:25 -0700970 nb_oargs = def->nb_oargs;
971 for (i = 0; i < nb_oargs; i++) {
Richard Henderson57fe5c62021-08-26 12:04:46 -0700972 TCGTemp *ts = arg_temp(op->args[i]);
Richard Henderson986cac12023-01-09 13:59:35 -0800973 reset_ts(ctx, ts);
Richard Henderson137f1f42021-08-24 08:49:25 -0700974 }
Richard Hendersonf3ed3cf2024-12-08 18:39:47 -0600975 return true;
Richard Henderson137f1f42021-08-24 08:49:25 -0700976}
977
Richard Henderson2f9f08b2021-08-25 12:03:48 -0700978/*
979 * The fold_* functions return true when processing is complete,
980 * usually by folding the operation to a constant or to a copy,
981 * and calling tcg_opt_gen_{mov,movi}. They may do other things,
982 * like collect information about the value produced, for use in
983 * optimizing a subsequent operation.
984 *
985 * These first fold_* functions are all helpers, used by other
986 * folders for more specific operations.
987 */
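/*
 * As a concrete example of the above: an AND of a value already known
 * to fit in 8 bits with the constant 0xff cannot change the value, so
 * fold_and() (via fold_affected_mask) replaces the operation with a
 * simple mov and returns true.
 */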
988
989static bool fold_const1(OptContext *ctx, TCGOp *op)
990{
991 if (arg_is_const(op->args[1])) {
992 uint64_t t;
993
994 t = arg_info(op->args[1])->val;
Richard Henderson67f84c92021-08-25 08:00:20 -0700995 t = do_constant_folding(op->opc, ctx->type, t, 0);
Richard Henderson2f9f08b2021-08-25 12:03:48 -0700996 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
997 }
998 return false;
999}
1000
1001static bool fold_const2(OptContext *ctx, TCGOp *op)
1002{
1003 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1004 uint64_t t1 = arg_info(op->args[1])->val;
1005 uint64_t t2 = arg_info(op->args[2])->val;
1006
Richard Henderson67f84c92021-08-25 08:00:20 -07001007 t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001008 return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1009 }
1010 return false;
1011}
1012
Richard Hendersonc578ff12021-12-16 06:07:25 -08001013static bool fold_commutative(OptContext *ctx, TCGOp *op)
1014{
1015 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1016 return false;
1017}
1018
Richard Henderson7a2f7082021-08-26 07:06:39 -07001019static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
1020{
1021 swap_commutative(op->args[0], &op->args[1], &op->args[2]);
1022 return fold_const2(ctx, op);
1023}
1024
Richard Hendersond582b142024-12-19 10:43:26 -08001025/*
1026 * Record "zero" and "sign" masks for the single output of @op.
1027 * See TempOptInfo definition of z_mask and s_mask.
1028 * If z_mask allows, fold the output to constant zero.
Richard Henderson75c3bf32024-12-19 10:50:40 -08001029 * The passed s_mask may be augmented by z_mask.
Richard Hendersond582b142024-12-19 10:43:26 -08001030 */
1031static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
Richard Henderson6d70ddc2024-12-21 21:08:10 -08001032 uint64_t z_mask, int64_t s_mask)
Richard Hendersonfae450b2021-08-25 22:42:19 -07001033{
Richard Henderson56e06ec2024-12-08 18:26:48 -06001034 const TCGOpDef *def = &tcg_op_defs[op->opc];
1035 TCGTemp *ts;
1036 TempOptInfo *ti;
Richard Henderson6d70ddc2024-12-21 21:08:10 -08001037 int rep;
Richard Henderson56e06ec2024-12-08 18:26:48 -06001038
1039 /* Only single-output opcodes are supported here. */
1040 tcg_debug_assert(def->nb_oargs == 1);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001041
1042 /*
Richard Hendersonfaa2e102021-08-26 09:03:59 -07001043 * 32-bit ops generate 32-bit results, which for the purpose of
1044 * simplifying tcg are sign-extended. Certainly that's how we
1045 * represent our constants elsewhere. Note that the bits will
1046 * be reset properly for a 64-bit value when encountering the
1047 * type changing opcodes.
Richard Hendersonfae450b2021-08-25 22:42:19 -07001048 */
1049 if (ctx->type == TCG_TYPE_I32) {
Richard Hendersonfaa2e102021-08-26 09:03:59 -07001050 z_mask = (int32_t)z_mask;
Richard Henderson6d70ddc2024-12-21 21:08:10 -08001051 s_mask |= INT32_MIN;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001052 }
1053
1054 if (z_mask == 0) {
1055 return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
1056 }
Richard Henderson56e06ec2024-12-08 18:26:48 -06001057
1058 ts = arg_temp(op->args[0]);
1059 reset_ts(ctx, ts);
1060
1061 ti = ts_info(ts);
1062 ti->z_mask = z_mask;
Richard Henderson6d70ddc2024-12-21 21:08:10 -08001063
1064 /* Canonicalize s_mask and incorporate data from z_mask. */
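    /*
     * For example, with z_mask == 0xff and an incoming s_mask of 0:
     * clz64(~0) == 0 and clz64(0xff) == 56, so rep == 55 and the
     * stored s_mask becomes 0xffffffffffffff00, i.e. bits 8..63 are
     * known to be copies of the (zero) sign bit.
     */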
1065 rep = clz64(~s_mask);
1066 rep = MAX(rep, clz64(z_mask));
1067 rep = MAX(rep - 1, 0);
1068 ti->s_mask = INT64_MIN >> rep;
1069
Richard Henderson56e06ec2024-12-08 18:26:48 -06001070 return true;
Richard Henderson045ace32024-12-19 10:33:51 -08001071}
1072
Richard Henderson81be07f2024-12-08 19:49:17 -06001073static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
1074{
1075 return fold_masks_zs(ctx, op, z_mask, 0);
1076}
1077
Richard Hendersonef6be622024-12-08 20:03:15 -06001078static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
1079{
1080 return fold_masks_zs(ctx, op, -1, s_mask);
1081}
1082
Richard Henderson045ace32024-12-19 10:33:51 -08001083/*
1084 * An "affected" mask bit is 0 if and only if the result is identical
1085 * to the first input. Thus if the entire mask is 0, the operation
1086 * is equivalent to a copy.
1087 */
1088static bool fold_affected_mask(OptContext *ctx, TCGOp *op, uint64_t a_mask)
1089{
1090 if (ctx->type == TCG_TYPE_I32) {
1091 a_mask = (uint32_t)a_mask;
1092 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001093 if (a_mask == 0) {
1094 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1095 }
1096 return false;
1097}
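/*
 * For example, fold_and() passes a_mask = z1 & ~z2 when the second
 * operand is constant: if every bit that can be nonzero in arg1 is
 * preserved by the constant mask, the AND cannot change arg1 and the
 * operation degenerates to a copy of arg1.
 */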
1098
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001099/*
1100 * Convert @op to NOT, if NOT is supported by the host.
1101 * Return true if the conversion is successful, which will still
1102 * indicate that the processing is complete.
1103 */
1104static bool fold_not(OptContext *ctx, TCGOp *op);
1105static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
1106{
1107 TCGOpcode not_op;
1108 bool have_not;
1109
1110 switch (ctx->type) {
1111 case TCG_TYPE_I32:
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001112 case TCG_TYPE_I64:
Richard Henderson5c62d372025-01-06 23:46:47 -08001113 not_op = INDEX_op_not;
1114 have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001115 break;
1116 case TCG_TYPE_V64:
1117 case TCG_TYPE_V128:
1118 case TCG_TYPE_V256:
1119 not_op = INDEX_op_not_vec;
1120 have_not = TCG_TARGET_HAS_not_vec;
1121 break;
1122 default:
1123 g_assert_not_reached();
1124 }
1125 if (have_not) {
1126 op->opc = not_op;
1127 op->args[1] = op->args[idx];
1128 return fold_not(ctx, op);
1129 }
1130 return false;
1131}
1132
Richard Hendersonda48e272021-08-25 20:42:04 -07001133/* If the binary operation has first argument @i, fold to @i. */
1134static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1135{
Richard Henderson27cdb852023-10-23 11:38:00 -07001136 if (arg_is_const_val(op->args[1], i)) {
Richard Hendersonda48e272021-08-25 20:42:04 -07001137 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1138 }
1139 return false;
1140}
1141
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001142/* If the binary operation has first argument @i, fold to NOT. */
1143static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1144{
Richard Henderson27cdb852023-10-23 11:38:00 -07001145 if (arg_is_const_val(op->args[1], i)) {
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001146 return fold_to_not(ctx, op, 2);
1147 }
1148 return false;
1149}
1150
Richard Hendersone8679952021-08-25 13:19:52 -07001151/* If the binary operation has second argument @i, fold to @i. */
1152static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1153{
Richard Henderson27cdb852023-10-23 11:38:00 -07001154 if (arg_is_const_val(op->args[2], i)) {
Richard Hendersone8679952021-08-25 13:19:52 -07001155 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1156 }
1157 return false;
1158}
1159
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001160/* If the binary operation has second argument @i, fold to identity. */
1161static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
1162{
Richard Henderson27cdb852023-10-23 11:38:00 -07001163 if (arg_is_const_val(op->args[2], i)) {
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001164 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1165 }
1166 return false;
1167}
1168
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001169/* If the binary operation has second argument @i, fold to NOT. */
1170static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
1171{
Richard Henderson27cdb852023-10-23 11:38:00 -07001172 if (arg_is_const_val(op->args[2], i)) {
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001173 return fold_to_not(ctx, op, 1);
1174 }
1175 return false;
1176}
1177
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001178/* If the binary operation has both arguments equal, fold to @i. */
1179static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1180{
1181 if (args_are_copies(op->args[1], op->args[2])) {
1182 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1183 }
1184 return false;
1185}
1186
Richard Hendersonca7bb042021-08-25 13:14:21 -07001187/* If the binary operation has both arguments equal, fold to identity. */
1188static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1189{
1190 if (args_are_copies(op->args[1], op->args[2])) {
1191 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1192 }
1193 return false;
1194}
1195
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001196/*
1197 * These outermost fold_<op> functions are sorted alphabetically.
Richard Hendersonca7bb042021-08-25 13:14:21 -07001198 *
1199 * The ordering of the transformations should be:
1200 * 1) those that produce a constant
1201 * 2) those that produce a copy
1202 * 3) those that produce information about the result value.
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001203 */
1204
Richard Hendersonaeb35142025-01-14 18:28:15 -08001205static bool fold_addco(OptContext *ctx, TCGOp *op);
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001206static bool fold_or(OptContext *ctx, TCGOp *op);
1207static bool fold_orc(OptContext *ctx, TCGOp *op);
Richard Hendersonaeb35142025-01-14 18:28:15 -08001208static bool fold_subbo(OptContext *ctx, TCGOp *op);
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001209static bool fold_xor(OptContext *ctx, TCGOp *op);
1210
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001211static bool fold_add(OptContext *ctx, TCGOp *op)
1212{
Richard Henderson7a2f7082021-08-26 07:06:39 -07001213 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001214 fold_xi_to_x(ctx, op, 0)) {
1215 return true;
1216 }
Richard Hendersonf3ed3cf2024-12-08 18:39:47 -06001217 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001218}
1219
Richard Hendersonc578ff12021-12-16 06:07:25 -08001220/* We cannot as yet do_constant_folding with vectors. */
1221static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1222{
1223 if (fold_commutative(ctx, op) ||
1224 fold_xi_to_x(ctx, op, 0)) {
1225 return true;
1226 }
Richard Hendersonf3ed3cf2024-12-08 18:39:47 -06001227 return finish_folding(ctx, op);
Richard Hendersonc578ff12021-12-16 06:07:25 -08001228}
1229
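/*
 * Called when the carry-in consumed by the current op is known to be
 * constant (ctx->carry_state >= 0): rewrite the previous op, which
 * produced that carry, so it no longer computes a carry-out
 * (addco -> add, addcio -> addci, addc1o -> add plus an explicit +1).
 */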
Richard Hendersonaeb35142025-01-14 18:28:15 -08001230static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
1231{
1232 TempOptInfo *t2;
1233
1234 op = QTAILQ_PREV(op, link);
1235 switch (op->opc) {
1236 case INDEX_op_addco:
1237 op->opc = INDEX_op_add;
1238 fold_add(ctx, op);
1239 break;
1240 case INDEX_op_addcio:
1241 op->opc = INDEX_op_addci;
1242 break;
1243 case INDEX_op_addc1o:
1244 op->opc = INDEX_op_add;
1245 t2 = arg_info(op->args[2]);
1246 if (ti_is_const(t2)) {
1247 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1248 /* Perform other constant folding, if needed. */
1249 fold_add(ctx, op);
1250 } else {
1251 TCGArg ret = op->args[0];
1252 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
1253 op->args[0] = ret;
1254 op->args[1] = ret;
1255 op->args[2] = arg_new_constant(ctx, 1);
1256 }
1257 break;
1258 default:
1259 g_assert_not_reached();
1260 }
1261}
1262
1263static bool fold_addci(OptContext *ctx, TCGOp *op)
Richard Henderson76f42782025-01-14 13:58:39 -08001264{
1265 fold_commutative(ctx, op);
Richard Hendersonaeb35142025-01-14 18:28:15 -08001266
1267 if (ctx->carry_state < 0) {
1268 return finish_folding(ctx, op);
1269 }
1270
1271 squash_prev_carryout(ctx, op);
1272 op->opc = INDEX_op_add;
1273
1274 if (ctx->carry_state > 0) {
1275 TempOptInfo *t2 = arg_info(op->args[2]);
1276
1277 /*
1278 * Propagate the known carry-in into a constant, if possible.
1279 * Otherwise emit a second add +1.
1280 */
1281 if (ti_is_const(t2)) {
1282 op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
1283 } else {
1284 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);
1285
1286 op2->args[0] = op->args[0];
1287 op2->args[1] = op->args[1];
1288 op2->args[2] = op->args[2];
1289 fold_add(ctx, op2);
1290
1291 op->args[1] = op->args[0];
1292 op->args[2] = arg_new_constant(ctx, 1);
1293 }
1294 }
1295
1296 ctx->carry_state = -1;
1297 return fold_add(ctx, op);
1298}
1299
1300static bool fold_addcio(OptContext *ctx, TCGOp *op)
1301{
1302 TempOptInfo *t1, *t2;
1303 int carry_out = -1;
1304 uint64_t sum, max;
1305
1306 fold_commutative(ctx, op);
1307 t1 = arg_info(op->args[1]);
1308 t2 = arg_info(op->args[2]);
1309
1310 /*
1311 * The z_mask value is >= the maximum value that can be represented
1312 * with the known zero bits. So adding the z_mask values will not
1313 * overflow if and only if the true values cannot overflow.
1314 */
1315 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
1316 !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
1317 carry_out = 0;
1318 }
1319
1320 if (ctx->carry_state < 0) {
1321 ctx->carry_state = carry_out;
1322 return finish_folding(ctx, op);
1323 }
1324
1325 squash_prev_carryout(ctx, op);
1326 if (ctx->carry_state == 0) {
1327 goto do_addco;
1328 }
1329
1330 /* Propagate the known carry-in into a constant, if possible. */
1331 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1332 if (ti_is_const(t2)) {
1333 uint64_t v = ti_const_val(t2) & max;
1334 if (v < max) {
1335 op->args[2] = arg_new_constant(ctx, v + 1);
1336 goto do_addco;
1337 }
1338 /* max + known carry in produces known carry out. */
1339 carry_out = 1;
1340 }
1341 if (ti_is_const(t1)) {
1342 uint64_t v = ti_const_val(t1) & max;
1343 if (v < max) {
1344 op->args[1] = arg_new_constant(ctx, v + 1);
1345 goto do_addco;
1346 }
1347 carry_out = 1;
1348 }
1349
1350 /* Adjust the opcode to remember the known carry-in. */
1351 op->opc = INDEX_op_addc1o;
1352 ctx->carry_state = carry_out;
1353 return finish_folding(ctx, op);
1354
1355 do_addco:
1356 op->opc = INDEX_op_addco;
1357 return fold_addco(ctx, op);
1358}
1359
1360static bool fold_addco(OptContext *ctx, TCGOp *op)
1361{
1362 TempOptInfo *t1, *t2;
1363 int carry_out = -1;
1364 uint64_t ign;
1365
1366 fold_commutative(ctx, op);
1367 t1 = arg_info(op->args[1]);
1368 t2 = arg_info(op->args[2]);
1369
1370 if (ti_is_const(t2)) {
1371 uint64_t v2 = ti_const_val(t2);
1372
1373 if (ti_is_const(t1)) {
1374 uint64_t v1 = ti_const_val(t1);
1375 /* Given sign-extension of z_mask for I32, we need not truncate. */
1376 carry_out = uadd64_overflow(v1, v2, &ign);
1377 } else if (v2 == 0) {
1378 carry_out = 0;
1379 }
1380 } else {
1381 /*
1382 * The z_mask value is >= the maximum value that can be represented
1383 * with the known zero bits. So adding the z_mask values will not
1384 * overflow if and only if the true values cannot overflow.
1385 */
1386 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1387 carry_out = 0;
1388 }
1389 }
1390 ctx->carry_state = carry_out;
Richard Henderson76f42782025-01-14 13:58:39 -08001391 return finish_folding(ctx, op);
1392}
1393
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001394static bool fold_and(OptContext *ctx, TCGOp *op)
1395{
Richard Henderson1ca73722024-12-08 18:47:15 -06001396 uint64_t z1, z2, z_mask, s_mask;
1397 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001398
Richard Henderson7a2f7082021-08-26 07:06:39 -07001399 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07001400 fold_xi_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001401 fold_xi_to_x(ctx, op, -1) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07001402 fold_xx_to_x(ctx, op)) {
1403 return true;
1404 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001405
Richard Henderson1ca73722024-12-08 18:47:15 -06001406 t1 = arg_info(op->args[1]);
1407 t2 = arg_info(op->args[2]);
1408 z1 = t1->z_mask;
1409 z2 = t2->z_mask;
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001410
1411 /*
Richard Hendersonfae450b2021-08-25 22:42:19 -07001412 * Known-zeros does not imply known-ones. Therefore unless
1413 * arg2 is constant, we can't infer affected bits from it.
1414 */
Richard Henderson1ca73722024-12-08 18:47:15 -06001415 if (ti_is_const(t2) && fold_affected_mask(ctx, op, z1 & ~z2)) {
Richard Henderson045ace32024-12-19 10:33:51 -08001416 return true;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001417 }
1418
Richard Henderson1ca73722024-12-08 18:47:15 -06001419 z_mask = z1 & z2;
1420
1421 /*
1422 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1423 * Bitwise operations preserve the relative quantity of the repetitions.
1424 */
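    /* E.g. sign copies in the top byte of both inputs survive the AND. */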
1425 s_mask = t1->s_mask & t2->s_mask;
1426
1427 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001428}
1429
1430static bool fold_andc(OptContext *ctx, TCGOp *op)
1431{
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001432 uint64_t z_mask, s_mask;
1433 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001434
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001435 if (fold_const2(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001436 fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001437 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001438 fold_ix_to_not(ctx, op, -1)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001439 return true;
1440 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001441
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001442 t1 = arg_info(op->args[1]);
1443 t2 = arg_info(op->args[2]);
1444 z_mask = t1->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001445
Richard Henderson899281c2023-11-15 11:18:55 -08001446 if (ti_is_const(t2)) {
1447 /* Fold andc r,x,i to and r,x,~i. */
1448 switch (ctx->type) {
1449 case TCG_TYPE_I32:
1450 case TCG_TYPE_I64:
1451 op->opc = INDEX_op_and;
1452 break;
1453 case TCG_TYPE_V64:
1454 case TCG_TYPE_V128:
1455 case TCG_TYPE_V256:
1456 op->opc = INDEX_op_and_vec;
1457 break;
1458 default:
1459 g_assert_not_reached();
1460 }
1461 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1462 return fold_and(ctx, op);
1463 }
1464
Richard Hendersonfae450b2021-08-25 22:42:19 -07001465 /*
1466 * Known-zeros does not imply known-ones. Therefore unless
1467 * arg2 is constant, we can't infer anything from it.
1468 */
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001469 if (ti_is_const(t2)) {
1470 uint64_t v2 = ti_const_val(t2);
1471 if (fold_affected_mask(ctx, op, z_mask & v2)) {
Richard Henderson045ace32024-12-19 10:33:51 -08001472 return true;
1473 }
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001474 z_mask &= ~v2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001475 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001476
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001477 s_mask = t1->s_mask & t2->s_mask;
1478 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001479}
1480
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001481static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1482{
1483 /* If true and false values are the same, eliminate the cmp. */
1484 if (args_are_copies(op->args[2], op->args[3])) {
1485 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1486 }
1487
1488 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1489 uint64_t tv = arg_info(op->args[2])->val;
1490 uint64_t fv = arg_info(op->args[3])->val;
1491
1492 if (tv == -1 && fv == 0) {
1493 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1494 }
1495 if (tv == 0 && fv == -1) {
1496 if (TCG_TARGET_HAS_not_vec) {
1497 op->opc = INDEX_op_not_vec;
1498 return fold_not(ctx, op);
1499 } else {
1500 op->opc = INDEX_op_xor_vec;
1501 op->args[2] = arg_new_constant(ctx, -1);
1502 return fold_xor(ctx, op);
1503 }
1504 }
1505 }
1506 if (arg_is_const(op->args[2])) {
1507 uint64_t tv = arg_info(op->args[2])->val;
1508 if (tv == -1) {
1509 op->opc = INDEX_op_or_vec;
1510 op->args[2] = op->args[3];
1511 return fold_or(ctx, op);
1512 }
1513 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1514 op->opc = INDEX_op_andc_vec;
1515 op->args[2] = op->args[1];
1516 op->args[1] = op->args[3];
1517 return fold_andc(ctx, op);
1518 }
1519 }
1520 if (arg_is_const(op->args[3])) {
1521 uint64_t fv = arg_info(op->args[3])->val;
1522 if (fv == 0) {
1523 op->opc = INDEX_op_and_vec;
1524 return fold_and(ctx, op);
1525 }
1526 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1527 op->opc = INDEX_op_orc_vec;
1528 op->args[2] = op->args[1];
1529 op->args[1] = op->args[3];
1530 return fold_orc(ctx, op);
1531 }
1532 }
1533 return finish_folding(ctx, op);
1534}
1535
Richard Henderson079b0802021-08-24 09:30:59 -07001536static bool fold_brcond(OptContext *ctx, TCGOp *op)
1537{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001538 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
Richard Henderson246c4b72023-10-24 16:36:50 -07001539 &op->args[1], &op->args[2]);
Richard Henderson079b0802021-08-24 09:30:59 -07001540 if (i == 0) {
1541 tcg_op_remove(ctx->tcg, op);
1542 return true;
1543 }
1544 if (i > 0) {
1545 op->opc = INDEX_op_br;
1546 op->args[0] = op->args[3];
Richard Henderson15268552024-12-08 07:45:11 -06001547 finish_ebb(ctx);
1548 } else {
1549 finish_bb(ctx);
Richard Henderson079b0802021-08-24 09:30:59 -07001550 }
Richard Henderson15268552024-12-08 07:45:11 -06001551 return true;
Richard Henderson079b0802021-08-24 09:30:59 -07001552}
1553
Richard Henderson764d2ab2021-08-24 09:22:11 -07001554static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1555{
Richard Henderson7e64b112023-10-24 16:53:56 -07001556 TCGCond cond;
1557 TCGArg label;
Richard Henderson7a2f7082021-08-26 07:06:39 -07001558 int i, inv = 0;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001559
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001560 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
Richard Henderson7e64b112023-10-24 16:53:56 -07001561 cond = op->args[4];
1562 label = op->args[5];
Richard Henderson764d2ab2021-08-24 09:22:11 -07001563 if (i >= 0) {
1564 goto do_brcond_const;
1565 }
1566
1567 switch (cond) {
1568 case TCG_COND_LT:
1569 case TCG_COND_GE:
1570 /*
1571 * Simplify LT/GE comparisons vs zero to a single compare
1572 * vs the high word of the input.
1573 */
Richard Henderson27cdb852023-10-23 11:38:00 -07001574 if (arg_is_const_val(op->args[2], 0) &&
1575 arg_is_const_val(op->args[3], 0)) {
Richard Henderson764d2ab2021-08-24 09:22:11 -07001576 goto do_brcond_high;
1577 }
1578 break;
1579
1580 case TCG_COND_NE:
1581 inv = 1;
1582 QEMU_FALLTHROUGH;
1583 case TCG_COND_EQ:
1584 /*
1585 * Simplify EQ/NE comparisons where one of the pairs
1586 * can be simplified.
1587 */
Richard Henderson67f84c92021-08-25 08:00:20 -07001588 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001589 op->args[2], cond);
1590 switch (i ^ inv) {
1591 case 0:
1592 goto do_brcond_const;
1593 case 1:
1594 goto do_brcond_high;
1595 }
1596
Richard Henderson67f84c92021-08-25 08:00:20 -07001597 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001598 op->args[3], cond);
1599 switch (i ^ inv) {
1600 case 0:
1601 goto do_brcond_const;
1602 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001603 goto do_brcond_low;
1604 }
1605 break;
1606
1607 case TCG_COND_TSTEQ:
1608 case TCG_COND_TSTNE:
1609 if (arg_is_const_val(op->args[2], 0)) {
1610 goto do_brcond_high;
1611 }
1612 if (arg_is_const_val(op->args[3], 0)) {
1613 goto do_brcond_low;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001614 }
1615 break;
1616
1617 default:
1618 break;
1619
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001620 do_brcond_low:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001621 op->opc = INDEX_op_brcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001622 op->args[1] = op->args[2];
1623 op->args[2] = cond;
1624 op->args[3] = label;
1625 return fold_brcond(ctx, op);
1626
Richard Henderson764d2ab2021-08-24 09:22:11 -07001627 do_brcond_high:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001628 op->opc = INDEX_op_brcond;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001629 op->args[0] = op->args[1];
1630 op->args[1] = op->args[3];
1631 op->args[2] = cond;
1632 op->args[3] = label;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001633 return fold_brcond(ctx, op);
Richard Henderson764d2ab2021-08-24 09:22:11 -07001634
1635 do_brcond_const:
1636 if (i == 0) {
1637 tcg_op_remove(ctx->tcg, op);
1638 return true;
1639 }
1640 op->opc = INDEX_op_br;
1641 op->args[0] = label;
Richard Henderson15268552024-12-08 07:45:11 -06001642 finish_ebb(ctx);
1643 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001644 }
Richard Henderson15268552024-12-08 07:45:11 -06001645
1646 finish_bb(ctx);
1647 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001648}
1649
Richard Henderson09bacdc2021-08-24 11:58:12 -07001650static bool fold_bswap(OptContext *ctx, TCGOp *op)
1651{
Richard Henderson57fe5c62021-08-26 12:04:46 -07001652 uint64_t z_mask, s_mask, sign;
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001653 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001654
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001655 if (ti_is_const(t1)) {
1656 return tcg_opt_gen_movi(ctx, op, op->args[0],
1657 do_constant_folding(op->opc, ctx->type,
1658 ti_const_val(t1),
1659 op->args[2]));
Richard Henderson09bacdc2021-08-24 11:58:12 -07001660 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001661
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001662 z_mask = t1->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001663 switch (op->opc) {
Richard Henderson0dd07ee2025-01-10 18:51:16 -08001664 case INDEX_op_bswap16:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001665 z_mask = bswap16(z_mask);
1666 sign = INT16_MIN;
1667 break;
Richard Henderson7498d882025-01-10 19:53:51 -08001668 case INDEX_op_bswap32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001669 z_mask = bswap32(z_mask);
1670 sign = INT32_MIN;
1671 break;
Richard Henderson3ad5d4c2025-01-10 21:54:44 -08001672 case INDEX_op_bswap64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001673 z_mask = bswap64(z_mask);
1674 sign = INT64_MIN;
1675 break;
1676 default:
1677 g_assert_not_reached();
1678 }
1679
Richard Henderson75c3bf32024-12-19 10:50:40 -08001680 s_mask = 0;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001681 switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1682 case TCG_BSWAP_OZ:
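        /* Output is zero-extended; the swapped z_mask already covers it. */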
1683 break;
1684 case TCG_BSWAP_OS:
1685 /* If the sign bit may be 1, force all the bits above to 1. */
1686 if (z_mask & sign) {
1687 z_mask |= sign;
1688 }
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001689        /* The value, and therefore s_mask, is explicitly sign-extended. */
1690 s_mask = sign;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001691 break;
1692 default:
1693 /* The high bits are undefined: force all bits above the sign to 1. */
1694 z_mask |= sign << 1;
1695 break;
1696 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001697
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001698 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson09bacdc2021-08-24 11:58:12 -07001699}
1700
Richard Henderson5cf32be2021-08-24 08:17:08 -07001701static bool fold_call(OptContext *ctx, TCGOp *op)
1702{
1703 TCGContext *s = ctx->tcg;
1704 int nb_oargs = TCGOP_CALLO(op);
1705 int nb_iargs = TCGOP_CALLI(op);
1706 int flags, i;
1707
1708 init_arguments(ctx, op, nb_oargs + nb_iargs);
1709 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1710
1711 /* If the function reads or writes globals, reset temp data. */
1712 flags = tcg_call_flags(op);
1713 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1714 int nb_globals = s->nb_globals;
1715
1716 for (i = 0; i < nb_globals; i++) {
1717 if (test_bit(i, ctx->temps_used.l)) {
Richard Henderson986cac12023-01-09 13:59:35 -08001718 reset_ts(ctx, &ctx->tcg->temps[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001719 }
1720 }
1721 }
1722
Richard Hendersonab84dc32023-08-23 23:04:24 -07001723 /* If the function has side effects, reset mem data. */
1724 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1725 remove_mem_copy_all(ctx);
1726 }
1727
Richard Henderson5cf32be2021-08-24 08:17:08 -07001728 /* Reset temp data for outputs. */
1729 for (i = 0; i < nb_oargs; i++) {
Richard Henderson986cac12023-01-09 13:59:35 -08001730 reset_temp(ctx, op->args[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001731 }
1732
1733 /* Stop optimizing MB across calls. */
1734 ctx->prev_mb = NULL;
1735 return true;
1736}
1737
Richard Henderson29f65862024-12-09 14:09:49 -06001738static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1739{
1740 /* Canonicalize the comparison to put immediate second. */
1741 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1742 op->args[3] = tcg_swap_cond(op->args[3]);
1743 }
1744 return finish_folding(ctx, op);
1745}
1746
1747static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1748{
1749 /* If true and false values are the same, eliminate the cmp. */
1750 if (args_are_copies(op->args[3], op->args[4])) {
1751 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1752 }
1753
1754 /* Canonicalize the comparison to put immediate second. */
1755 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1756 op->args[5] = tcg_swap_cond(op->args[5]);
1757 }
1758 /*
1759 * Canonicalize the "false" input reg to match the destination,
1760 * so that the tcg backend can implement "move if true".
1761 */
1762 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1763 op->args[5] = tcg_invert_cond(op->args[5]);
1764 }
1765 return finish_folding(ctx, op);
1766}
1767
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001768static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1769{
Richard Hendersonce1d6632024-12-08 19:47:51 -06001770 uint64_t z_mask, s_mask;
1771 TempOptInfo *t1 = arg_info(op->args[1]);
1772 TempOptInfo *t2 = arg_info(op->args[2]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001773
Richard Hendersonce1d6632024-12-08 19:47:51 -06001774 if (ti_is_const(t1)) {
1775 uint64_t t = ti_const_val(t1);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001776
1777 if (t != 0) {
Richard Henderson67f84c92021-08-25 08:00:20 -07001778 t = do_constant_folding(op->opc, ctx->type, t, 0);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001779 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1780 }
1781 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1782 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001783
1784 switch (ctx->type) {
1785 case TCG_TYPE_I32:
1786 z_mask = 31;
1787 break;
1788 case TCG_TYPE_I64:
1789 z_mask = 63;
1790 break;
1791 default:
1792 g_assert_not_reached();
1793 }
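    /*
     * The count itself fits within z_mask; every bit above it is known
     * zero and therefore matches the (zero) sign bit.
     */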
Richard Hendersonce1d6632024-12-08 19:47:51 -06001794 s_mask = ~z_mask;
1795 z_mask |= t2->z_mask;
1796 s_mask &= t2->s_mask;
1797
1798 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001799}
1800
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001801static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1802{
Richard Henderson81be07f2024-12-08 19:49:17 -06001803 uint64_t z_mask;
1804
Richard Hendersonfae450b2021-08-25 22:42:19 -07001805 if (fold_const1(ctx, op)) {
1806 return true;
1807 }
1808
1809 switch (ctx->type) {
1810 case TCG_TYPE_I32:
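        /* A 32-bit popcount is at most 32, so only bits 0-5 can be set. */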
Richard Henderson81be07f2024-12-08 19:49:17 -06001811 z_mask = 32 | 31;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001812 break;
1813 case TCG_TYPE_I64:
Richard Henderson81be07f2024-12-08 19:49:17 -06001814 z_mask = 64 | 63;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001815 break;
1816 default:
1817 g_assert_not_reached();
1818 }
Richard Henderson81be07f2024-12-08 19:49:17 -06001819 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001820}
1821
Richard Henderson1b1907b2021-08-24 10:47:04 -07001822static bool fold_deposit(OptContext *ctx, TCGOp *op)
1823{
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001824 TempOptInfo *t1 = arg_info(op->args[1]);
1825 TempOptInfo *t2 = arg_info(op->args[2]);
1826 int ofs = op->args[3];
1827 int len = op->args[4];
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001828 int width = 8 * tcg_type_size(ctx->type);
Richard Hendersonedb832c2024-12-19 17:56:05 -08001829 uint64_t z_mask, s_mask;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001830
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001831 if (ti_is_const(t1) && ti_is_const(t2)) {
1832 return tcg_opt_gen_movi(ctx, op, op->args[0],
1833 deposit64(ti_const_val(t1), ofs, len,
1834 ti_const_val(t2)));
Richard Henderson1b1907b2021-08-24 10:47:04 -07001835 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001836
Richard Henderson8f7a8402023-08-13 11:03:05 -07001837 /* Inserting a value into zero at offset 0. */
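    /* The result is then just the inserted value masked to len bits. */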
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001838 if (ti_is_const_val(t1, 0) && ofs == 0) {
1839 uint64_t mask = MAKE_64BIT_MASK(0, len);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001840
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001841 op->opc = INDEX_op_and;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001842 op->args[1] = op->args[2];
Richard Henderson26aac972023-10-23 12:31:57 -07001843 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001844 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001845 }
1846
1847 /* Inserting zero into a value. */
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001848 if (ti_is_const_val(t2, 0)) {
1849 uint64_t mask = deposit64(-1, ofs, len, 0);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001850
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001851 op->opc = INDEX_op_and;
Richard Henderson26aac972023-10-23 12:31:57 -07001852 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001853 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001854 }
1855
Richard Hendersonedb832c2024-12-19 17:56:05 -08001856 /* The s_mask from the top portion of the deposit is still valid. */
1857 if (ofs + len == width) {
1858 s_mask = t2->s_mask << ofs;
1859 } else {
1860 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1861 }
1862
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001863 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
Richard Hendersonedb832c2024-12-19 17:56:05 -08001864 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson1b1907b2021-08-24 10:47:04 -07001865}
1866
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001867static bool fold_divide(OptContext *ctx, TCGOp *op)
1868{
Richard Henderson2f9d9a32021-10-25 11:30:14 -07001869 if (fold_const2(ctx, op) ||
1870 fold_xi_to_x(ctx, op, 1)) {
1871 return true;
1872 }
Richard Henderson3d5ec802024-12-08 19:59:15 -06001873 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001874}
1875
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001876static bool fold_dup(OptContext *ctx, TCGOp *op)
1877{
1878 if (arg_is_const(op->args[1])) {
1879 uint64_t t = arg_info(op->args[1])->val;
1880 t = dup_const(TCGOP_VECE(op), t);
1881 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1882 }
Richard Hendersone089d692024-12-08 20:00:51 -06001883 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001884}
1885
1886static bool fold_dup2(OptContext *ctx, TCGOp *op)
1887{
1888 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1889 uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1890 arg_info(op->args[2])->val);
1891 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1892 }
1893
1894 if (args_are_copies(op->args[1], op->args[2])) {
1895 op->opc = INDEX_op_dup_vec;
1896 TCGOP_VECE(op) = MO_32;
1897 }
Richard Hendersone089d692024-12-08 20:00:51 -06001898 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001899}
1900
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001901static bool fold_eqv(OptContext *ctx, TCGOp *op)
1902{
Richard Hendersonef6be622024-12-08 20:03:15 -06001903 uint64_t s_mask;
Richard Henderson46c68d72023-11-15 11:51:28 -08001904 TempOptInfo *t1, *t2;
Richard Hendersonef6be622024-12-08 20:03:15 -06001905
Richard Henderson7a2f7082021-08-26 07:06:39 -07001906 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001907 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001908 fold_xi_to_not(ctx, op, 0)) {
1909 return true;
1910 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001911
Richard Henderson46c68d72023-11-15 11:51:28 -08001912 t2 = arg_info(op->args[2]);
1913 if (ti_is_const(t2)) {
1914 /* Fold eqv r,x,i to xor r,x,~i. */
1915 switch (ctx->type) {
1916 case TCG_TYPE_I32:
1917 case TCG_TYPE_I64:
1918 op->opc = INDEX_op_xor;
1919 break;
1920 case TCG_TYPE_V64:
1921 case TCG_TYPE_V128:
1922 case TCG_TYPE_V256:
1923 op->opc = INDEX_op_xor_vec;
1924 break;
1925 default:
1926 g_assert_not_reached();
1927 }
1928 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1929 return fold_xor(ctx, op);
1930 }
1931
1932 t1 = arg_info(op->args[1]);
1933 s_mask = t1->s_mask & t2->s_mask;
Richard Hendersonef6be622024-12-08 20:03:15 -06001934 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001935}
1936
Richard Hendersonb6617c82021-08-24 10:44:53 -07001937static bool fold_extract(OptContext *ctx, TCGOp *op)
1938{
Richard Hendersonfae450b2021-08-25 22:42:19 -07001939 uint64_t z_mask_old, z_mask;
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001940 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07001941 int pos = op->args[2];
1942 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07001943
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001944 if (ti_is_const(t1)) {
1945 return tcg_opt_gen_movi(ctx, op, op->args[0],
1946 extract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07001947 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001948
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001949 z_mask_old = t1->z_mask;
Richard Henderson57fe5c62021-08-26 12:04:46 -07001950 z_mask = extract64(z_mask_old, pos, len);
Richard Henderson045ace32024-12-19 10:33:51 -08001951 if (pos == 0 && fold_affected_mask(ctx, op, z_mask_old ^ z_mask)) {
1952 return true;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001953 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001954
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001955 return fold_masks_z(ctx, op, z_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07001956}
1957
Richard Hendersondcd08992021-08-24 10:41:39 -07001958static bool fold_extract2(OptContext *ctx, TCGOp *op)
1959{
1960 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1961 uint64_t v1 = arg_info(op->args[1])->val;
1962 uint64_t v2 = arg_info(op->args[2])->val;
1963 int shr = op->args[3];
1964
Richard Henderson61d6a872025-01-12 21:40:43 -08001965 if (ctx->type == TCG_TYPE_I32) {
Richard Hendersondcd08992021-08-24 10:41:39 -07001966 v1 = (uint32_t)v1 >> shr;
Richard Henderson225bec02021-11-09 23:17:59 +01001967 v2 = (uint64_t)((int32_t)v2 << (32 - shr));
Richard Henderson61d6a872025-01-12 21:40:43 -08001968 } else {
1969 v1 >>= shr;
1970 v2 <<= 64 - shr;
Richard Hendersondcd08992021-08-24 10:41:39 -07001971 }
1972 return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1973 }
Richard Hendersonc9df99e2024-12-08 20:06:42 -06001974 return finish_folding(ctx, op);
Richard Hendersondcd08992021-08-24 10:41:39 -07001975}
1976
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001977static bool fold_exts(OptContext *ctx, TCGOp *op)
1978{
Richard Henderson48e8de62024-12-26 12:01:57 -08001979 uint64_t s_mask, z_mask;
Richard Hendersona9621922024-12-08 20:08:46 -06001980 TempOptInfo *t1;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001981
1982 if (fold_const1(ctx, op)) {
1983 return true;
1984 }
1985
Richard Hendersona9621922024-12-08 20:08:46 -06001986 t1 = arg_info(op->args[1]);
1987 z_mask = t1->z_mask;
1988 s_mask = t1->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001989
1990 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07001991 case INDEX_op_ext_i32_i64:
Richard Hendersona9621922024-12-08 20:08:46 -06001992 s_mask |= INT32_MIN;
1993 z_mask = (int32_t)z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001994 break;
1995 default:
1996 g_assert_not_reached();
1997 }
Richard Hendersona9621922024-12-08 20:08:46 -06001998 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001999}
2000
2001static bool fold_extu(OptContext *ctx, TCGOp *op)
2002{
Richard Henderson48e8de62024-12-26 12:01:57 -08002003 uint64_t z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002004
2005 if (fold_const1(ctx, op)) {
2006 return true;
2007 }
2008
Richard Henderson48e8de62024-12-26 12:01:57 -08002009 z_mask = arg_info(op->args[1])->z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002010 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07002011 case INDEX_op_extrl_i64_i32:
2012 case INDEX_op_extu_i32_i64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002013 z_mask = (uint32_t)z_mask;
2014 break;
2015 case INDEX_op_extrh_i64_i32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002016 z_mask >>= 32;
2017 break;
2018 default:
2019 g_assert_not_reached();
2020 }
Richard Henderson08abe292024-12-08 20:11:44 -06002021 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002022}
2023
Richard Henderson3eefdf22021-08-25 11:06:43 -07002024static bool fold_mb(OptContext *ctx, TCGOp *op)
2025{
2026 /* Eliminate duplicate and redundant fence instructions. */
2027 if (ctx->prev_mb) {
2028 /*
2029 * Merge two barriers of the same type into one,
2030 * or a weaker barrier into a stronger one,
2031 * or two weaker barriers into a stronger one.
2032 * mb X; mb Y => mb X|Y
2033 * mb; strl => mb; st
2034 * ldaq; mb => ld; mb
2035 * ldaq; strl => ld; mb; st
2036 * Other combinations are also merged into a strong
2037 * barrier. This is stricter than specified but for
2038 * the purposes of TCG is better than not optimizing.
2039 */
2040 ctx->prev_mb->args[0] |= op->args[0];
2041 tcg_op_remove(ctx->tcg, op);
2042 } else {
2043 ctx->prev_mb = op;
2044 }
2045 return true;
2046}
2047
Richard Henderson2cfac7f2021-08-25 13:05:43 -07002048static bool fold_mov(OptContext *ctx, TCGOp *op)
2049{
2050 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2051}
2052
Richard Henderson0c310a32021-08-24 10:37:24 -07002053static bool fold_movcond(OptContext *ctx, TCGOp *op)
2054{
Richard Henderson32202782024-12-08 20:16:38 -06002055 uint64_t z_mask, s_mask;
2056 TempOptInfo *tt, *ft;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002057 int i;
Richard Henderson0c310a32021-08-24 10:37:24 -07002058
Richard Henderson141125e2024-09-06 21:00:10 -07002059 /* If true and false values are the same, eliminate the cmp. */
2060 if (args_are_copies(op->args[3], op->args[4])) {
2061 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2062 }
2063
Richard Henderson7a2f7082021-08-26 07:06:39 -07002064 /*
2065 * Canonicalize the "false" input reg to match the destination reg so
2066 * that the tcg backend can implement a "move if true" operation.
2067 */
2068 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
Richard Henderson246c4b72023-10-24 16:36:50 -07002069 op->args[5] = tcg_invert_cond(op->args[5]);
Richard Henderson7a2f7082021-08-26 07:06:39 -07002070 }
2071
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002072 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002073 &op->args[2], &op->args[5]);
Richard Henderson0c310a32021-08-24 10:37:24 -07002074 if (i >= 0) {
2075 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2076 }
2077
Richard Henderson32202782024-12-08 20:16:38 -06002078 tt = arg_info(op->args[3]);
2079 ft = arg_info(op->args[4]);
2080 z_mask = tt->z_mask | ft->z_mask;
2081 s_mask = tt->s_mask & ft->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002082
Richard Henderson32202782024-12-08 20:16:38 -06002083 if (ti_is_const(tt) && ti_is_const(ft)) {
2084 uint64_t tv = ti_const_val(tt);
2085 uint64_t fv = ti_const_val(ft);
Richard Henderson246c4b72023-10-24 16:36:50 -07002086 TCGCond cond = op->args[5];
Richard Henderson0c310a32021-08-24 10:37:24 -07002087
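        /*
         * Constant arms of {1,0} or {-1,0}, in either order, reduce to
         * setcond or negsetcond on the same or inverted comparison.
         */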
Richard Henderson0c310a32021-08-24 10:37:24 -07002088 if (tv == 1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002089 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002090 op->args[3] = cond;
2091 } else if (fv == 1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002092 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002093 op->args[3] = tcg_invert_cond(cond);
Richard Hendersonf7914582025-01-09 12:48:21 -08002094 } else if (tv == -1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002095 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002096 op->args[3] = cond;
2097 } else if (fv == -1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002098 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002099 op->args[3] = tcg_invert_cond(cond);
Richard Henderson0c310a32021-08-24 10:37:24 -07002100 }
2101 }
Richard Henderson32202782024-12-08 20:16:38 -06002102
2103 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson0c310a32021-08-24 10:37:24 -07002104}
2105
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002106static bool fold_mul(OptContext *ctx, TCGOp *op)
2107{
Richard Hendersone8679952021-08-25 13:19:52 -07002108 if (fold_const2(ctx, op) ||
Richard Henderson5b5cf472021-10-25 11:19:14 -07002109 fold_xi_to_i(ctx, op, 0) ||
2110 fold_xi_to_x(ctx, op, 1)) {
Richard Hendersone8679952021-08-25 13:19:52 -07002111 return true;
2112 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002113 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002114}
2115
2116static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2117{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002118 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07002119 fold_xi_to_i(ctx, op, 0)) {
2120 return true;
2121 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002122 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002123}
2124
Richard Henderson407112b2021-08-26 06:33:04 -07002125static bool fold_multiply2(OptContext *ctx, TCGOp *op)
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002126{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002127 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2128
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002129 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
Richard Henderson407112b2021-08-26 06:33:04 -07002130 uint64_t a = arg_info(op->args[2])->val;
2131 uint64_t b = arg_info(op->args[3])->val;
2132 uint64_t h, l;
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002133 TCGArg rl, rh;
Richard Henderson407112b2021-08-26 06:33:04 -07002134 TCGOp *op2;
2135
2136 switch (op->opc) {
Richard Hendersond7761982025-01-09 09:11:53 -08002137 case INDEX_op_mulu2:
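            /*
             * For I32, form the full 64-bit product and split it in two;
             * each half is stored sign-extended, as usual for I32 values.
             */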
2138 if (ctx->type == TCG_TYPE_I32) {
2139 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2140 h = (int32_t)(l >> 32);
2141 l = (int32_t)l;
2142 } else {
2143 mulu64(&l, &h, a, b);
2144 }
Richard Henderson407112b2021-08-26 06:33:04 -07002145 break;
Richard Hendersonbfe96482025-01-09 07:24:32 -08002146 case INDEX_op_muls2:
2147 if (ctx->type == TCG_TYPE_I32) {
2148 l = (int64_t)(int32_t)a * (int32_t)b;
2149 h = l >> 32;
2150 l = (int32_t)l;
2151 } else {
2152 muls64(&l, &h, a, b);
2153 }
Richard Henderson407112b2021-08-26 06:33:04 -07002154 break;
2155 default:
2156 g_assert_not_reached();
2157 }
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002158
2159 rl = op->args[0];
2160 rh = op->args[1];
Richard Henderson407112b2021-08-26 06:33:04 -07002161
2162 /* The proper opcode is supplied by tcg_opt_gen_mov. */
Richard Hendersona3c1c572025-04-21 11:05:29 -07002163 op2 = opt_insert_before(ctx, op, 0, 2);
Richard Henderson407112b2021-08-26 06:33:04 -07002164
2165 tcg_opt_gen_movi(ctx, op, rl, l);
2166 tcg_opt_gen_movi(ctx, op2, rh, h);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002167 return true;
2168 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002169 return finish_folding(ctx, op);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002170}
2171
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002172static bool fold_nand(OptContext *ctx, TCGOp *op)
2173{
Richard Hendersonfa3168e2024-12-08 20:20:40 -06002174 uint64_t s_mask;
2175
Richard Henderson7a2f7082021-08-26 07:06:39 -07002176 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002177 fold_xi_to_not(ctx, op, -1)) {
2178 return true;
2179 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002180
Richard Hendersonfa3168e2024-12-08 20:20:40 -06002181 s_mask = arg_info(op->args[1])->s_mask
2182 & arg_info(op->args[2])->s_mask;
2183 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002184}
2185
Richard Hendersone25fe882024-04-04 20:53:50 +00002186static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002187{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002188    /* Set to 1 all bits from the lowest bit that may be set, upward. */
Richard Hendersone25fe882024-04-04 20:53:50 +00002189 uint64_t z_mask = arg_info(op->args[1])->z_mask;
Richard Hendersond151fd32024-12-08 20:23:11 -06002190 z_mask = -(z_mask & -z_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002191
Richard Hendersond151fd32024-12-08 20:23:11 -06002192 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002193}
2194
Richard Hendersone25fe882024-04-04 20:53:50 +00002195static bool fold_neg(OptContext *ctx, TCGOp *op)
2196{
2197 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2198}
2199
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002200static bool fold_nor(OptContext *ctx, TCGOp *op)
2201{
Richard Henderson2b7b6952024-12-08 20:25:21 -06002202 uint64_t s_mask;
2203
Richard Henderson7a2f7082021-08-26 07:06:39 -07002204 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002205 fold_xi_to_not(ctx, op, 0)) {
2206 return true;
2207 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002208
Richard Henderson2b7b6952024-12-08 20:25:21 -06002209 s_mask = arg_info(op->args[1])->s_mask
2210 & arg_info(op->args[2])->s_mask;
2211 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002212}
2213
2214static bool fold_not(OptContext *ctx, TCGOp *op)
2215{
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002216 if (fold_const1(ctx, op)) {
2217 return true;
2218 }
Richard Henderson608e75f2024-12-08 20:27:02 -06002219 return fold_masks_s(ctx, op, arg_info(op->args[1])->s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002220}
2221
2222static bool fold_or(OptContext *ctx, TCGOp *op)
2223{
Richard Henderson83b1ba32024-12-08 20:28:59 -06002224 uint64_t z_mask, s_mask;
2225 TempOptInfo *t1, *t2;
2226
Richard Henderson7a2f7082021-08-26 07:06:39 -07002227 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002228 fold_xi_to_x(ctx, op, 0) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07002229 fold_xx_to_x(ctx, op)) {
2230 return true;
2231 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002232
Richard Henderson83b1ba32024-12-08 20:28:59 -06002233 t1 = arg_info(op->args[1]);
2234 t2 = arg_info(op->args[2]);
2235 z_mask = t1->z_mask | t2->z_mask;
2236 s_mask = t1->s_mask & t2->s_mask;
2237 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002238}
2239
2240static bool fold_orc(OptContext *ctx, TCGOp *op)
2241{
Richard Henderson54e26b22024-12-08 20:30:20 -06002242 uint64_t s_mask;
Richard Henderson50e40ec2024-12-10 08:13:10 -06002243 TempOptInfo *t1, *t2;
Richard Henderson54e26b22024-12-08 20:30:20 -06002244
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002245 if (fold_const2(ctx, op) ||
Richard Henderson4e858d92021-08-26 07:31:13 -07002246 fold_xx_to_i(ctx, op, -1) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002247 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002248 fold_ix_to_not(ctx, op, 0)) {
2249 return true;
2250 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002251
Richard Henderson50e40ec2024-12-10 08:13:10 -06002252 t2 = arg_info(op->args[2]);
2253 if (ti_is_const(t2)) {
2254 /* Fold orc r,x,i to or r,x,~i. */
2255 switch (ctx->type) {
2256 case TCG_TYPE_I32:
2257 case TCG_TYPE_I64:
2258 op->opc = INDEX_op_or;
2259 break;
2260 case TCG_TYPE_V64:
2261 case TCG_TYPE_V128:
2262 case TCG_TYPE_V256:
2263 op->opc = INDEX_op_or_vec;
2264 break;
2265 default:
2266 g_assert_not_reached();
2267 }
2268 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2269 return fold_or(ctx, op);
2270 }
2271
2272 t1 = arg_info(op->args[1]);
2273 s_mask = t1->s_mask & t2->s_mask;
Richard Henderson54e26b22024-12-08 20:30:20 -06002274 return fold_masks_s(ctx, op, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002275}
2276
Richard Henderson6813be92024-12-08 20:33:30 -06002277static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
Richard Henderson3eefdf22021-08-25 11:06:43 -07002278{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002279 const TCGOpDef *def = &tcg_op_defs[op->opc];
2280 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2281 MemOp mop = get_memop(oi);
2282 int width = 8 * memop_size(mop);
Richard Henderson6813be92024-12-08 20:33:30 -06002283 uint64_t z_mask = -1, s_mask = 0;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002284
Richard Henderson57fe5c62021-08-26 12:04:46 -07002285 if (width < 64) {
Richard Henderson75c3bf32024-12-19 10:50:40 -08002286 if (mop & MO_SIGN) {
Richard Henderson6813be92024-12-08 20:33:30 -06002287 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
Richard Henderson75c3bf32024-12-19 10:50:40 -08002288 } else {
Richard Henderson6813be92024-12-08 20:33:30 -06002289 z_mask = MAKE_64BIT_MASK(0, width);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002290 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002291 }
2292
Richard Henderson3eefdf22021-08-25 11:06:43 -07002293 /* Opcodes that touch guest memory stop the mb optimization. */
2294 ctx->prev_mb = NULL;
Richard Henderson6813be92024-12-08 20:33:30 -06002295
2296 return fold_masks_zs(ctx, op, z_mask, s_mask);
2297}
2298
2299static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2300{
2301 /* Opcodes that touch guest memory stop the mb optimization. */
2302 ctx->prev_mb = NULL;
2303 return finish_folding(ctx, op);
Richard Henderson3eefdf22021-08-25 11:06:43 -07002304}
2305
2306static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2307{
2308 /* Opcodes that touch guest memory stop the mb optimization. */
2309 ctx->prev_mb = NULL;
Richard Henderson082b3ef2024-12-08 20:34:57 -06002310 return true;
Richard Henderson3eefdf22021-08-25 11:06:43 -07002311}
2312
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002313static bool fold_remainder(OptContext *ctx, TCGOp *op)
2314{
Richard Henderson267c17e2021-10-25 11:30:33 -07002315 if (fold_const2(ctx, op) ||
2316 fold_xx_to_i(ctx, op, 0)) {
2317 return true;
2318 }
Richard Hendersonf9e39342024-12-08 20:36:50 -06002319 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002320}
2321
Richard Henderson95eb2292024-12-08 20:47:59 -06002322/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2323static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
Richard Henderson8d65cda2024-03-26 16:00:40 -10002324{
2325 uint64_t a_zmask, b_val;
2326 TCGCond cond;
2327
2328 if (!arg_is_const(op->args[2])) {
2329 return false;
2330 }
2331
2332 a_zmask = arg_info(op->args[1])->z_mask;
2333 b_val = arg_info(op->args[2])->val;
2334 cond = op->args[3];
2335
2336 if (ctx->type == TCG_TYPE_I32) {
2337 a_zmask = (uint32_t)a_zmask;
2338 b_val = (uint32_t)b_val;
2339 }
2340
2341 /*
2342 * A with only low bits set vs B with high bits set means that A < B.
2343 */
2344 if (a_zmask < b_val) {
2345 bool inv = false;
2346
2347 switch (cond) {
2348 case TCG_COND_NE:
2349 case TCG_COND_LEU:
2350 case TCG_COND_LTU:
2351 inv = true;
2352 /* fall through */
2353 case TCG_COND_GTU:
2354 case TCG_COND_GEU:
2355 case TCG_COND_EQ:
2356 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2357 default:
2358 break;
2359 }
2360 }
2361
2362 /*
2363 * A with only lsb set is already boolean.
2364 */
2365 if (a_zmask <= 1) {
2366 bool convert = false;
2367 bool inv = false;
2368
2369 switch (cond) {
2370 case TCG_COND_EQ:
2371 inv = true;
2372 /* fall through */
2373 case TCG_COND_NE:
2374 convert = (b_val == 0);
2375 break;
2376 case TCG_COND_LTU:
2377 case TCG_COND_TSTEQ:
2378 inv = true;
2379 /* fall through */
2380 case TCG_COND_GEU:
2381 case TCG_COND_TSTNE:
2382 convert = (b_val == 1);
2383 break;
2384 default:
2385 break;
2386 }
2387 if (convert) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002388 if (!inv && !neg) {
2389 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2390 }
2391
Richard Henderson8d65cda2024-03-26 16:00:40 -10002392 if (!inv) {
Richard Henderson69713582025-01-06 22:48:57 -08002393 op->opc = INDEX_op_neg;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002394 } else if (neg) {
Richard Henderson79602f62025-01-06 09:11:39 -08002395 op->opc = INDEX_op_add;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002396 op->args[2] = arg_new_constant(ctx, -1);
2397 } else {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002398 op->opc = INDEX_op_xor;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002399 op->args[2] = arg_new_constant(ctx, 1);
2400 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002401 return -1;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002402 }
2403 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002404 return 0;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002405}
2406
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002407static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2408{
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002409 TCGCond cond = op->args[3];
2410 TCGArg ret, src1, src2;
2411 TCGOp *op2;
2412 uint64_t val;
2413 int sh;
2414 bool inv;
2415
2416 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2417 return;
2418 }
2419
2420 src2 = op->args[2];
2421 val = arg_info(src2)->val;
2422 if (!is_power_of_2(val)) {
2423 return;
2424 }
2425 sh = ctz64(val);
2426
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002427 ret = op->args[0];
2428 src1 = op->args[1];
2429 inv = cond == TCG_COND_TSTEQ;
2430
Richard Hendersonfa361ee2025-01-12 11:50:09 -08002431 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2432 op->opc = INDEX_op_sextract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002433 op->args[1] = src1;
2434 op->args[2] = sh;
2435 op->args[3] = 1;
2436 return;
Richard Henderson07d5d502025-01-11 09:01:46 -08002437 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2438 op->opc = INDEX_op_extract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002439 op->args[1] = src1;
2440 op->args[2] = sh;
2441 op->args[3] = 1;
2442 } else {
2443 if (sh) {
Richard Henderson74dbd362025-01-07 22:52:10 -08002444 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002445 op2->args[0] = ret;
2446 op2->args[1] = src1;
2447 op2->args[2] = arg_new_constant(ctx, sh);
2448 src1 = ret;
2449 }
Richard Hendersonc3b920b2025-01-06 10:32:44 -08002450 op->opc = INDEX_op_and;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002451 op->args[1] = src1;
2452 op->args[2] = arg_new_constant(ctx, 1);
2453 }
2454
2455 if (neg && inv) {
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002456 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002457 op2->args[0] = ret;
2458 op2->args[1] = ret;
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002459 op2->args[2] = arg_new_constant(ctx, -1);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002460 } else if (inv) {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002461 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002462 op2->args[0] = ret;
2463 op2->args[1] = ret;
2464 op2->args[2] = arg_new_constant(ctx, 1);
2465 } else if (neg) {
Richard Henderson69713582025-01-06 22:48:57 -08002466 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002467 op2->args[0] = ret;
2468 op2->args[1] = ret;
2469 }
2470}
2471
Richard Hendersonc63ff552021-08-24 09:35:30 -07002472static bool fold_setcond(OptContext *ctx, TCGOp *op)
2473{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002474 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002475 &op->args[2], &op->args[3]);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002476 if (i >= 0) {
2477 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2478 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002479
Richard Henderson95eb2292024-12-08 20:47:59 -06002480 i = fold_setcond_zmask(ctx, op, false);
2481 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002482 return true;
2483 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002484 if (i == 0) {
2485 fold_setcond_tst_pow2(ctx, op, false);
2486 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002487
Richard Henderson2c8a2832024-12-08 20:50:37 -06002488 return fold_masks_z(ctx, op, 1);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002489}
2490
Richard Henderson36355022023-08-04 23:24:04 +00002491static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2492{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002493 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002494 &op->args[2], &op->args[3]);
Richard Henderson36355022023-08-04 23:24:04 +00002495 if (i >= 0) {
2496 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2497 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002498
Richard Henderson95eb2292024-12-08 20:47:59 -06002499 i = fold_setcond_zmask(ctx, op, true);
2500 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002501 return true;
2502 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002503 if (i == 0) {
2504 fold_setcond_tst_pow2(ctx, op, true);
2505 }
Richard Henderson36355022023-08-04 23:24:04 +00002506
2507 /* Value is {0,-1} so all bits are repetitions of the sign. */
Richard Henderson081cf082024-12-08 20:50:58 -06002508 return fold_masks_s(ctx, op, -1);
Richard Henderson36355022023-08-04 23:24:04 +00002509}
2510
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002511static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2512{
Richard Henderson7e64b112023-10-24 16:53:56 -07002513 TCGCond cond;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002514 int i, inv = 0;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002515
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002516 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
Richard Henderson7e64b112023-10-24 16:53:56 -07002517 cond = op->args[5];
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002518 if (i >= 0) {
2519 goto do_setcond_const;
2520 }
2521
2522 switch (cond) {
2523 case TCG_COND_LT:
2524 case TCG_COND_GE:
2525 /*
2526 * Simplify LT/GE comparisons vs zero to a single compare
2527 * vs the high word of the input.
2528 */
Richard Henderson27cdb852023-10-23 11:38:00 -07002529 if (arg_is_const_val(op->args[3], 0) &&
2530 arg_is_const_val(op->args[4], 0)) {
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002531 goto do_setcond_high;
2532 }
2533 break;
2534
2535 case TCG_COND_NE:
2536 inv = 1;
2537 QEMU_FALLTHROUGH;
2538 case TCG_COND_EQ:
2539 /*
2540 * Simplify EQ/NE comparisons where one of the pairs
2541 * can be simplified.
2542 */
Richard Henderson67f84c92021-08-25 08:00:20 -07002543 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002544 op->args[3], cond);
2545 switch (i ^ inv) {
2546 case 0:
2547 goto do_setcond_const;
2548 case 1:
2549 goto do_setcond_high;
2550 }
2551
Richard Henderson67f84c92021-08-25 08:00:20 -07002552 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002553 op->args[4], cond);
2554 switch (i ^ inv) {
2555 case 0:
2556 goto do_setcond_const;
2557 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002558 goto do_setcond_low;
2559 }
2560 break;
2561
2562 case TCG_COND_TSTEQ:
2563 case TCG_COND_TSTNE:
Richard Hendersona71d9df2024-06-30 19:46:23 -07002564 if (arg_is_const_val(op->args[3], 0)) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002565 goto do_setcond_high;
2566 }
2567 if (arg_is_const_val(op->args[4], 0)) {
2568 goto do_setcond_low;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002569 }
2570 break;
2571
2572 default:
2573 break;
2574
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002575 do_setcond_low:
2576 op->args[2] = op->args[3];
2577 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002578 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002579 return fold_setcond(ctx, op);
2580
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002581 do_setcond_high:
2582 op->args[1] = op->args[2];
2583 op->args[2] = op->args[4];
2584 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002585 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002586 return fold_setcond(ctx, op);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002587 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002588
Richard Hendersona53502c2024-12-08 20:56:36 -06002589 return fold_masks_z(ctx, op, 1);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002590
2591 do_setcond_const:
2592 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2593}
2594
Richard Hendersonb6617c82021-08-24 10:44:53 -07002595static bool fold_sextract(OptContext *ctx, TCGOp *op)
2596{
Richard Henderson57fe5c62021-08-26 12:04:46 -07002597 uint64_t z_mask, s_mask, s_mask_old;
Richard Hendersonbaff5072024-12-08 21:09:30 -06002598 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002599 int pos = op->args[2];
2600 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07002601
Richard Hendersonbaff5072024-12-08 21:09:30 -06002602 if (ti_is_const(t1)) {
2603 return tcg_opt_gen_movi(ctx, op, op->args[0],
2604 sextract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07002605 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002606
Richard Hendersonbaff5072024-12-08 21:09:30 -06002607 s_mask_old = t1->s_mask;
2608 s_mask = s_mask_old >> pos;
2609 s_mask |= -1ull << (len - 1);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002610
Richard Hendersonaa9e0502024-12-21 22:03:53 -08002611 if (pos == 0 && fold_affected_mask(ctx, op, s_mask & ~s_mask_old)) {
Richard Henderson045ace32024-12-19 10:33:51 -08002612 return true;
Richard Henderson57fe5c62021-08-26 12:04:46 -07002613 }
2614
Richard Hendersonbaff5072024-12-08 21:09:30 -06002615 z_mask = sextract64(t1->z_mask, pos, len);
2616 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07002617}
2618
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002619static bool fold_shift(OptContext *ctx, TCGOp *op)
2620{
Richard Henderson4ed2ba32024-12-19 19:38:54 -08002621 uint64_t s_mask, z_mask;
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002622 TempOptInfo *t1, *t2;
Richard Henderson93a967f2021-08-26 13:24:59 -07002623
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002624 if (fold_const2(ctx, op) ||
Richard Hendersonda48e272021-08-25 20:42:04 -07002625 fold_ix_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002626 fold_xi_to_x(ctx, op, 0)) {
2627 return true;
2628 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002629
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002630 t1 = arg_info(op->args[1]);
2631 t2 = arg_info(op->args[2]);
2632 s_mask = t1->s_mask;
2633 z_mask = t1->z_mask;
Richard Henderson93a967f2021-08-26 13:24:59 -07002634
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002635 if (ti_is_const(t2)) {
2636 int sh = ti_const_val(t2);
Richard Henderson93a967f2021-08-26 13:24:59 -07002637
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002638 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002639 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002640
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002641 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002642 }
Richard Henderson93a967f2021-08-26 13:24:59 -07002643
2644 switch (op->opc) {
Richard Henderson3949f362025-01-08 08:05:18 -08002645 case INDEX_op_sar:
Richard Henderson93a967f2021-08-26 13:24:59 -07002646 /*
2647 * Arithmetic right shift will not reduce the number of
2648 * input sign repetitions.
2649 */
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002650 return fold_masks_s(ctx, op, s_mask);
Richard Henderson74dbd362025-01-07 22:52:10 -08002651 case INDEX_op_shr:
Richard Henderson93a967f2021-08-26 13:24:59 -07002652 /*
2653 * If the sign bit is known zero, then logical right shift
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002654 * will not reduce the number of input sign repetitions.
Richard Henderson93a967f2021-08-26 13:24:59 -07002655 */
Richard Henderson4ed2ba32024-12-19 19:38:54 -08002656 if (~z_mask & -s_mask) {
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002657 return fold_masks_s(ctx, op, s_mask);
Richard Henderson93a967f2021-08-26 13:24:59 -07002658 }
2659 break;
2660 default:
2661 break;
2662 }
2663
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002664 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002665}
2666
Richard Henderson9caca882021-08-24 13:30:32 -07002667static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2668{
2669 TCGOpcode neg_op;
2670 bool have_neg;
2671
2672 if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2673 return false;
2674 }
2675
2676 switch (ctx->type) {
2677 case TCG_TYPE_I32:
Richard Henderson9caca882021-08-24 13:30:32 -07002678 case TCG_TYPE_I64:
Richard Henderson69713582025-01-06 22:48:57 -08002679 neg_op = INDEX_op_neg;
Richard Hendersonb701f192023-10-25 21:14:04 -07002680 have_neg = true;
Richard Henderson9caca882021-08-24 13:30:32 -07002681 break;
2682 case TCG_TYPE_V64:
2683 case TCG_TYPE_V128:
2684 case TCG_TYPE_V256:
2685 neg_op = INDEX_op_neg_vec;
2686 have_neg = (TCG_TARGET_HAS_neg_vec &&
2687 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2688 break;
2689 default:
2690 g_assert_not_reached();
2691 }
2692 if (have_neg) {
2693 op->opc = neg_op;
2694 op->args[1] = op->args[2];
Richard Hendersone25fe882024-04-04 20:53:50 +00002695 return fold_neg_no_const(ctx, op);
Richard Henderson9caca882021-08-24 13:30:32 -07002696 }
2697 return false;
2698}
2699
Richard Hendersonc578ff12021-12-16 06:07:25 -08002700/* We cannot as yet do_constant_folding with vectors. */
2701static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002702{
Richard Hendersonc578ff12021-12-16 06:07:25 -08002703 if (fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002704 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson9caca882021-08-24 13:30:32 -07002705 fold_sub_to_neg(ctx, op)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07002706 return true;
2707 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002708 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002709}
2710
Richard Hendersonc578ff12021-12-16 06:07:25 -08002711static bool fold_sub(OptContext *ctx, TCGOp *op)
2712{
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002713 if (fold_const2(ctx, op) ||
2714 fold_xx_to_i(ctx, op, 0) ||
2715 fold_xi_to_x(ctx, op, 0) ||
2716 fold_sub_to_neg(ctx, op)) {
Richard Henderson6334a962023-10-25 18:39:43 -07002717 return true;
2718 }
2719
2720 /* Fold sub r,x,i to add r,x,-i */
2721 if (arg_is_const(op->args[2])) {
2722 uint64_t val = arg_info(op->args[2])->val;
2723
Richard Henderson79602f62025-01-06 09:11:39 -08002724 op->opc = INDEX_op_add;
Richard Henderson6334a962023-10-25 18:39:43 -07002725 op->args[2] = arg_new_constant(ctx, -val);
2726 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002727 return finish_folding(ctx, op);
Richard Hendersonc578ff12021-12-16 06:07:25 -08002728}
2729
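/*
 * The borrow-out of the previous opcode has been determined constant and
 * folded into the current op's borrow-in, so rewrite the previous op to a
 * form that no longer computes the flag.
 */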
Richard Hendersonaeb35142025-01-14 18:28:15 -08002730static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2731{
2732 TempOptInfo *t2;
2733
2734 op = QTAILQ_PREV(op, link);
2735 switch (op->opc) {
2736 case INDEX_op_subbo:
2737 op->opc = INDEX_op_sub;
2738 fold_sub(ctx, op);
2739 break;
2740 case INDEX_op_subbio:
2741 op->opc = INDEX_op_subbi;
2742 break;
2743 case INDEX_op_subb1o:
2744 t2 = arg_info(op->args[2]);
2745 if (ti_is_const(t2)) {
2746 op->opc = INDEX_op_add;
2747 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2748 /* Perform other constant folding, if needed. */
2749 fold_add(ctx, op);
2750 } else {
2751 TCGArg ret = op->args[0];
2752 op->opc = INDEX_op_sub;
2753 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2754 op->args[0] = ret;
2755 op->args[1] = ret;
2756 op->args[2] = arg_new_constant(ctx, -1);
2757 }
2758 break;
2759 default:
2760 g_assert_not_reached();
2761 }
2762}
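
/*
 * The helper above rewrites the *previous* opcode, the one that produced
 * the borrow which the current op is about to consume as a compile-time
 * constant.  For example, once the borrow-in is known, a preceding
 * "subbo t0,a,b" no longer needs to produce a borrow and is demoted to
 * plain "sub t0,a,b"; likewise subbio is demoted to subbi, and subb1o
 * is lowered to an equivalent add or sub/add sequence.
 */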

static bool fold_subbi(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t2;
    int borrow_in = ctx->carry_state;

    if (borrow_in < 0) {
        return finish_folding(ctx, op);
    }
    ctx->carry_state = -1;

    squash_prev_borrowout(ctx, op);
    if (borrow_in == 0) {
        op->opc = INDEX_op_sub;
        return fold_sub(ctx, op);
    }

    /*
     * Propagate the known carry-in into any constant, then negate to
     * transform from sub to add.  If there is no constant, emit a
     * separate add -1.
     */
    t2 = arg_info(op->args[2]);
    if (ti_is_const(t2)) {
        op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
    } else {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);

        op2->args[0] = op->args[0];
        op2->args[1] = op->args[1];
        op2->args[2] = op->args[2];
        fold_sub(ctx, op2);

        op->args[1] = op->args[0];
        op->args[2] = arg_new_constant(ctx, -1);
    }
    op->opc = INDEX_op_add;
    return fold_add(ctx, op);
}
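
/*
 * Example with a known borrow-in of 1: "subbi r,x,7" is rewritten as
 * "add r,x,-8", since the constant absorbs both the borrow and the
 * negation; with a non-constant subtrahend it becomes "sub r,x,y"
 * followed by "add r,r,-1".  With a known borrow-in of 0 it simply
 * becomes a plain sub.
 */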

static bool fold_subbio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int borrow_out = -1;

    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    squash_prev_borrowout(ctx, op);
    if (ctx->carry_state == 0) {
        goto do_subbo;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /* Propagate the known borrow-in into a constant, if possible. */
    if (ti_is_const(t2)) {
        uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
        uint64_t v = ti_const_val(t2) & max;

        if (v < max) {
            op->args[2] = arg_new_constant(ctx, v + 1);
            goto do_subbo;
        }
        /* subtracting max + 1 produces known borrow out. */
        borrow_out = 1;
    }
    if (ti_is_const(t1)) {
        uint64_t v = ti_const_val(t1);
        if (v != 0) {
            op->args[1] = arg_new_constant(ctx, v - 1);
            goto do_subbo;
        }
    }

    /* Adjust the opcode to remember the known carry-in. */
    op->opc = INDEX_op_subb1o;
    ctx->carry_state = borrow_out;
    return finish_folding(ctx, op);

 do_subbo:
    op->opc = INDEX_op_subbo;
    return fold_subbo(ctx, op);
}
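
/*
 * Example: with a known borrow-in of 1 and a constant subtrahend that is
 * not already the maximum value, "subbio r,x,7" folds the borrow into
 * the constant and becomes "subbo r,x,8"; a constant non-zero minuend is
 * instead decremented.  Only when neither operand can absorb the borrow
 * is the opcode changed to subb1o, so that the known borrow-in of 1 is
 * carried by the opcode itself.
 */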

static bool fold_subbo(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1 = arg_info(op->args[1]);
    TempOptInfo *t2 = arg_info(op->args[2]);
    int borrow_out = -1;

    if (ti_is_const(t2)) {
        uint64_t v2 = ti_const_val(t2);
        if (v2 == 0) {
            borrow_out = 0;
        } else if (ti_is_const(t1)) {
            uint64_t v1 = ti_const_val(t1);
            borrow_out = v1 < v2;
        }
    }
    ctx->carry_state = borrow_out;
    return finish_folding(ctx, op);
}
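
/*
 * Example of the borrow-out tracking above: for "subbo r,x,0" the
 * borrow-out is known to be 0; with both operands constant, such as
 * "subbo r,3,5", it is 1 because 3 < 5.  Otherwise carry_state stays -1
 * and the subbi/subbio that consumes the borrow is left untouched.
 */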

static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask = -1, s_mask = 0;

    /* We can't do any folding with a load, but we can record bits. */
    switch (op->opc) {
    case INDEX_op_ld8s:
        s_mask = INT8_MIN;
        break;
    case INDEX_op_ld8u:
        z_mask = MAKE_64BIT_MASK(0, 8);
        break;
    case INDEX_op_ld16s:
        s_mask = INT16_MIN;
        break;
    case INDEX_op_ld16u:
        z_mask = MAKE_64BIT_MASK(0, 16);
        break;
    case INDEX_op_ld32s:
        s_mask = INT32_MIN;
        break;
    case INDEX_op_ld32u:
        z_mask = MAKE_64BIT_MASK(0, 32);
        break;
    default:
        g_assert_not_reached();
    }
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}
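
/*
 * Example of the mask recording above: an ld8u can only produce bits
 * 0..7, so z_mask becomes 0xff; an ld16s produces a value whose bits
 * 15..63 all match the most significant bit, which is what
 * s_mask = INT16_MIN encodes.  Later folds such as fold_and or
 * fold_exts can then remove a redundant mask or sign-extension of the
 * loaded value.
 */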

static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *dst, *src;
    intptr_t ofs;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return finish_folding(ctx, op);
    }

    type = ctx->type;
    ofs = op->args[2];
    dst = arg_temp(op->args[0]);
    src = find_mem_copy_for(ctx, type, ofs);
    if (src && src->base_type == type) {
        return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
    }

    reset_ts(ctx, dst);
    record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
    return true;
}
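
/*
 * Example: if a value was just stored to a slot in env (such as a guest
 * register) and is then reloaded from the same offset with the same
 * type, the load above is replaced by a copy of the still-live temp;
 * otherwise the destination is recorded as a new known copy of that
 * env slot for later loads to reuse.
 */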

static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
{
    intptr_t ofs = op->args[2];
    intptr_t lm1;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        remove_mem_copy_all(ctx);
        return true;
    }

    switch (op->opc) {
    case INDEX_op_st8:
        lm1 = 0;
        break;
    case INDEX_op_st16:
        lm1 = 1;
        break;
    case INDEX_op_st32:
        lm1 = 3;
        break;
    case INDEX_op_st:
    case INDEX_op_st_vec:
        lm1 = tcg_type_size(ctx->type) - 1;
        break;
    default:
        g_assert_not_reached();
    }
    remove_mem_copy_in(ctx, ofs, ofs + lm1);
    return true;
}
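
/*
 * Example of the invalidation above: "st16 x,env,8" clobbers bytes 8..9
 * (lm1 = 1), so any temp previously recorded as a copy of memory
 * overlapping that range is forgotten.  A store through a base pointer
 * other than env conservatively drops all memory-copy information.
 */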

static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
{
    TCGTemp *src;
    intptr_t ofs, last;
    TCGType type;

    if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
        return fold_tcg_st(ctx, op);
    }

    src = arg_temp(op->args[0]);
    ofs = op->args[2];
    type = ctx->type;

    /*
     * Eliminate duplicate stores of a constant.
     * This happens frequently when the target ISA zero-extends.
     */
    if (ts_is_const(src)) {
        TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
        if (src == prev) {
            tcg_op_remove(ctx->tcg, op);
            return true;
        }
    }

    last = ofs + tcg_type_size(type) - 1;
    remove_mem_copy_in(ctx, ofs, last);
    record_mem_copy(ctx, type, src, ofs, last);
    return true;
}
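
/*
 * Example: when the value being stored is a constant that is already
 * recorded as the contents of that env offset (common when the guest
 * ISA zero-extends results on every write), the store above is deleted
 * outright; otherwise the stored temp is recorded as the current
 * contents of the affected memory range.
 */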

static bool fold_xor(OptContext *ctx, TCGOp *op)
{
    uint64_t z_mask, s_mask;
    TempOptInfo *t1, *t2;

    if (fold_const2_commutative(ctx, op) ||
        fold_xx_to_i(ctx, op, 0) ||
        fold_xi_to_x(ctx, op, 0) ||
        fold_xi_to_not(ctx, op, -1)) {
        return true;
    }

    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);
    z_mask = t1->z_mask | t2->z_mask;
    s_mask = t1->s_mask & t2->s_mask;
    return fold_masks_zs(ctx, op, z_mask, s_mask);
}
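
/*
 * Examples of the folds above: "xor r,x,x" becomes the constant 0,
 * "xor r,x,0" becomes a copy of x, and "xor r,x,-1" becomes "not r,x".
 * In the remaining cases the result can have a 1 bit only where either
 * input could, hence z_mask = t1->z_mask | t2->z_mask.
 */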

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int nb_temps, i;
    TCGOp *op, *op_next;
    OptContext ctx = { .tcg = s };

    QSIMPLEQ_INIT(&ctx.mem_free);

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other temps, then those other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        bool done = false;

        /* Calls are special. */
        if (opc == INDEX_op_call) {
            fold_call(&ctx, op);
            continue;
        }

        def = &tcg_op_defs[opc];
        init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
        copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);

        /* Pre-compute the type of the operation. */
        ctx.type = TCGOP_TYPE(op);

        /*
         * Process each opcode.
         * Sorted alphabetically by opcode as much as possible.
         */
        switch (opc) {
        case INDEX_op_add:
            done = fold_add(&ctx, op);
            break;
        case INDEX_op_add_vec:
            done = fold_add_vec(&ctx, op);
            break;
        case INDEX_op_addci:
            done = fold_addci(&ctx, op);
            break;
        case INDEX_op_addcio:
            done = fold_addcio(&ctx, op);
            break;
        case INDEX_op_addco:
            done = fold_addco(&ctx, op);
            break;
        case INDEX_op_and:
        case INDEX_op_and_vec:
            done = fold_and(&ctx, op);
            break;
        case INDEX_op_andc:
        case INDEX_op_andc_vec:
            done = fold_andc(&ctx, op);
            break;
        case INDEX_op_brcond:
            done = fold_brcond(&ctx, op);
            break;
        case INDEX_op_brcond2_i32:
            done = fold_brcond2(&ctx, op);
            break;
        case INDEX_op_bswap16:
        case INDEX_op_bswap32:
        case INDEX_op_bswap64:
            done = fold_bswap(&ctx, op);
            break;
        case INDEX_op_clz:
        case INDEX_op_ctz:
            done = fold_count_zeros(&ctx, op);
            break;
        case INDEX_op_ctpop:
            done = fold_ctpop(&ctx, op);
            break;
        case INDEX_op_deposit:
            done = fold_deposit(&ctx, op);
            break;
        case INDEX_op_divs:
        case INDEX_op_divu:
            done = fold_divide(&ctx, op);
            break;
        case INDEX_op_dup_vec:
            done = fold_dup(&ctx, op);
            break;
        case INDEX_op_dup2_vec:
            done = fold_dup2(&ctx, op);
            break;
        case INDEX_op_eqv:
        case INDEX_op_eqv_vec:
            done = fold_eqv(&ctx, op);
            break;
        case INDEX_op_extract:
            done = fold_extract(&ctx, op);
            break;
        case INDEX_op_extract2:
            done = fold_extract2(&ctx, op);
            break;
        case INDEX_op_ext_i32_i64:
            done = fold_exts(&ctx, op);
            break;
        case INDEX_op_extu_i32_i64:
        case INDEX_op_extrl_i64_i32:
        case INDEX_op_extrh_i64_i32:
            done = fold_extu(&ctx, op);
            break;
        case INDEX_op_ld8s:
        case INDEX_op_ld8u:
        case INDEX_op_ld16s:
        case INDEX_op_ld16u:
        case INDEX_op_ld32s:
        case INDEX_op_ld32u:
            done = fold_tcg_ld(&ctx, op);
            break;
        case INDEX_op_ld:
        case INDEX_op_ld_vec:
            done = fold_tcg_ld_memcopy(&ctx, op);
            break;
        case INDEX_op_st8:
        case INDEX_op_st16:
        case INDEX_op_st32:
            done = fold_tcg_st(&ctx, op);
            break;
        case INDEX_op_st:
        case INDEX_op_st_vec:
            done = fold_tcg_st_memcopy(&ctx, op);
            break;
        case INDEX_op_mb:
            done = fold_mb(&ctx, op);
            break;
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            done = fold_mov(&ctx, op);
            break;
        case INDEX_op_movcond:
            done = fold_movcond(&ctx, op);
            break;
        case INDEX_op_mul:
            done = fold_mul(&ctx, op);
            break;
        case INDEX_op_mulsh:
        case INDEX_op_muluh:
            done = fold_mul_highpart(&ctx, op);
            break;
        case INDEX_op_muls2:
        case INDEX_op_mulu2:
            done = fold_multiply2(&ctx, op);
            break;
        case INDEX_op_nand:
        case INDEX_op_nand_vec:
            done = fold_nand(&ctx, op);
            break;
        case INDEX_op_neg:
            done = fold_neg(&ctx, op);
            break;
        case INDEX_op_nor:
        case INDEX_op_nor_vec:
            done = fold_nor(&ctx, op);
            break;
        case INDEX_op_not:
        case INDEX_op_not_vec:
            done = fold_not(&ctx, op);
            break;
        case INDEX_op_or:
        case INDEX_op_or_vec:
            done = fold_or(&ctx, op);
            break;
        case INDEX_op_orc:
        case INDEX_op_orc_vec:
            done = fold_orc(&ctx, op);
            break;
        case INDEX_op_qemu_ld_i32:
            done = fold_qemu_ld_1reg(&ctx, op);
            break;
        case INDEX_op_qemu_ld_i64:
            if (TCG_TARGET_REG_BITS == 64) {
                done = fold_qemu_ld_1reg(&ctx, op);
                break;
            }
            QEMU_FALLTHROUGH;
        case INDEX_op_qemu_ld_i128:
            done = fold_qemu_ld_2reg(&ctx, op);
            break;
        case INDEX_op_qemu_st8_i32:
        case INDEX_op_qemu_st_i32:
        case INDEX_op_qemu_st_i64:
        case INDEX_op_qemu_st_i128:
            done = fold_qemu_st(&ctx, op);
            break;
        case INDEX_op_rems:
        case INDEX_op_remu:
            done = fold_remainder(&ctx, op);
            break;
        case INDEX_op_rotl:
        case INDEX_op_rotr:
        case INDEX_op_sar:
        case INDEX_op_shl:
        case INDEX_op_shr:
            done = fold_shift(&ctx, op);
            break;
        case INDEX_op_setcond:
            done = fold_setcond(&ctx, op);
            break;
        case INDEX_op_negsetcond:
            done = fold_negsetcond(&ctx, op);
            break;
        case INDEX_op_setcond2_i32:
            done = fold_setcond2(&ctx, op);
            break;
        case INDEX_op_cmp_vec:
            done = fold_cmp_vec(&ctx, op);
            break;
        case INDEX_op_cmpsel_vec:
            done = fold_cmpsel_vec(&ctx, op);
            break;
        case INDEX_op_bitsel_vec:
            done = fold_bitsel_vec(&ctx, op);
            break;
        case INDEX_op_sextract:
            done = fold_sextract(&ctx, op);
            break;
        case INDEX_op_sub:
            done = fold_sub(&ctx, op);
            break;
        case INDEX_op_subbi:
            done = fold_subbi(&ctx, op);
            break;
        case INDEX_op_subbio:
            done = fold_subbio(&ctx, op);
            break;
        case INDEX_op_subbo:
            done = fold_subbo(&ctx, op);
            break;
        case INDEX_op_sub_vec:
            done = fold_sub_vec(&ctx, op);
            break;
        case INDEX_op_xor:
        case INDEX_op_xor_vec:
            done = fold_xor(&ctx, op);
            break;
        case INDEX_op_set_label:
        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_tb:
        case INDEX_op_goto_ptr:
            finish_ebb(&ctx);
            done = true;
            break;
        default:
            done = finish_folding(&ctx, op);
            break;
        }
        tcg_debug_assert(done);
    }
}