/*
 * Optimizations for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2010 Samsung Electronics.
 * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/interval-tree.h"
#include "tcg/tcg-op-common.h"
#include "tcg-internal.h"
#include "tcg-has.h"

typedef struct MemCopyInfo {
    IntervalTreeNode itree;
    QSIMPLEQ_ENTRY(MemCopyInfo) next;
    TCGTemp *ts;
    TCGType type;
} MemCopyInfo;

typedef struct TempOptInfo {
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
    uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
    uint64_t o_mask;  /* mask bit is 1 if and only if value bit is 1 */
    uint64_t s_mask;  /* mask bit is 1 if value bit matches msb */
} TempOptInfo;
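
/*
 * Illustrative example (not taken from any particular TB): a temp known to
 * hold the 64-bit constant 0xf0 has z_mask == o_mask == 0xf0 and
 * s_mask == 0xffffffffffffff00 (bits 8..63 all match bit 63), while a temp
 * about which nothing is known has z_mask == -1, o_mask == 0 and s_mask == 0.
 */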

typedef struct OptContext {
    TCGContext *tcg;
    TCGOp *prev_mb;
    TCGTempSet temps_used;

    IntervalTreeRoot mem_copy;
    QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;

    /* In flight values from optimization. */
    TCGType type;
    int carry_state;  /* -1 = non-constant, {0,1} = constant carry-in */
} OptContext;

static inline TempOptInfo *ts_info(TCGTemp *ts)
{
    return ts->state_ptr;
}

static inline TempOptInfo *arg_info(TCGArg arg)
{
    return ts_info(arg_temp(arg));
}

static inline bool ti_is_const(TempOptInfo *ti)
{
    /* If all bits that are not known zeros are known ones, it's constant. */
    return ti->z_mask == ti->o_mask;
}

static inline uint64_t ti_const_val(TempOptInfo *ti)
{
    /* If constant, both z_mask and o_mask contain the value. */
    return ti->z_mask;
}

static inline bool ti_is_const_val(TempOptInfo *ti, uint64_t val)
{
    return ti_is_const(ti) && ti_const_val(ti) == val;
}

static inline bool ts_is_const(TCGTemp *ts)
{
    return ti_is_const(ts_info(ts));
}

static inline bool ts_is_const_val(TCGTemp *ts, uint64_t val)
{
    return ti_is_const_val(ts_info(ts), val);
}

static inline bool arg_is_const(TCGArg arg)
{
    return ts_is_const(arg_temp(arg));
}

static inline uint64_t arg_const_val(TCGArg arg)
{
    return ti_const_val(arg_info(arg));
}

static inline bool arg_is_const_val(TCGArg arg, uint64_t val)
{
    return ts_is_const_val(arg_temp(arg), val);
}

static inline bool ts_is_copy(TCGTemp *ts)
{
    return ts_info(ts)->next_copy != ts;
}

static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
{
    return a->kind < b->kind ? b : a;
}

/* Initialize and activate a temporary. */
static void init_ts_info(OptContext *ctx, TCGTemp *ts)
{
    size_t idx = temp_idx(ts);
    TempOptInfo *ti;

    if (test_bit(idx, ctx->temps_used.l)) {
        return;
    }
    set_bit(idx, ctx->temps_used.l);

    ti = ts->state_ptr;
    if (ti == NULL) {
        ti = tcg_malloc(sizeof(TempOptInfo));
        ts->state_ptr = ti;
    }

    ti->next_copy = ts;
    ti->prev_copy = ts;
    QSIMPLEQ_INIT(&ti->mem_copy);
    if (ts->kind == TEMP_CONST) {
        ti->z_mask = ts->val;
        ti->o_mask = ts->val;
        ti->s_mask = INT64_MIN >> clrsb64(ts->val);
    } else {
        ti->z_mask = -1;
        ti->o_mask = 0;
        ti->s_mask = 0;
    }
}

static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
{
    IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
    return r ? container_of(r, MemCopyInfo, itree) : NULL;
}

static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
{
    TCGTemp *ts = mc->ts;
    TempOptInfo *ti = ts_info(ts);

    interval_tree_remove(&mc->itree, &ctx->mem_copy);
    QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
    QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
}

static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
{
    while (true) {
        MemCopyInfo *mc = mem_copy_first(ctx, s, l);
        if (!mc) {
            break;
        }
        remove_mem_copy(ctx, mc);
    }
}

static void remove_mem_copy_all(OptContext *ctx)
{
    remove_mem_copy_in(ctx, 0, -1);
    tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
}

static TCGTemp *find_better_copy(TCGTemp *ts)
{
    TCGTemp *i, *ret;

    /* If this is already readonly, we can't do better. */
    if (temp_readonly(ts)) {
        return ts;
    }

    ret = ts;
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        ret = cmp_better_copy(ret, i);
    }
    return ret;
}

static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
{
    TempOptInfo *si = ts_info(src_ts);
    TempOptInfo *di = ts_info(dst_ts);
    MemCopyInfo *mc;

    QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
        tcg_debug_assert(mc->ts == src_ts);
        mc->ts = dst_ts;
    }
    QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
}

/* Reset TEMP's state, possibly removing the temp from the list of copies. */
static void reset_ts(OptContext *ctx, TCGTemp *ts)
{
    TempOptInfo *ti = ts_info(ts);
    TCGTemp *pts = ti->prev_copy;
    TCGTemp *nts = ti->next_copy;
    TempOptInfo *pi = ts_info(pts);
    TempOptInfo *ni = ts_info(nts);

    ni->prev_copy = ti->prev_copy;
    pi->next_copy = ti->next_copy;
    ti->next_copy = ts;
    ti->prev_copy = ts;
    ti->z_mask = -1;
    ti->o_mask = 0;
    ti->s_mask = 0;

    if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
        if (ts == nts) {
            /* Last temp copy being removed, the mem copies die. */
            MemCopyInfo *mc;
            QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
                interval_tree_remove(&mc->itree, &ctx->mem_copy);
            }
            QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
        } else {
            move_mem_copies(find_better_copy(nts), ts);
        }
    }
}

static void reset_temp(OptContext *ctx, TCGArg arg)
{
    reset_ts(ctx, arg_temp(arg));
}

static void record_mem_copy(OptContext *ctx, TCGType type,
                            TCGTemp *ts, intptr_t start, intptr_t last)
{
    MemCopyInfo *mc;
    TempOptInfo *ti;

    mc = QSIMPLEQ_FIRST(&ctx->mem_free);
    if (mc) {
        QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
    } else {
        mc = tcg_malloc(sizeof(*mc));
    }

    memset(mc, 0, sizeof(*mc));
    mc->itree.start = start;
    mc->itree.last = last;
    mc->type = type;
    interval_tree_insert(&mc->itree, &ctx->mem_copy);

    ts = find_better_copy(ts);
    ti = ts_info(ts);
    mc->ts = ts;
    QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
}

static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *i;

    if (ts1 == ts2) {
        return true;
    }

    if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
        return false;
    }

    for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
        if (i == ts2) {
            return true;
        }
    }

    return false;
}

static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
}

static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
{
    MemCopyInfo *mc;

    for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
        if (mc->itree.start == s && mc->type == type) {
            return find_better_copy(mc->ts);
        }
    }
    return NULL;
}

static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
{
    TCGType type = ctx->type;
    TCGTemp *ts;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    ts = tcg_constant_internal(type, val);
    init_ts_info(ctx, ts);

    return temp_arg(ts);
}

static TCGArg arg_new_temp(OptContext *ctx)
{
    TCGTemp *ts = tcg_temp_new_internal(ctx->type, TEMP_EBB);
    init_ts_info(ctx, ts);
    return temp_arg(ts);
}

static TCGOp *opt_insert_after(OptContext *ctx, TCGOp *op,
                               TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_after(ctx->tcg, op, opc, ctx->type, narg);
}

static TCGOp *opt_insert_before(OptContext *ctx, TCGOp *op,
                                TCGOpcode opc, unsigned narg)
{
    return tcg_op_insert_before(ctx->tcg, op, opc, ctx->type, narg);
}

static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *dst_ts = arg_temp(dst);
    TCGTemp *src_ts = arg_temp(src);
    TempOptInfo *di;
    TempOptInfo *si;
    TCGOpcode new_op;

    if (ts_are_copies(dst_ts, src_ts)) {
        tcg_op_remove(ctx->tcg, op);
        return true;
    }

    reset_ts(ctx, dst_ts);
    di = ts_info(dst_ts);
    si = ts_info(src_ts);

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        new_op = INDEX_op_mov;
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /* TCGOP_TYPE and TCGOP_VECE remain unchanged. */
        new_op = INDEX_op_mov_vec;
        break;
    default:
        g_assert_not_reached();
    }
    op->opc = new_op;
    op->args[0] = dst;
    op->args[1] = src;

    di->z_mask = si->z_mask;
    di->o_mask = si->o_mask;
    di->s_mask = si->s_mask;

    if (src_ts->type == dst_ts->type) {
        TempOptInfo *ni = ts_info(si->next_copy);

        di->next_copy = si->next_copy;
        di->prev_copy = src_ts;
        ni->prev_copy = dst_ts;
        si->next_copy = dst_ts;

        if (!QSIMPLEQ_EMPTY(&si->mem_copy)
            && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
            move_mem_copies(dst_ts, src_ts);
        }
    } else if (dst_ts->type == TCG_TYPE_I32) {
        di->z_mask = (int32_t)di->z_mask;
        di->o_mask = (int32_t)di->o_mask;
        di->s_mask |= INT32_MIN;
    } else {
        di->z_mask |= MAKE_64BIT_MASK(32, 32);
        di->o_mask = (uint32_t)di->o_mask;
        di->s_mask = INT64_MIN;
    }
    return true;
}

static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                             TCGArg dst, uint64_t val)
{
    /* Convert movi to mov with constant temp. */
    return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
}

static uint64_t do_constant_folding_2(TCGOpcode op, TCGType type,
                                      uint64_t x, uint64_t y)
{
    uint64_t l64, h64;

    switch (op) {
    case INDEX_op_add:
        return x + y;

    case INDEX_op_sub:
        return x - y;

    case INDEX_op_mul:
        return x * y;

    case INDEX_op_and:
    case INDEX_op_and_vec:
        return x & y;

    case INDEX_op_or:
    case INDEX_op_or_vec:
        return x | y;

    case INDEX_op_xor:
    case INDEX_op_xor_vec:
        return x ^ y;

    case INDEX_op_shl:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x << (y & 31);
        }
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x >> (y & 31);
        }
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x >> (y & 31);
        }
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr:
        if (type == TCG_TYPE_I32) {
            return ror32(x, y & 31);
        }
        return ror64(x, y & 63);

    case INDEX_op_rotl:
        if (type == TCG_TYPE_I32) {
            return rol32(x, y & 31);
        }
        return rol64(x, y & 63);

    case INDEX_op_not:
    case INDEX_op_not_vec:
        return ~x;

    case INDEX_op_neg:
        return -x;

    case INDEX_op_andc:
    case INDEX_op_andc_vec:
        return x & ~y;

    case INDEX_op_orc:
    case INDEX_op_orc_vec:
        return x | ~y;

    case INDEX_op_eqv:
    case INDEX_op_eqv_vec:
        return ~(x ^ y);

    case INDEX_op_nand:
    case INDEX_op_nand_vec:
        return ~(x & y);

    case INDEX_op_nor:
    case INDEX_op_nor_vec:
        return ~(x | y);

    case INDEX_op_clz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? clz32(x) : y;
        }
        return x ? clz64(x) : y;

    case INDEX_op_ctz:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x ? ctz32(x) : y;
        }
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop:
        return type == TCG_TYPE_I32 ? ctpop32(x) : ctpop64(x);

    case INDEX_op_bswap16:
        x = bswap16(x);
        return y & TCG_BSWAP_OS ? (int16_t)x : x;

    case INDEX_op_bswap32:
        x = bswap32(x);
        return y & TCG_BSWAP_OS ? (int32_t)x : x;

    case INDEX_op_bswap64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    case INDEX_op_muluh:
        if (type == TCG_TYPE_I32) {
            return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
        }
        mulu64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_mulsh:
        if (type == TCG_TYPE_I32) {
            return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
        }
        muls64(&l64, &h64, x, y);
        return h64;

    case INDEX_op_divs:
        /* Avoid crashing on divide by zero, otherwise undefined. */
        if (type == TCG_TYPE_I32) {
            return (int32_t)x / ((int32_t)y ? : 1);
        }
        return (int64_t)x / ((int64_t)y ? : 1);

    case INDEX_op_divu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x / ((uint32_t)y ? : 1);
        }
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rems:
        if (type == TCG_TYPE_I32) {
            return (int32_t)x % ((int32_t)y ? : 1);
        }
        return (int64_t)x % ((int64_t)y ? : 1);

    case INDEX_op_remu:
        if (type == TCG_TYPE_I32) {
            return (uint32_t)x % ((uint32_t)y ? : 1);
        }
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        g_assert_not_reached();
    }
}

static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
                                    uint64_t x, uint64_t y)
{
    uint64_t res = do_constant_folding_2(op, type, x, y);
    if (type == TCG_TYPE_I32) {
        res = (int32_t)res;
    }
    return res;
}

static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int32_t)x < (int32_t)y;
    case TCG_COND_GE:
        return (int32_t)x >= (int32_t)y;
    case TCG_COND_LE:
        return (int32_t)x <= (int32_t)y;
    case TCG_COND_GT:
        return (int32_t)x > (int32_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
        return x == y;
    case TCG_COND_NE:
        return x != y;
    case TCG_COND_LT:
        return (int64_t)x < (int64_t)y;
    case TCG_COND_GE:
        return (int64_t)x >= (int64_t)y;
    case TCG_COND_LE:
        return (int64_t)x <= (int64_t)y;
    case TCG_COND_GT:
        return (int64_t)x > (int64_t)y;
    case TCG_COND_LTU:
        return x < y;
    case TCG_COND_GEU:
        return x >= y;
    case TCG_COND_LEU:
        return x <= y;
    case TCG_COND_GTU:
        return x > y;
    case TCG_COND_TSTEQ:
        return (x & y) == 0;
    case TCG_COND_TSTNE:
        return (x & y) != 0;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

static int do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GTU:
    case TCG_COND_NE:
        return 0;
    case TCG_COND_GE:
    case TCG_COND_GEU:
    case TCG_COND_LE:
    case TCG_COND_LEU:
    case TCG_COND_EQ:
        return 1;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        return -1;
    case TCG_COND_ALWAYS:
    case TCG_COND_NEVER:
        break;
    }
    g_assert_not_reached();
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static int do_constant_folding_cond(TCGType type, TCGArg x,
                                    TCGArg y, TCGCond c)
{
    if (arg_is_const(x) && arg_is_const(y)) {
        uint64_t xv = arg_const_val(x);
        uint64_t yv = arg_const_val(y);

        switch (type) {
        case TCG_TYPE_I32:
            return do_constant_folding_cond_32(xv, yv, c);
        case TCG_TYPE_I64:
            return do_constant_folding_cond_64(xv, yv, c);
        default:
            /* Only scalar comparisons are optimizable */
            return -1;
        }
    } else if (args_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (arg_is_const_val(y, 0)) {
        switch (c) {
        case TCG_COND_LTU:
        case TCG_COND_TSTNE:
            return 0;
        case TCG_COND_GEU:
        case TCG_COND_TSTEQ:
            return 1;
        default:
            return -1;
        }
    }
    return -1;
}

/**
 * swap_commutative:
 * @dest: TCGArg of the destination argument, or NO_DEST.
 * @p1: first paired argument
 * @p2: second paired argument
 *
 * If *@p1 is a constant and *@p2 is not, swap.
 * If *@p2 matches @dest, swap.
 * Return true if a swap was performed.
 */

#define NO_DEST  temp_arg(NULL)

static int pref_commutative(TempOptInfo *ti)
{
    /* Slight preference for non-zero constants second. */
    return !ti_is_const(ti) ? 0 : ti_const_val(ti) ? 3 : 2;
}

static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += pref_commutative(arg_info(a1));
    sum -= pref_commutative(arg_info(a2));

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}
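
/*
 * Illustrative examples of the canonicalization above: "add t0, $5, t1" is
 * rewritten as "add t0, t1, $5" because the constant scores higher, and
 * "mul t0, t1, t0" becomes "mul t0, t0, t1" because the second source
 * matches the destination.
 */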

static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += pref_commutative(arg_info(p1[0]));
    sum += pref_commutative(arg_info(p1[1]));
    sum -= pref_commutative(arg_info(p2[0]));
    sum -= pref_commutative(arg_info(p2[1]));
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/*
 * Return -1 if the condition can't be simplified,
 * and the result of the condition (0 or 1) if it can.
 */
static bool fold_and(OptContext *ctx, TCGOp *op);
static int do_constant_folding_cond1(OptContext *ctx, TCGOp *op, TCGArg dest,
                                     TCGArg *p1, TCGArg *p2, TCGArg *pcond)
{
    TCGCond cond;
    TempOptInfo *i1;
    bool swap;
    int r;

    swap = swap_commutative(dest, p1, p2);
    cond = *pcond;
    if (swap) {
        *pcond = cond = tcg_swap_cond(cond);
    }

    r = do_constant_folding_cond(ctx->type, *p1, *p2, cond);
    if (r >= 0) {
        return r;
    }
    if (!is_tst_cond(cond)) {
        return -1;
    }

    i1 = arg_info(*p1);

    /*
     * TSTNE x,x -> NE x,0
     * TSTNE x,i -> NE x,0 if i includes all nonzero bits of x
     */
    if (args_are_copies(*p1, *p2) ||
        (arg_is_const(*p2) && (i1->z_mask & ~arg_const_val(*p2)) == 0)) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
        return -1;
    }
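
    /*
     * For example (illustrative masks): if the z_mask of x is 0xff, then
     * TSTNE x,0xff yields the same result as NE x,0, because no bit outside
     * that mask can possibly be set in x.
     */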

    /* TSTNE x,i -> LT x,0 if i only includes sign bit copies */
    if (arg_is_const(*p2) && (arg_const_val(*p2) & ~i1->s_mask) == 0) {
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_ltge_cond(cond);
        return -1;
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst) {
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg tmp = arg_new_temp(ctx);

        op2->args[0] = tmp;
        op2->args[1] = *p1;
        op2->args[2] = *p2;
        fold_and(ctx, op2);

        *p1 = tmp;
        *p2 = arg_new_constant(ctx, 0);
        *pcond = tcg_tst_eqne_cond(cond);
    }
    return -1;
}

static int do_constant_folding_cond2(OptContext *ctx, TCGOp *op, TCGArg *args)
{
    TCGArg al, ah, bl, bh;
    TCGCond c;
    bool swap;
    int r;

    swap = swap_commutative2(args, args + 2);
    c = args[4];
    if (swap) {
        args[4] = c = tcg_swap_cond(c);
    }

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];

    if (arg_is_const(bl) && arg_is_const(bh)) {
        tcg_target_ulong blv = arg_const_val(bl);
        tcg_target_ulong bhv = arg_const_val(bh);
        uint64_t b = deposit64(blv, 32, 32, bhv);

        if (arg_is_const(al) && arg_is_const(ah)) {
            tcg_target_ulong alv = arg_const_val(al);
            tcg_target_ulong ahv = arg_const_val(ah);
            uint64_t a = deposit64(alv, 32, 32, ahv);

            r = do_constant_folding_cond_64(a, b, c);
            if (r >= 0) {
                return r;
            }
        }

        if (b == 0) {
            switch (c) {
            case TCG_COND_LTU:
            case TCG_COND_TSTNE:
                return 0;
            case TCG_COND_GEU:
            case TCG_COND_TSTEQ:
                return 1;
            default:
                break;
            }
        }

        /* TSTNE x,-1 -> NE x,0 */
        if (b == -1 && is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }

        /* TSTNE x,sign -> LT x,0 */
        if (b == INT64_MIN && is_tst_cond(c)) {
            /* bl must be 0, so copy that to bh */
            args[3] = bl;
            args[4] = tcg_tst_ltge_cond(c);
            return -1;
        }
    }

    if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
        r = do_constant_folding_cond_eq(c);
        if (r >= 0) {
            return r;
        }

        /* TSTNE x,x -> NE x,0 */
        if (is_tst_cond(c)) {
            args[3] = args[2] = arg_new_constant(ctx, 0);
            args[4] = tcg_tst_eqne_cond(c);
            return -1;
        }
    }

    /* Expand to AND with a temporary if no backend support. */
    if (!TCG_TARGET_HAS_tst && is_tst_cond(c)) {
        TCGOp *op1 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_and, 3);
        TCGArg t1 = arg_new_temp(ctx);
        TCGArg t2 = arg_new_temp(ctx);

        op1->args[0] = t1;
        op1->args[1] = al;
        op1->args[2] = bl;
        fold_and(ctx, op1);

        op2->args[0] = t2;
        op2->args[1] = ah;
        op2->args[2] = bh;
        fold_and(ctx, op2);

        args[0] = t1;
        args[1] = t2;
        args[3] = args[2] = arg_new_constant(ctx, 0);
        args[4] = tcg_tst_eqne_cond(c);
    }
    return -1;
}

static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
{
    for (int i = 0; i < nb_args; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        init_ts_info(ctx, ts);
    }
}

static void copy_propagate(OptContext *ctx, TCGOp *op,
                           int nb_oargs, int nb_iargs)
{
    for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (ts_is_copy(ts)) {
            op->args[i] = temp_arg(find_better_copy(ts));
        }
    }
}

static void finish_bb(OptContext *ctx)
{
    /* We only optimize memory barriers across basic blocks. */
    ctx->prev_mb = NULL;
}

static void finish_ebb(OptContext *ctx)
{
    finish_bb(ctx);
    /* We only optimize across extended basic blocks. */
    memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
    remove_mem_copy_all(ctx);
}

static bool finish_folding(OptContext *ctx, TCGOp *op)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    int i, nb_oargs;

    nb_oargs = def->nb_oargs;
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        reset_ts(ctx, ts);
    }
    return true;
}

/*
 * The fold_* functions return true when processing is complete,
 * usually by folding the operation to a constant or to a copy,
 * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
 * like collect information about the value produced, for use in
 * optimizing a subsequent operation.
 *
 * These first fold_* functions are all helpers, used by other
 * folders for more specific operations.
 */

static bool fold_const1(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1])) {
        uint64_t t = arg_const_val(op->args[1]);

        t = do_constant_folding(op->opc, ctx->type, t, 0);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    }
    return false;
}

static bool fold_const2(OptContext *ctx, TCGOp *op)
{
    if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
        uint64_t t1 = arg_const_val(op->args[1]);
        uint64_t t2 = arg_const_val(op->args[2]);

        t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
        return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    }
    return false;
}

static bool fold_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return false;
}

static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
{
    swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    return fold_const2(ctx, op);
}

/*
 * Record "zero" and "sign" masks for the single output of @op.
 * See TempOptInfo definition of z_mask and s_mask.
 * If z_mask allows, fold the output to constant zero.
 * The passed s_mask may be augmented by z_mask.
 */
static bool fold_masks_zosa_int(OptContext *ctx, TCGOp *op,
                                uint64_t z_mask, uint64_t o_mask,
                                int64_t s_mask, uint64_t a_mask)
{
    const TCGOpDef *def = &tcg_op_defs[op->opc];
    TCGTemp *ts;
    TempOptInfo *ti;
    int rep;

    /* Only single-output opcodes are supported here. */
    tcg_debug_assert(def->nb_oargs == 1);

    /*
     * 32-bit ops generate 32-bit results, which for the purpose of
     * simplifying tcg are sign-extended.  Certainly that's how we
     * represent our constants elsewhere.  Note that the bits will
     * be reset properly for a 64-bit value when encountering the
     * type changing opcodes.
     */
    if (ctx->type == TCG_TYPE_I32) {
        z_mask = (int32_t)z_mask;
        o_mask = (int32_t)o_mask;
        s_mask |= INT32_MIN;
        a_mask = (uint32_t)a_mask;
    }
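
    /*
     * For example (illustrative masks): a 32-bit result whose z_mask was
     * computed as 0x80000000 is canonicalized here to 0xffffffff80000000,
     * matching the sign-extended representation used for 32-bit constants.
     */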

    /* Bits that are known 1 and bits that are known 0 must not overlap. */
    tcg_debug_assert((o_mask & ~z_mask) == 0);

    /* If all bits that are not known zero are known one, it's a constant. */
    if (z_mask == o_mask) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], o_mask);
    }

    /* If no bits are affected, the operation devolves to a copy. */
    if (a_mask == 0) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    ts = arg_temp(op->args[0]);
    reset_ts(ctx, ts);

    ti = ts_info(ts);
    ti->z_mask = z_mask;
    ti->o_mask = o_mask;

    /* Canonicalize s_mask and incorporate data from z_mask. */
    rep = clz64(~s_mask);
    rep = MAX(rep, clz64(z_mask));
    rep = MAX(rep, clz64(~o_mask));
    rep = MAX(rep - 1, 0);
    ti->s_mask = INT64_MIN >> rep;
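
    /*
     * Worked example (illustrative masks): with incoming s_mask
     * 0xffff000000000000, z_mask 0x000000ffffffffff and o_mask 0, the three
     * clz64 terms are 16, 24 and 0, so rep becomes 24 - 1 and the stored
     * s_mask is 0xffffff0000000000: the known-zero high bits from z_mask
     * widen the run of known sign-bit copies.
     */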

    return false;
}

static bool fold_masks_zosa(OptContext *ctx, TCGOp *op, uint64_t z_mask,
                            uint64_t o_mask, int64_t s_mask, uint64_t a_mask)
{
    fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask);
    return true;
}

static bool fold_masks_zos(OptContext *ctx, TCGOp *op,
                           uint64_t z_mask, uint64_t o_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, -1);
}

static bool fold_masks_zo(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t o_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, -1);
}

static bool fold_masks_zs(OptContext *ctx, TCGOp *op,
                          uint64_t z_mask, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, s_mask, -1);
}

static bool fold_masks_z(OptContext *ctx, TCGOp *op, uint64_t z_mask)
{
    return fold_masks_zosa(ctx, op, z_mask, 0, 0, -1);
}

static bool fold_masks_s(OptContext *ctx, TCGOp *op, uint64_t s_mask)
{
    return fold_masks_zosa(ctx, op, -1, 0, s_mask, -1);
}

/*
 * Convert @op to NOT, if NOT is supported by the host.
 * Return true if the conversion is successful, which will still
 * indicate that the processing is complete.
 */
static bool fold_not(OptContext *ctx, TCGOp *op);
static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
{
    TCGOpcode not_op;
    bool have_not;

    switch (ctx->type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        not_op = INDEX_op_not;
        have_not = tcg_op_supported(INDEX_op_not, ctx->type, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        not_op = INDEX_op_not_vec;
        have_not = TCG_TARGET_HAS_not_vec;
        break;
    default:
        g_assert_not_reached();
    }
    if (have_not) {
        op->opc = not_op;
        op->args[1] = op->args[idx];
        return fold_not(ctx, op);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to @i. */
static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has first argument @i, fold to NOT. */
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[1], i)) {
        return fold_to_not(ctx, op, 2);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to @i. */
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to identity. */
static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/* If the binary operation has second argument @i, fold to NOT. */
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (arg_is_const_val(op->args[2], i)) {
        return fold_to_not(ctx, op, 1);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to @i. */
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    }
    return false;
}

/* If the binary operation has both arguments equal, fold to identity. */
static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
{
    if (args_are_copies(op->args[1], op->args[2])) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }
    return false;
}

/*
 * These outermost fold_<op> functions are sorted alphabetically.
 *
 * The ordering of the transformations should be:
 *   1) those that produce a constant
 *   2) those that produce a copy
 *   3) those that produce information about the result value.
 */

static bool fold_addco(OptContext *ctx, TCGOp *op);
static bool fold_or(OptContext *ctx, TCGOp *op);
static bool fold_orc(OptContext *ctx, TCGOp *op);
static bool fold_subbo(OptContext *ctx, TCGOp *op);
static bool fold_xor(OptContext *ctx, TCGOp *op);

static bool fold_add(OptContext *ctx, TCGOp *op)
{
    if (fold_const2_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

/* We cannot as yet do_constant_folding with vectors. */
static bool fold_add_vec(OptContext *ctx, TCGOp *op)
{
    if (fold_commutative(ctx, op) ||
        fold_xi_to_x(ctx, op, 0)) {
        return true;
    }
    return finish_folding(ctx, op);
}

static void squash_prev_carryout(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t2;

    op = QTAILQ_PREV(op, link);
    switch (op->opc) {
    case INDEX_op_addco:
        op->opc = INDEX_op_add;
        fold_add(ctx, op);
        break;
    case INDEX_op_addcio:
        op->opc = INDEX_op_addci;
        break;
    case INDEX_op_addc1o:
        op->opc = INDEX_op_add;
        t2 = arg_info(op->args[2]);
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
            /* Perform other constant folding, if needed. */
            fold_add(ctx, op);
        } else {
            TCGArg ret = op->args[0];
            op = opt_insert_after(ctx, op, INDEX_op_add, 3);
            op->args[0] = ret;
            op->args[1] = ret;
            op->args[2] = arg_new_constant(ctx, 1);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static bool fold_addci(OptContext *ctx, TCGOp *op)
{
    fold_commutative(ctx, op);

    if (ctx->carry_state < 0) {
        return finish_folding(ctx, op);
    }

    squash_prev_carryout(ctx, op);
    op->opc = INDEX_op_add;

    if (ctx->carry_state > 0) {
        TempOptInfo *t2 = arg_info(op->args[2]);

        /*
         * Propagate the known carry-in into a constant, if possible.
         * Otherwise emit a second add +1.
         */
        if (ti_is_const(t2)) {
            op->args[2] = arg_new_constant(ctx, ti_const_val(t2) + 1);
        } else {
            TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_add, 3);

            op2->args[0] = op->args[0];
            op2->args[1] = op->args[1];
            op2->args[2] = op->args[2];
            fold_add(ctx, op2);

            op->args[1] = op->args[0];
            op->args[2] = arg_new_constant(ctx, 1);
        }
    }

    ctx->carry_state = -1;
    return fold_add(ctx, op);
}

static bool fold_addcio(OptContext *ctx, TCGOp *op)
{
    TempOptInfo *t1, *t2;
    int carry_out = -1;
    uint64_t sum, max;

    fold_commutative(ctx, op);
    t1 = arg_info(op->args[1]);
    t2 = arg_info(op->args[2]);

    /*
     * The z_mask value is >= the maximum value that can be represented
     * with the known zero bits.  So adding the z_mask values will not
     * overflow if and only if the true values cannot overflow.
     */
    if (!uadd64_overflow(t1->z_mask, t2->z_mask, &sum) &&
        !uadd64_overflow(sum, ctx->carry_state != 0, &sum)) {
        carry_out = 0;
    }
1357
1358 if (ctx->carry_state < 0) {
1359 ctx->carry_state = carry_out;
1360 return finish_folding(ctx, op);
1361 }
1362
1363 squash_prev_carryout(ctx, op);
1364 if (ctx->carry_state == 0) {
1365 goto do_addco;
1366 }
1367
1368 /* Propagate the known carry-in into a constant, if possible. */
1369 max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
1370 if (ti_is_const(t2)) {
1371 uint64_t v = ti_const_val(t2) & max;
1372 if (v < max) {
1373 op->args[2] = arg_new_constant(ctx, v + 1);
1374 goto do_addco;
1375 }
1376 /* max + known carry in produces known carry out. */
1377 carry_out = 1;
1378 }
1379 if (ti_is_const(t1)) {
1380 uint64_t v = ti_const_val(t1) & max;
1381 if (v < max) {
1382 op->args[1] = arg_new_constant(ctx, v + 1);
1383 goto do_addco;
1384 }
1385 carry_out = 1;
1386 }
1387
1388 /* Adjust the opcode to remember the known carry-in. */
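    /* addc1o is an add producing carry-out whose carry-in is implicitly 1. */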
1389 op->opc = INDEX_op_addc1o;
1390 ctx->carry_state = carry_out;
1391 return finish_folding(ctx, op);
1392
1393 do_addco:
1394 op->opc = INDEX_op_addco;
1395 return fold_addco(ctx, op);
1396}
1397
1398static bool fold_addco(OptContext *ctx, TCGOp *op)
1399{
1400 TempOptInfo *t1, *t2;
1401 int carry_out = -1;
1402 uint64_t ign;
1403
1404 fold_commutative(ctx, op);
1405 t1 = arg_info(op->args[1]);
1406 t2 = arg_info(op->args[2]);
1407
1408 if (ti_is_const(t2)) {
1409 uint64_t v2 = ti_const_val(t2);
1410
1411 if (ti_is_const(t1)) {
1412 uint64_t v1 = ti_const_val(t1);
1413 /* Given sign-extension of z_mask for I32, we need not truncate. */
1414 carry_out = uadd64_overflow(v1, v2, &ign);
1415 } else if (v2 == 0) {
1416 carry_out = 0;
1417 }
1418 } else {
1419 /*
1420 * The z_mask value is >= the maximum value that can be represented
1421 * with the known zero bits. So if adding the z_mask values does not
1422 * overflow, the true values cannot overflow either.
1423 */
1424 if (!uadd64_overflow(t1->z_mask, t2->z_mask, &ign)) {
1425 carry_out = 0;
1426 }
1427 }
1428 ctx->carry_state = carry_out;
Richard Henderson76f42782025-01-14 13:58:39 -08001429 return finish_folding(ctx, op);
1430}
1431
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001432static bool fold_and(OptContext *ctx, TCGOp *op)
1433{
Richard Henderson1e2edf82024-12-09 16:48:36 -06001434 uint64_t z_mask, o_mask, s_mask, a_mask;
Richard Henderson1ca73722024-12-08 18:47:15 -06001435 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001436
Richard Henderson7a2f7082021-08-26 07:06:39 -07001437 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07001438 fold_xi_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001439 fold_xi_to_x(ctx, op, -1) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07001440 fold_xx_to_x(ctx, op)) {
1441 return true;
1442 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001443
Richard Henderson1ca73722024-12-08 18:47:15 -06001444 t1 = arg_info(op->args[1]);
1445 t2 = arg_info(op->args[2]);
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001446
Richard Henderson1e2edf82024-12-09 16:48:36 -06001447 z_mask = t1->z_mask & t2->z_mask;
1448 o_mask = t1->o_mask & t2->o_mask;
Richard Henderson1ca73722024-12-08 18:47:15 -06001449
1450 /*
1451 * Sign repetitions are perforce all identical, whether they are 1 or 0.
1452 * Bitwise operations preserve the relative quantity of the repetitions.
1453 */
1454 s_mask = t1->s_mask & t2->s_mask;
1455
Richard Henderson1e2edf82024-12-09 16:48:36 -06001456 /* Affected bits are those not known zero, masked by those known one. */
1457 a_mask = t1->z_mask & ~t2->o_mask;
1458
Richard Henderson932522a2023-10-23 14:29:46 -07001459 if (!fold_masks_zosa_int(ctx, op, z_mask, o_mask, s_mask, a_mask)) {
1460 if (ti_is_const(t2)) {
1461 /*
1462 * Canonicalize on extract, if valid. This aids x86 with its
1463 * 2 operand MOVZBL and 2 operand AND, selecting the TCGOpcode
1464 * which does not require matching operands. Other backends can
1465 * trivially expand the extract to AND during code generation.
1466 */
1467 uint64_t val = ti_const_val(t2);
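            /* A value with only contiguous low bits set satisfies val & (val + 1) == 0. */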
1468 if (!(val & (val + 1))) {
1469 unsigned len = ctz64(~val);
1470 if (TCG_TARGET_extract_valid(ctx->type, 0, len)) {
1471 op->opc = INDEX_op_extract;
1472 op->args[2] = 0;
1473 op->args[3] = len;
1474 }
1475 }
1476 }
1477 }
1478 return true;
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001479}
1480
1481static bool fold_andc(OptContext *ctx, TCGOp *op)
1482{
Richard Hendersond4d441e2024-12-22 16:08:42 -08001483 uint64_t z_mask, o_mask, s_mask, a_mask;
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001484 TempOptInfo *t1, *t2;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001485
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001486 if (fold_const2(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001487 fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001488 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001489 fold_ix_to_not(ctx, op, -1)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07001490 return true;
1491 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001492
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001493 t1 = arg_info(op->args[1]);
1494 t2 = arg_info(op->args[2]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001495
Richard Henderson899281c2023-11-15 11:18:55 -08001496 if (ti_is_const(t2)) {
1497 /* Fold andc r,x,i to and r,x,~i. */
1498 switch (ctx->type) {
1499 case TCG_TYPE_I32:
1500 case TCG_TYPE_I64:
1501 op->opc = INDEX_op_and;
1502 break;
1503 case TCG_TYPE_V64:
1504 case TCG_TYPE_V128:
1505 case TCG_TYPE_V256:
1506 op->opc = INDEX_op_and_vec;
1507 break;
1508 default:
1509 g_assert_not_reached();
1510 }
1511 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1512 return fold_and(ctx, op);
1513 }
1514
Richard Hendersond4d441e2024-12-22 16:08:42 -08001515 z_mask = t1->z_mask & ~t2->o_mask;
1516 o_mask = t1->o_mask & ~t2->z_mask;
Richard Henderson21e2b5f2024-12-08 18:56:55 -06001517 s_mask = t1->s_mask & t2->s_mask;
Richard Hendersond4d441e2024-12-22 16:08:42 -08001518
1519 /* Affected bits are those not known zero, masked by those known zero. */
1520 a_mask = t1->z_mask & t2->z_mask;
1521
1522 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001523}
1524
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001525static bool fold_bitsel_vec(OptContext *ctx, TCGOp *op)
1526{
1527 /* If true and false values are the same, eliminate the cmp. */
1528 if (args_are_copies(op->args[2], op->args[3])) {
1529 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1530 }
1531
1532 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08001533 uint64_t tv = arg_const_val(op->args[2]);
1534 uint64_t fv = arg_const_val(op->args[3]);
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001535
1536 if (tv == -1 && fv == 0) {
1537 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1538 }
1539 if (tv == 0 && fv == -1) {
1540 if (TCG_TARGET_HAS_not_vec) {
1541 op->opc = INDEX_op_not_vec;
1542 return fold_not(ctx, op);
1543 } else {
1544 op->opc = INDEX_op_xor_vec;
1545 op->args[2] = arg_new_constant(ctx, -1);
1546 return fold_xor(ctx, op);
1547 }
1548 }
1549 }
1550 if (arg_is_const(op->args[2])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08001551 uint64_t tv = arg_const_val(op->args[2]);
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001552 if (tv == -1) {
1553 op->opc = INDEX_op_or_vec;
1554 op->args[2] = op->args[3];
1555 return fold_or(ctx, op);
1556 }
1557 if (tv == 0 && TCG_TARGET_HAS_andc_vec) {
1558 op->opc = INDEX_op_andc_vec;
1559 op->args[2] = op->args[1];
1560 op->args[1] = op->args[3];
1561 return fold_andc(ctx, op);
1562 }
1563 }
1564 if (arg_is_const(op->args[3])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08001565 uint64_t fv = arg_const_val(op->args[3]);
Richard Henderson7d3c63a2024-12-09 14:06:08 -06001566 if (fv == 0) {
1567 op->opc = INDEX_op_and_vec;
1568 return fold_and(ctx, op);
1569 }
1570 if (fv == -1 && TCG_TARGET_HAS_orc_vec) {
1571 op->opc = INDEX_op_orc_vec;
1572 op->args[2] = op->args[1];
1573 op->args[1] = op->args[3];
1574 return fold_orc(ctx, op);
1575 }
1576 }
1577 return finish_folding(ctx, op);
1578}
1579
Richard Henderson079b0802021-08-24 09:30:59 -07001580static bool fold_brcond(OptContext *ctx, TCGOp *op)
1581{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001582 int i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[0],
Richard Henderson246c4b72023-10-24 16:36:50 -07001583 &op->args[1], &op->args[2]);
Richard Henderson079b0802021-08-24 09:30:59 -07001584 if (i == 0) {
1585 tcg_op_remove(ctx->tcg, op);
1586 return true;
1587 }
1588 if (i > 0) {
1589 op->opc = INDEX_op_br;
1590 op->args[0] = op->args[3];
Richard Henderson15268552024-12-08 07:45:11 -06001591 finish_ebb(ctx);
1592 } else {
1593 finish_bb(ctx);
Richard Henderson079b0802021-08-24 09:30:59 -07001594 }
Richard Henderson15268552024-12-08 07:45:11 -06001595 return true;
Richard Henderson079b0802021-08-24 09:30:59 -07001596}
1597
Richard Henderson764d2ab2021-08-24 09:22:11 -07001598static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1599{
Richard Henderson7e64b112023-10-24 16:53:56 -07001600 TCGCond cond;
1601 TCGArg label;
Richard Henderson7a2f7082021-08-26 07:06:39 -07001602 int i, inv = 0;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001603
Richard Hendersonfb04ab72024-01-10 18:21:58 +11001604 i = do_constant_folding_cond2(ctx, op, &op->args[0]);
Richard Henderson7e64b112023-10-24 16:53:56 -07001605 cond = op->args[4];
1606 label = op->args[5];
Richard Henderson764d2ab2021-08-24 09:22:11 -07001607 if (i >= 0) {
1608 goto do_brcond_const;
1609 }
1610
1611 switch (cond) {
1612 case TCG_COND_LT:
1613 case TCG_COND_GE:
1614 /*
1615 * Simplify LT/GE comparisons vs zero to a single compare
1616 * vs the high word of the input.
1617 */
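        /* The sign of the double-word value is the sign of its high word. */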
Richard Henderson27cdb852023-10-23 11:38:00 -07001618 if (arg_is_const_val(op->args[2], 0) &&
1619 arg_is_const_val(op->args[3], 0)) {
Richard Henderson764d2ab2021-08-24 09:22:11 -07001620 goto do_brcond_high;
1621 }
1622 break;
1623
1624 case TCG_COND_NE:
1625 inv = 1;
1626 QEMU_FALLTHROUGH;
1627 case TCG_COND_EQ:
1628 /*
1629 * Simplify EQ/NE comparisons where one of the pairs
1630 * can be simplified.
1631 */
Richard Henderson67f84c92021-08-25 08:00:20 -07001632 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001633 op->args[2], cond);
1634 switch (i ^ inv) {
1635 case 0:
1636 goto do_brcond_const;
1637 case 1:
1638 goto do_brcond_high;
1639 }
1640
Richard Henderson67f84c92021-08-25 08:00:20 -07001641 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Henderson764d2ab2021-08-24 09:22:11 -07001642 op->args[3], cond);
1643 switch (i ^ inv) {
1644 case 0:
1645 goto do_brcond_const;
1646 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001647 goto do_brcond_low;
1648 }
1649 break;
1650
1651 case TCG_COND_TSTEQ:
1652 case TCG_COND_TSTNE:
1653 if (arg_is_const_val(op->args[2], 0)) {
1654 goto do_brcond_high;
1655 }
1656 if (arg_is_const_val(op->args[3], 0)) {
1657 goto do_brcond_low;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001658 }
1659 break;
1660
1661 default:
1662 break;
1663
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001664 do_brcond_low:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001665 op->opc = INDEX_op_brcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001666 op->args[1] = op->args[2];
1667 op->args[2] = cond;
1668 op->args[3] = label;
1669 return fold_brcond(ctx, op);
1670
Richard Henderson764d2ab2021-08-24 09:22:11 -07001671 do_brcond_high:
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08001672 op->opc = INDEX_op_brcond;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001673 op->args[0] = op->args[1];
1674 op->args[1] = op->args[3];
1675 op->args[2] = cond;
1676 op->args[3] = label;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07001677 return fold_brcond(ctx, op);
Richard Henderson764d2ab2021-08-24 09:22:11 -07001678
1679 do_brcond_const:
1680 if (i == 0) {
1681 tcg_op_remove(ctx->tcg, op);
1682 return true;
1683 }
1684 op->opc = INDEX_op_br;
1685 op->args[0] = label;
Richard Henderson15268552024-12-08 07:45:11 -06001686 finish_ebb(ctx);
1687 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001688 }
Richard Henderson15268552024-12-08 07:45:11 -06001689
1690 finish_bb(ctx);
1691 return true;
Richard Henderson764d2ab2021-08-24 09:22:11 -07001692}
1693
Richard Henderson09bacdc2021-08-24 11:58:12 -07001694static bool fold_bswap(OptContext *ctx, TCGOp *op)
1695{
Richard Hendersone6e37332024-12-10 15:02:41 -06001696 uint64_t z_mask, o_mask, s_mask;
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001697 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Hendersone6e37332024-12-10 15:02:41 -06001698 int flags = op->args[2];
Richard Hendersonfae450b2021-08-25 22:42:19 -07001699
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001700 if (ti_is_const(t1)) {
1701 return tcg_opt_gen_movi(ctx, op, op->args[0],
1702 do_constant_folding(op->opc, ctx->type,
Richard Hendersone6e37332024-12-10 15:02:41 -06001703 ti_const_val(t1), flags));
Richard Henderson09bacdc2021-08-24 11:58:12 -07001704 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001705
Richard Hendersonc1e7b982024-12-08 19:42:20 -06001706 z_mask = t1->z_mask;
Richard Hendersone6e37332024-12-10 15:02:41 -06001707 o_mask = t1->o_mask;
1708 s_mask = 0;
1709
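    /*
     * TCG_BSWAP_OS sign-extends the swapped value into the high bits and
     * TCG_BSWAP_OZ zero-extends it; with neither flag the high bits are
     * treated as unknown below.
     */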
Richard Hendersonfae450b2021-08-25 22:42:19 -07001710 switch (op->opc) {
Richard Henderson0dd07ee2025-01-10 18:51:16 -08001711 case INDEX_op_bswap16:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001712 z_mask = bswap16(z_mask);
Richard Hendersone6e37332024-12-10 15:02:41 -06001713 o_mask = bswap16(o_mask);
1714 if (flags & TCG_BSWAP_OS) {
1715 z_mask = (int16_t)z_mask;
1716 o_mask = (int16_t)o_mask;
1717 s_mask = INT16_MIN;
1718 } else if (!(flags & TCG_BSWAP_OZ)) {
1719 z_mask |= MAKE_64BIT_MASK(16, 48);
1720 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001721 break;
Richard Henderson7498d882025-01-10 19:53:51 -08001722 case INDEX_op_bswap32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001723 z_mask = bswap32(z_mask);
Richard Hendersone6e37332024-12-10 15:02:41 -06001724 o_mask = bswap32(o_mask);
1725 if (flags & TCG_BSWAP_OS) {
1726 z_mask = (int32_t)z_mask;
1727 o_mask = (int32_t)o_mask;
1728 s_mask = INT32_MIN;
1729 } else if (!(flags & TCG_BSWAP_OZ)) {
1730 z_mask |= MAKE_64BIT_MASK(32, 32);
1731 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001732 break;
Richard Henderson3ad5d4c2025-01-10 21:54:44 -08001733 case INDEX_op_bswap64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07001734 z_mask = bswap64(z_mask);
Richard Hendersone6e37332024-12-10 15:02:41 -06001735 o_mask = bswap64(o_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001736 break;
1737 default:
1738 g_assert_not_reached();
1739 }
1740
Richard Hendersone6e37332024-12-10 15:02:41 -06001741 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson09bacdc2021-08-24 11:58:12 -07001742}
1743
Richard Henderson5cf32be2021-08-24 08:17:08 -07001744static bool fold_call(OptContext *ctx, TCGOp *op)
1745{
1746 TCGContext *s = ctx->tcg;
1747 int nb_oargs = TCGOP_CALLO(op);
1748 int nb_iargs = TCGOP_CALLI(op);
1749 int flags, i;
1750
1751 init_arguments(ctx, op, nb_oargs + nb_iargs);
1752 copy_propagate(ctx, op, nb_oargs, nb_iargs);
1753
1754 /* If the function reads or writes globals, reset temp data. */
1755 flags = tcg_call_flags(op);
1756 if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1757 int nb_globals = s->nb_globals;
1758
1759 for (i = 0; i < nb_globals; i++) {
1760 if (test_bit(i, ctx->temps_used.l)) {
Richard Henderson986cac12023-01-09 13:59:35 -08001761 reset_ts(ctx, &ctx->tcg->temps[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001762 }
1763 }
1764 }
1765
Richard Hendersonab84dc32023-08-23 23:04:24 -07001766 /* If the function has side effects, reset mem data. */
1767 if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1768 remove_mem_copy_all(ctx);
1769 }
1770
Richard Henderson5cf32be2021-08-24 08:17:08 -07001771 /* Reset temp data for outputs. */
1772 for (i = 0; i < nb_oargs; i++) {
Richard Henderson986cac12023-01-09 13:59:35 -08001773 reset_temp(ctx, op->args[i]);
Richard Henderson5cf32be2021-08-24 08:17:08 -07001774 }
1775
1776 /* Stop optimizing MB across calls. */
1777 ctx->prev_mb = NULL;
1778 return true;
1779}
1780
Richard Henderson29f65862024-12-09 14:09:49 -06001781static bool fold_cmp_vec(OptContext *ctx, TCGOp *op)
1782{
1783 /* Canonicalize the comparison to put immediate second. */
1784 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1785 op->args[3] = tcg_swap_cond(op->args[3]);
1786 }
1787 return finish_folding(ctx, op);
1788}
1789
1790static bool fold_cmpsel_vec(OptContext *ctx, TCGOp *op)
1791{
1792 /* If true and false values are the same, eliminate the cmp. */
1793 if (args_are_copies(op->args[3], op->args[4])) {
1794 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
1795 }
1796
1797 /* Canonicalize the comparison to put immediate second. */
1798 if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1799 op->args[5] = tcg_swap_cond(op->args[5]);
1800 }
1801 /*
1802 * Canonicalize the "false" input reg to match the destination,
1803 * so that the tcg backend can implement "move if true".
1804 */
1805 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1806 op->args[5] = tcg_invert_cond(op->args[5]);
1807 }
1808 return finish_folding(ctx, op);
1809}
1810
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001811static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1812{
Richard Hendersonce1d6632024-12-08 19:47:51 -06001813 uint64_t z_mask, s_mask;
1814 TempOptInfo *t1 = arg_info(op->args[1]);
1815 TempOptInfo *t2 = arg_info(op->args[2]);
Richard Hendersonfae450b2021-08-25 22:42:19 -07001816
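    /*
     * The clz/ctz opcodes return arg2 when arg1 is zero, so the result is
     * either a bit count in [0, width - 1] or a copy of arg2; the masks
     * computed below must cover both cases.
     */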
Richard Hendersonce1d6632024-12-08 19:47:51 -06001817 if (ti_is_const(t1)) {
1818 uint64_t t = ti_const_val(t1);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001819
1820 if (t != 0) {
Richard Henderson67f84c92021-08-25 08:00:20 -07001821 t = do_constant_folding(op->opc, ctx->type, t, 0);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001822 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1823 }
1824 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1825 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001826
1827 switch (ctx->type) {
1828 case TCG_TYPE_I32:
1829 z_mask = 31;
1830 break;
1831 case TCG_TYPE_I64:
1832 z_mask = 63;
1833 break;
1834 default:
1835 g_assert_not_reached();
1836 }
Richard Hendersonce1d6632024-12-08 19:47:51 -06001837 s_mask = ~z_mask;
1838 z_mask |= t2->z_mask;
1839 s_mask &= t2->s_mask;
1840
1841 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Henderson30dd0bf2021-08-24 10:51:34 -07001842}
1843
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001844static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1845{
Richard Henderson81be07f2024-12-08 19:49:17 -06001846 uint64_t z_mask;
1847
Richard Hendersonfae450b2021-08-25 22:42:19 -07001848 if (fold_const1(ctx, op)) {
1849 return true;
1850 }
1851
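    /* The count lies in [0, width], so width | (width - 1) covers it. */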
1852 switch (ctx->type) {
1853 case TCG_TYPE_I32:
Richard Henderson81be07f2024-12-08 19:49:17 -06001854 z_mask = 32 | 31;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001855 break;
1856 case TCG_TYPE_I64:
Richard Henderson81be07f2024-12-08 19:49:17 -06001857 z_mask = 64 | 63;
Richard Hendersonfae450b2021-08-25 22:42:19 -07001858 break;
1859 default:
1860 g_assert_not_reached();
1861 }
Richard Henderson81be07f2024-12-08 19:49:17 -06001862 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001863}
1864
Richard Henderson1b1907b2021-08-24 10:47:04 -07001865static bool fold_deposit(OptContext *ctx, TCGOp *op)
1866{
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001867 TempOptInfo *t1 = arg_info(op->args[1]);
1868 TempOptInfo *t2 = arg_info(op->args[2]);
1869 int ofs = op->args[3];
1870 int len = op->args[4];
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001871 int width = 8 * tcg_type_size(ctx->type);
Richard Henderson9d80b3c2024-12-10 14:45:44 -06001872 uint64_t z_mask, o_mask, s_mask;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001873
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001874 if (ti_is_const(t1) && ti_is_const(t2)) {
1875 return tcg_opt_gen_movi(ctx, op, op->args[0],
1876 deposit64(ti_const_val(t1), ofs, len,
1877 ti_const_val(t2)));
Richard Henderson1b1907b2021-08-24 10:47:04 -07001878 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001879
Richard Henderson8f7a8402023-08-13 11:03:05 -07001880 /* Inserting a value into zero at offset 0. */
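    /* E.g. deposit of x into zero at ofs 0, len 8 is simply x & 0xff. */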
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001881 if (ti_is_const_val(t1, 0) && ofs == 0) {
1882 uint64_t mask = MAKE_64BIT_MASK(0, len);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001883
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001884 op->opc = INDEX_op_and;
Richard Henderson8f7a8402023-08-13 11:03:05 -07001885 op->args[1] = op->args[2];
Richard Henderson26aac972023-10-23 12:31:57 -07001886 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001887 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001888 }
1889
1890 /* Inserting zero into a value. */
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001891 if (ti_is_const_val(t2, 0)) {
1892 uint64_t mask = deposit64(-1, ofs, len, 0);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001893
Richard Hendersonc3b920b2025-01-06 10:32:44 -08001894 op->opc = INDEX_op_and;
Richard Henderson26aac972023-10-23 12:31:57 -07001895 op->args[2] = arg_new_constant(ctx, mask);
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001896 return fold_and(ctx, op);
Richard Henderson8f7a8402023-08-13 11:03:05 -07001897 }
1898
Richard Hendersonedb832c2024-12-19 17:56:05 -08001899 /* The s_mask from the top portion of the deposit is still valid. */
1900 if (ofs + len == width) {
1901 s_mask = t2->s_mask << ofs;
1902 } else {
1903 s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
1904 }
1905
Richard Hendersonc7739ab2024-12-08 19:57:28 -06001906 z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
Richard Henderson9d80b3c2024-12-10 14:45:44 -06001907 o_mask = deposit64(t1->o_mask, ofs, len, t2->o_mask);
1908
1909 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson1b1907b2021-08-24 10:47:04 -07001910}
1911
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001912static bool fold_divide(OptContext *ctx, TCGOp *op)
1913{
Richard Henderson2f9d9a32021-10-25 11:30:14 -07001914 if (fold_const2(ctx, op) ||
1915 fold_xi_to_x(ctx, op, 1)) {
1916 return true;
1917 }
Richard Henderson3d5ec802024-12-08 19:59:15 -06001918 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001919}
1920
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001921static bool fold_dup(OptContext *ctx, TCGOp *op)
1922{
1923 if (arg_is_const(op->args[1])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08001924 uint64_t t = arg_const_val(op->args[1]);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001925 t = dup_const(TCGOP_VECE(op), t);
1926 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1927 }
Richard Hendersone089d692024-12-08 20:00:51 -06001928 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001929}
1930
1931static bool fold_dup2(OptContext *ctx, TCGOp *op)
1932{
1933 if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08001934 uint64_t t = deposit64(arg_const_val(op->args[1]), 32, 32,
1935 arg_const_val(op->args[2]));
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001936 return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1937 }
1938
1939 if (args_are_copies(op->args[1], op->args[2])) {
1940 op->opc = INDEX_op_dup_vec;
1941 TCGOP_VECE(op) = MO_32;
1942 }
Richard Hendersone089d692024-12-08 20:00:51 -06001943 return finish_folding(ctx, op);
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07001944}
1945
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001946static bool fold_eqv(OptContext *ctx, TCGOp *op)
1947{
Richard Henderson33fceba2024-12-10 08:26:56 -06001948 uint64_t z_mask, o_mask, s_mask;
Richard Henderson46c68d72023-11-15 11:51:28 -08001949 TempOptInfo *t1, *t2;
Richard Hendersonef6be622024-12-08 20:03:15 -06001950
Richard Henderson7a2f7082021-08-26 07:06:39 -07001951 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07001952 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07001953 fold_xi_to_not(ctx, op, 0)) {
1954 return true;
1955 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07001956
Richard Henderson46c68d72023-11-15 11:51:28 -08001957 t2 = arg_info(op->args[2]);
1958 if (ti_is_const(t2)) {
1959 /* Fold eqv r,x,i to xor r,x,~i. */
1960 switch (ctx->type) {
1961 case TCG_TYPE_I32:
1962 case TCG_TYPE_I64:
1963 op->opc = INDEX_op_xor;
1964 break;
1965 case TCG_TYPE_V64:
1966 case TCG_TYPE_V128:
1967 case TCG_TYPE_V256:
1968 op->opc = INDEX_op_xor_vec;
1969 break;
1970 default:
1971 g_assert_not_reached();
1972 }
1973 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
1974 return fold_xor(ctx, op);
1975 }
1976
1977 t1 = arg_info(op->args[1]);
Richard Henderson33fceba2024-12-10 08:26:56 -06001978
1979 z_mask = (t1->z_mask | ~t2->o_mask) & (t2->z_mask | ~t1->o_mask);
1980 o_mask = ~(t1->z_mask | t2->z_mask) | (t1->o_mask & t2->o_mask);
Richard Henderson46c68d72023-11-15 11:51:28 -08001981 s_mask = t1->s_mask & t2->s_mask;
Richard Henderson33fceba2024-12-10 08:26:56 -06001982
1983 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07001984}
1985
Richard Hendersonb6617c82021-08-24 10:44:53 -07001986static bool fold_extract(OptContext *ctx, TCGOp *op)
1987{
Richard Hendersonfcde7362024-12-10 15:34:12 -06001988 uint64_t z_mask, o_mask, a_mask;
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001989 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07001990 int pos = op->args[2];
1991 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07001992
Richard Hendersonb6cd00f2024-12-08 20:05:11 -06001993 if (ti_is_const(t1)) {
1994 return tcg_opt_gen_movi(ctx, op, op->args[0],
1995 extract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07001996 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07001997
Richard Hendersonfcde7362024-12-10 15:34:12 -06001998 z_mask = extract64(t1->z_mask, pos, len);
1999 o_mask = extract64(t1->o_mask, pos, len);
2000 a_mask = pos ? -1 : t1->z_mask ^ z_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002001
Richard Hendersonfcde7362024-12-10 15:34:12 -06002002 return fold_masks_zosa(ctx, op, z_mask, o_mask, 0, a_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07002003}
2004
Richard Hendersondcd08992021-08-24 10:41:39 -07002005static bool fold_extract2(OptContext *ctx, TCGOp *op)
2006{
Richard Henderson83c47c32024-12-10 15:48:16 -06002007 TempOptInfo *t1 = arg_info(op->args[1]);
2008 TempOptInfo *t2 = arg_info(op->args[2]);
2009 uint64_t z1 = t1->z_mask;
2010 uint64_t z2 = t2->z_mask;
2011 uint64_t o1 = t1->o_mask;
2012 uint64_t o2 = t2->o_mask;
2013 int shr = op->args[3];
Richard Hendersondcd08992021-08-24 10:41:39 -07002014
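    /*
     * extract2 computes (arg1 >> shr) | (arg2 << (width - shr)), so the
     * z and o masks of each input are shifted the same way and combined.
     */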
Richard Henderson83c47c32024-12-10 15:48:16 -06002015 if (ctx->type == TCG_TYPE_I32) {
2016 z1 = (uint32_t)z1 >> shr;
2017 o1 = (uint32_t)o1 >> shr;
2018 z2 = (uint64_t)((int32_t)z2 << (32 - shr));
2019 o2 = (uint64_t)((int32_t)o2 << (32 - shr));
2020 } else {
2021 z1 >>= shr;
2022 o1 >>= shr;
2023 z2 <<= 64 - shr;
2024 o2 <<= 64 - shr;
Richard Hendersondcd08992021-08-24 10:41:39 -07002025 }
Richard Henderson83c47c32024-12-10 15:48:16 -06002026
2027 return fold_masks_zo(ctx, op, z1 | z2, o1 | o2);
Richard Hendersondcd08992021-08-24 10:41:39 -07002028}
2029
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002030static bool fold_exts(OptContext *ctx, TCGOp *op)
2031{
Richard Hendersonde852572024-12-10 15:55:33 -06002032 uint64_t z_mask, o_mask, s_mask;
Richard Hendersona9621922024-12-08 20:08:46 -06002033 TempOptInfo *t1;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002034
2035 if (fold_const1(ctx, op)) {
2036 return true;
2037 }
2038
Richard Hendersona9621922024-12-08 20:08:46 -06002039 t1 = arg_info(op->args[1]);
2040 z_mask = t1->z_mask;
Richard Hendersonde852572024-12-10 15:55:33 -06002041 o_mask = t1->o_mask;
Richard Hendersona9621922024-12-08 20:08:46 -06002042 s_mask = t1->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002043
2044 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07002045 case INDEX_op_ext_i32_i64:
Richard Hendersona9621922024-12-08 20:08:46 -06002046 s_mask |= INT32_MIN;
2047 z_mask = (int32_t)z_mask;
Richard Hendersonde852572024-12-10 15:55:33 -06002048 o_mask = (int32_t)o_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002049 break;
2050 default:
2051 g_assert_not_reached();
2052 }
Richard Hendersonde852572024-12-10 15:55:33 -06002053 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002054}
2055
2056static bool fold_extu(OptContext *ctx, TCGOp *op)
2057{
Richard Hendersonf7834242024-12-10 15:58:04 -06002058 uint64_t z_mask, o_mask;
2059 TempOptInfo *t1;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002060
2061 if (fold_const1(ctx, op)) {
2062 return true;
2063 }
2064
Richard Hendersonf7834242024-12-10 15:58:04 -06002065 t1 = arg_info(op->args[1]);
2066 z_mask = t1->z_mask;
2067 o_mask = t1->o_mask;
2068
Richard Hendersonfae450b2021-08-25 22:42:19 -07002069 switch (op->opc) {
Richard Hendersonfae450b2021-08-25 22:42:19 -07002070 case INDEX_op_extrl_i64_i32:
2071 case INDEX_op_extu_i32_i64:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002072 z_mask = (uint32_t)z_mask;
Richard Hendersonf7834242024-12-10 15:58:04 -06002073 o_mask = (uint32_t)o_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002074 break;
2075 case INDEX_op_extrh_i64_i32:
Richard Hendersonfae450b2021-08-25 22:42:19 -07002076 z_mask >>= 32;
Richard Hendersonf7834242024-12-10 15:58:04 -06002077 o_mask >>= 32;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002078 break;
2079 default:
2080 g_assert_not_reached();
2081 }
Richard Hendersonf7834242024-12-10 15:58:04 -06002082 return fold_masks_zo(ctx, op, z_mask, o_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002083}
2084
Richard Henderson3eefdf22021-08-25 11:06:43 -07002085static bool fold_mb(OptContext *ctx, TCGOp *op)
2086{
2087 /* Eliminate duplicate and redundant fence instructions. */
2088 if (ctx->prev_mb) {
2089 /*
2090 * Merge two barriers of the same type into one,
2091 * or a weaker barrier into a stronger one,
2092 * or two weaker barriers into a stronger one.
2093 * mb X; mb Y => mb X|Y
2094 * mb; strl => mb; st
2095 * ldaq; mb => ld; mb
2096 * ldaq; strl => ld; mb; st
2097 * Other combinations are also merged into a strong
2098 * barrier. This is stricter than specified but for
2099 * the purposes of TCG is better than not optimizing.
2100 */
2101 ctx->prev_mb->args[0] |= op->args[0];
2102 tcg_op_remove(ctx->tcg, op);
2103 } else {
2104 ctx->prev_mb = op;
2105 }
2106 return true;
2107}
2108
Richard Henderson2cfac7f2021-08-25 13:05:43 -07002109static bool fold_mov(OptContext *ctx, TCGOp *op)
2110{
2111 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2112}
2113
Richard Henderson0c310a32021-08-24 10:37:24 -07002114static bool fold_movcond(OptContext *ctx, TCGOp *op)
2115{
Richard Henderson08d676a2024-12-10 16:30:12 -06002116 uint64_t z_mask, o_mask, s_mask;
Richard Henderson32202782024-12-08 20:16:38 -06002117 TempOptInfo *tt, *ft;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002118 int i;
Richard Henderson0c310a32021-08-24 10:37:24 -07002119
Richard Henderson141125e2024-09-06 21:00:10 -07002120 /* If true and false values are the same, eliminate the cmp. */
2121 if (args_are_copies(op->args[3], op->args[4])) {
2122 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[3]);
2123 }
2124
Richard Henderson7a2f7082021-08-26 07:06:39 -07002125 /*
2126 * Canonicalize the "false" input reg to match the destination reg so
2127 * that the tcg backend can implement a "move if true" operation.
2128 */
2129 if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
Richard Henderson246c4b72023-10-24 16:36:50 -07002130 op->args[5] = tcg_invert_cond(op->args[5]);
Richard Henderson7a2f7082021-08-26 07:06:39 -07002131 }
2132
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002133 i = do_constant_folding_cond1(ctx, op, NO_DEST, &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002134 &op->args[2], &op->args[5]);
Richard Henderson0c310a32021-08-24 10:37:24 -07002135 if (i >= 0) {
2136 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
2137 }
2138
Richard Henderson32202782024-12-08 20:16:38 -06002139 tt = arg_info(op->args[3]);
2140 ft = arg_info(op->args[4]);
2141 z_mask = tt->z_mask | ft->z_mask;
Richard Henderson08d676a2024-12-10 16:30:12 -06002142 o_mask = tt->o_mask & ft->o_mask;
Richard Henderson32202782024-12-08 20:16:38 -06002143 s_mask = tt->s_mask & ft->s_mask;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002144
Richard Henderson32202782024-12-08 20:16:38 -06002145 if (ti_is_const(tt) && ti_is_const(ft)) {
2146 uint64_t tv = ti_const_val(tt);
2147 uint64_t fv = ti_const_val(ft);
Richard Henderson246c4b72023-10-24 16:36:50 -07002148 TCGCond cond = op->args[5];
Richard Henderson0c310a32021-08-24 10:37:24 -07002149
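        /*
         * Selecting between {1,0} or {-1,0} reduces movcond to setcond or
         * negsetcond, possibly with the condition inverted.
         */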
Richard Henderson0c310a32021-08-24 10:37:24 -07002150 if (tv == 1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002151 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002152 op->args[3] = cond;
2153 } else if (fv == 1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002154 op->opc = INDEX_op_setcond;
Richard Henderson0c310a32021-08-24 10:37:24 -07002155 op->args[3] = tcg_invert_cond(cond);
Richard Hendersonf7914582025-01-09 12:48:21 -08002156 } else if (tv == -1 && fv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002157 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002158 op->args[3] = cond;
2159 } else if (fv == -1 && tv == 0) {
Richard Hendersona363e1e2025-01-10 09:26:44 -08002160 op->opc = INDEX_op_negsetcond;
Richard Hendersonf7914582025-01-09 12:48:21 -08002161 op->args[3] = tcg_invert_cond(cond);
Richard Henderson0c310a32021-08-24 10:37:24 -07002162 }
2163 }
Richard Henderson32202782024-12-08 20:16:38 -06002164
Richard Henderson08d676a2024-12-10 16:30:12 -06002165 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson0c310a32021-08-24 10:37:24 -07002166}
2167
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002168static bool fold_mul(OptContext *ctx, TCGOp *op)
2169{
Richard Hendersone8679952021-08-25 13:19:52 -07002170 if (fold_const2(ctx, op) ||
Richard Henderson5b5cf472021-10-25 11:19:14 -07002171 fold_xi_to_i(ctx, op, 0) ||
2172 fold_xi_to_x(ctx, op, 1)) {
Richard Hendersone8679952021-08-25 13:19:52 -07002173 return true;
2174 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002175 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002176}
2177
2178static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
2179{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002180 if (fold_const2_commutative(ctx, op) ||
Richard Hendersone8679952021-08-25 13:19:52 -07002181 fold_xi_to_i(ctx, op, 0)) {
2182 return true;
2183 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002184 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002185}
2186
Richard Henderson407112b2021-08-26 06:33:04 -07002187static bool fold_multiply2(OptContext *ctx, TCGOp *op)
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002188{
Richard Henderson7a2f7082021-08-26 07:06:39 -07002189 swap_commutative(op->args[0], &op->args[2], &op->args[3]);
2190
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002191 if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08002192 uint64_t a = arg_const_val(op->args[2]);
2193 uint64_t b = arg_const_val(op->args[3]);
Richard Henderson407112b2021-08-26 06:33:04 -07002194 uint64_t h, l;
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002195 TCGArg rl, rh;
Richard Henderson407112b2021-08-26 06:33:04 -07002196 TCGOp *op2;
2197
2198 switch (op->opc) {
Richard Hendersond7761982025-01-09 09:11:53 -08002199 case INDEX_op_mulu2:
2200 if (ctx->type == TCG_TYPE_I32) {
2201 l = (uint64_t)(uint32_t)a * (uint32_t)b;
2202 h = (int32_t)(l >> 32);
2203 l = (int32_t)l;
2204 } else {
2205 mulu64(&l, &h, a, b);
2206 }
Richard Henderson407112b2021-08-26 06:33:04 -07002207 break;
Richard Hendersonbfe96482025-01-09 07:24:32 -08002208 case INDEX_op_muls2:
2209 if (ctx->type == TCG_TYPE_I32) {
2210 l = (int64_t)(int32_t)a * (int32_t)b;
2211 h = l >> 32;
2212 l = (int32_t)l;
2213 } else {
2214 muls64(&l, &h, a, b);
2215 }
Richard Henderson407112b2021-08-26 06:33:04 -07002216 break;
2217 default:
2218 g_assert_not_reached();
2219 }
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002220
2221 rl = op->args[0];
2222 rh = op->args[1];
Richard Henderson407112b2021-08-26 06:33:04 -07002223
2224 /* The proper opcode is supplied by tcg_opt_gen_mov. */
Richard Hendersona3c1c572025-04-21 11:05:29 -07002225 op2 = opt_insert_before(ctx, op, 0, 2);
Richard Henderson407112b2021-08-26 06:33:04 -07002226
2227 tcg_opt_gen_movi(ctx, op, rl, l);
2228 tcg_opt_gen_movi(ctx, op2, rh, h);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002229 return true;
2230 }
Richard Hendersoncd9c5832024-12-08 20:18:02 -06002231 return finish_folding(ctx, op);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07002232}
2233
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002234static bool fold_nand(OptContext *ctx, TCGOp *op)
2235{
Richard Henderson16559c32024-12-09 18:13:15 -06002236 uint64_t z_mask, o_mask, s_mask;
2237 TempOptInfo *t1, *t2;
Richard Hendersonfa3168e2024-12-08 20:20:40 -06002238
Richard Henderson7a2f7082021-08-26 07:06:39 -07002239 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002240 fold_xi_to_not(ctx, op, -1)) {
2241 return true;
2242 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002243
Richard Henderson16559c32024-12-09 18:13:15 -06002244 t1 = arg_info(op->args[1]);
2245 t2 = arg_info(op->args[2]);
2246
2247 z_mask = ~(t1->o_mask & t2->o_mask);
2248 o_mask = ~(t1->z_mask & t2->z_mask);
2249 s_mask = t1->s_mask & t2->s_mask;
2250
2251 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002252}
2253
Richard Hendersone25fe882024-04-04 20:53:50 +00002254static bool fold_neg_no_const(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002255{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002256 /* Set to 1 all bits to the left of the rightmost. */
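    /*
     * If the lowest possibly-set input bit is bit k, the input is a multiple
     * of 2**k and so is its negation, so bits below k remain known zero.
     */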
Richard Hendersone25fe882024-04-04 20:53:50 +00002257 uint64_t z_mask = arg_info(op->args[1])->z_mask;
Richard Hendersond151fd32024-12-08 20:23:11 -06002258 z_mask = -(z_mask & -z_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002259
Richard Hendersond151fd32024-12-08 20:23:11 -06002260 return fold_masks_z(ctx, op, z_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002261}
2262
Richard Hendersone25fe882024-04-04 20:53:50 +00002263static bool fold_neg(OptContext *ctx, TCGOp *op)
2264{
2265 return fold_const1(ctx, op) || fold_neg_no_const(ctx, op);
2266}
2267
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002268static bool fold_nor(OptContext *ctx, TCGOp *op)
2269{
Richard Henderson682d6d52024-12-09 21:13:02 -06002270 uint64_t z_mask, o_mask, s_mask;
2271 TempOptInfo *t1, *t2;
Richard Henderson2b7b6952024-12-08 20:25:21 -06002272
Richard Henderson7a2f7082021-08-26 07:06:39 -07002273 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002274 fold_xi_to_not(ctx, op, 0)) {
2275 return true;
2276 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002277
Richard Henderson682d6d52024-12-09 21:13:02 -06002278 t1 = arg_info(op->args[1]);
2279 t2 = arg_info(op->args[2]);
2280
2281 z_mask = ~(t1->o_mask | t2->o_mask);
2282 o_mask = ~(t1->z_mask | t2->z_mask);
2283 s_mask = t1->s_mask & t2->s_mask;
2284
2285 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002286}
2287
2288static bool fold_not(OptContext *ctx, TCGOp *op)
2289{
Richard Hendersond89504b2024-12-09 21:15:37 -06002290 TempOptInfo *t1;
2291
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002292 if (fold_const1(ctx, op)) {
2293 return true;
2294 }
Richard Hendersond89504b2024-12-09 21:15:37 -06002295
2296 t1 = arg_info(op->args[1]);
2297 return fold_masks_zos(ctx, op, ~t1->o_mask, ~t1->z_mask, t1->s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002298}
2299
2300static bool fold_or(OptContext *ctx, TCGOp *op)
2301{
Richard Henderson84b399d2024-12-09 21:35:53 -06002302 uint64_t z_mask, o_mask, s_mask, a_mask;
Richard Henderson83b1ba32024-12-08 20:28:59 -06002303 TempOptInfo *t1, *t2;
2304
Richard Henderson7a2f7082021-08-26 07:06:39 -07002305 if (fold_const2_commutative(ctx, op) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002306 fold_xi_to_x(ctx, op, 0) ||
Richard Hendersonca7bb042021-08-25 13:14:21 -07002307 fold_xx_to_x(ctx, op)) {
2308 return true;
2309 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002310
Richard Henderson83b1ba32024-12-08 20:28:59 -06002311 t1 = arg_info(op->args[1]);
2312 t2 = arg_info(op->args[2]);
Richard Henderson84b399d2024-12-09 21:35:53 -06002313
Richard Henderson83b1ba32024-12-08 20:28:59 -06002314 z_mask = t1->z_mask | t2->z_mask;
Richard Henderson84b399d2024-12-09 21:35:53 -06002315 o_mask = t1->o_mask | t2->o_mask;
Richard Henderson83b1ba32024-12-08 20:28:59 -06002316 s_mask = t1->s_mask & t2->s_mask;
Richard Henderson84b399d2024-12-09 21:35:53 -06002317
2318 /* Affected bits are those not known one, masked by those known zero. */
2319 a_mask = ~t1->o_mask & t2->z_mask;
2320
2321 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002322}
2323
2324static bool fold_orc(OptContext *ctx, TCGOp *op)
2325{
Richard Hendersoncc4033e2024-12-09 22:22:27 -06002326 uint64_t z_mask, o_mask, s_mask, a_mask;
Richard Henderson50e40ec2024-12-10 08:13:10 -06002327 TempOptInfo *t1, *t2;
Richard Henderson54e26b22024-12-08 20:30:20 -06002328
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002329 if (fold_const2(ctx, op) ||
Richard Henderson4e858d92021-08-26 07:31:13 -07002330 fold_xx_to_i(ctx, op, -1) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002331 fold_xi_to_x(ctx, op, -1) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07002332 fold_ix_to_not(ctx, op, 0)) {
2333 return true;
2334 }
Richard Henderson3f2b1f82021-08-26 13:08:54 -07002335
Richard Henderson50e40ec2024-12-10 08:13:10 -06002336 t2 = arg_info(op->args[2]);
2337 if (ti_is_const(t2)) {
2338 /* Fold orc r,x,i to or r,x,~i. */
2339 switch (ctx->type) {
2340 case TCG_TYPE_I32:
2341 case TCG_TYPE_I64:
2342 op->opc = INDEX_op_or;
2343 break;
2344 case TCG_TYPE_V64:
2345 case TCG_TYPE_V128:
2346 case TCG_TYPE_V256:
2347 op->opc = INDEX_op_or_vec;
2348 break;
2349 default:
2350 g_assert_not_reached();
2351 }
2352 op->args[2] = arg_new_constant(ctx, ~ti_const_val(t2));
2353 return fold_or(ctx, op);
2354 }
2355
2356 t1 = arg_info(op->args[1]);
Richard Hendersoncc4033e2024-12-09 22:22:27 -06002357
2358 z_mask = t1->z_mask | ~t2->o_mask;
2359 o_mask = t1->o_mask | ~t2->z_mask;
Richard Henderson50e40ec2024-12-10 08:13:10 -06002360 s_mask = t1->s_mask & t2->s_mask;
Richard Hendersoncc4033e2024-12-09 22:22:27 -06002361
2362 /* Affected bits are those not known one, masked by those known one. */
2363 a_mask = ~t1->o_mask & ~t2->o_mask;
2364
2365 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002366}
2367
Richard Henderson6813be92024-12-08 20:33:30 -06002368static bool fold_qemu_ld_1reg(OptContext *ctx, TCGOp *op)
Richard Henderson3eefdf22021-08-25 11:06:43 -07002369{
Richard Hendersonfae450b2021-08-25 22:42:19 -07002370 const TCGOpDef *def = &tcg_op_defs[op->opc];
2371 MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
2372 MemOp mop = get_memop(oi);
2373 int width = 8 * memop_size(mop);
Richard Henderson6813be92024-12-08 20:33:30 -06002374 uint64_t z_mask = -1, s_mask = 0;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002375
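    /*
     * A load narrower than the register sign-extends when MO_SIGN is set
     * and zero-extends otherwise, constraining the high bits accordingly.
     */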
Richard Henderson57fe5c62021-08-26 12:04:46 -07002376 if (width < 64) {
Richard Henderson75c3bf32024-12-19 10:50:40 -08002377 if (mop & MO_SIGN) {
Richard Henderson6813be92024-12-08 20:33:30 -06002378 s_mask = MAKE_64BIT_MASK(width - 1, 64 - (width - 1));
Richard Henderson75c3bf32024-12-19 10:50:40 -08002379 } else {
Richard Henderson6813be92024-12-08 20:33:30 -06002380 z_mask = MAKE_64BIT_MASK(0, width);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002381 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002382 }
2383
Richard Henderson3eefdf22021-08-25 11:06:43 -07002384 /* Opcodes that touch guest memory stop the mb optimization. */
2385 ctx->prev_mb = NULL;
Richard Henderson6813be92024-12-08 20:33:30 -06002386
2387 return fold_masks_zs(ctx, op, z_mask, s_mask);
2388}
2389
2390static bool fold_qemu_ld_2reg(OptContext *ctx, TCGOp *op)
2391{
2392 /* Opcodes that touch guest memory stop the mb optimization. */
2393 ctx->prev_mb = NULL;
2394 return finish_folding(ctx, op);
Richard Henderson3eefdf22021-08-25 11:06:43 -07002395}
2396
2397static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
2398{
2399 /* Opcodes that touch guest memory stop the mb optimization. */
2400 ctx->prev_mb = NULL;
Richard Henderson082b3ef2024-12-08 20:34:57 -06002401 return true;
Richard Henderson3eefdf22021-08-25 11:06:43 -07002402}
2403
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002404static bool fold_remainder(OptContext *ctx, TCGOp *op)
2405{
Richard Henderson267c17e2021-10-25 11:30:33 -07002406 if (fold_const2(ctx, op) ||
2407 fold_xx_to_i(ctx, op, 0)) {
2408 return true;
2409 }
Richard Hendersonf9e39342024-12-08 20:36:50 -06002410 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002411}
2412
Richard Henderson95eb2292024-12-08 20:47:59 -06002413/* Return 1 if finished, -1 if simplified, 0 if unchanged. */
2414static int fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
Richard Henderson8d65cda2024-03-26 16:00:40 -10002415{
2416 uint64_t a_zmask, b_val;
2417 TCGCond cond;
2418
2419 if (!arg_is_const(op->args[2])) {
2420 return false;
2421 }
2422
2423 a_zmask = arg_info(op->args[1])->z_mask;
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08002424 b_val = arg_const_val(op->args[2]);
Richard Henderson8d65cda2024-03-26 16:00:40 -10002425 cond = op->args[3];
2426
2427 if (ctx->type == TCG_TYPE_I32) {
2428 a_zmask = (uint32_t)a_zmask;
2429 b_val = (uint32_t)b_val;
2430 }
2431
2432 /*
2433 * A with only low bits set vs B with high bits set means that A < B.
2434 */
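    /*
     * For example, a_zmask == 0xff and b_val == 0x100: A <= 0xff < B, so
     * LTU/LEU/NE fold to true while GTU/GEU/EQ fold to false.
     */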
2435 if (a_zmask < b_val) {
2436 bool inv = false;
2437
2438 switch (cond) {
2439 case TCG_COND_NE:
2440 case TCG_COND_LEU:
2441 case TCG_COND_LTU:
2442 inv = true;
2443 /* fall through */
2444 case TCG_COND_GTU:
2445 case TCG_COND_GEU:
2446 case TCG_COND_EQ:
2447 return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
2448 default:
2449 break;
2450 }
2451 }
2452
2453 /*
2454 * A with only lsb set is already boolean.
2455 */
2456 if (a_zmask <= 1) {
2457 bool convert = false;
2458 bool inv = false;
2459
2460 switch (cond) {
2461 case TCG_COND_EQ:
2462 inv = true;
2463 /* fall through */
2464 case TCG_COND_NE:
2465 convert = (b_val == 0);
2466 break;
2467 case TCG_COND_LTU:
2468 case TCG_COND_TSTEQ:
2469 inv = true;
2470 /* fall through */
2471 case TCG_COND_GEU:
2472 case TCG_COND_TSTNE:
2473 convert = (b_val == 1);
2474 break;
2475 default:
2476 break;
2477 }
2478 if (convert) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002479 if (!inv && !neg) {
2480 return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
2481 }
2482
Richard Henderson8d65cda2024-03-26 16:00:40 -10002483 if (!inv) {
Richard Henderson69713582025-01-06 22:48:57 -08002484 op->opc = INDEX_op_neg;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002485 } else if (neg) {
Richard Henderson79602f62025-01-06 09:11:39 -08002486 op->opc = INDEX_op_add;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002487 op->args[2] = arg_new_constant(ctx, -1);
2488 } else {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002489 op->opc = INDEX_op_xor;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002490 op->args[2] = arg_new_constant(ctx, 1);
2491 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002492 return -1;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002493 }
2494 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002495 return 0;
Richard Henderson8d65cda2024-03-26 16:00:40 -10002496}
2497
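/*
 * For setcond or negsetcond with a TSTEQ/TSTNE condition against a power
 * of 2, reduce the operation to an extraction of the tested bit, followed
 * by an invert and/or negate as needed.
 */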
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002498static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
2499{
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002500 TCGCond cond = op->args[3];
2501 TCGArg ret, src1, src2;
2502 TCGOp *op2;
2503 uint64_t val;
2504 int sh;
2505 bool inv;
2506
2507 if (!is_tst_cond(cond) || !arg_is_const(op->args[2])) {
2508 return;
2509 }
2510
2511 src2 = op->args[2];
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08002512 val = arg_const_val(src2);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002513 if (!is_power_of_2(val)) {
2514 return;
2515 }
2516 sh = ctz64(val);
2517
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002518 ret = op->args[0];
2519 src1 = op->args[1];
2520 inv = cond == TCG_COND_TSTEQ;
2521
Richard Hendersonfa361ee2025-01-12 11:50:09 -08002522 if (sh && neg && !inv && TCG_TARGET_sextract_valid(ctx->type, sh, 1)) {
2523 op->opc = INDEX_op_sextract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002524 op->args[1] = src1;
2525 op->args[2] = sh;
2526 op->args[3] = 1;
2527 return;
Richard Henderson07d5d502025-01-11 09:01:46 -08002528 } else if (sh && TCG_TARGET_extract_valid(ctx->type, sh, 1)) {
2529 op->opc = INDEX_op_extract;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002530 op->args[1] = src1;
2531 op->args[2] = sh;
2532 op->args[3] = 1;
2533 } else {
2534 if (sh) {
Richard Henderson74dbd362025-01-07 22:52:10 -08002535 op2 = opt_insert_before(ctx, op, INDEX_op_shr, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002536 op2->args[0] = ret;
2537 op2->args[1] = src1;
2538 op2->args[2] = arg_new_constant(ctx, sh);
2539 src1 = ret;
2540 }
Richard Hendersonc3b920b2025-01-06 10:32:44 -08002541 op->opc = INDEX_op_and;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002542 op->args[1] = src1;
2543 op->args[2] = arg_new_constant(ctx, 1);
2544 }
2545
2546 if (neg && inv) {
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002547 op2 = opt_insert_after(ctx, op, INDEX_op_add, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002548 op2->args[0] = ret;
2549 op2->args[1] = ret;
Richard Henderson93a9ddb2025-01-06 22:06:08 -08002550 op2->args[2] = arg_new_constant(ctx, -1);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002551 } else if (inv) {
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08002552 op2 = opt_insert_after(ctx, op, INDEX_op_xor, 3);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002553 op2->args[0] = ret;
2554 op2->args[1] = ret;
2555 op2->args[2] = arg_new_constant(ctx, 1);
2556 } else if (neg) {
Richard Henderson69713582025-01-06 22:48:57 -08002557 op2 = opt_insert_after(ctx, op, INDEX_op_neg, 2);
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002558 op2->args[0] = ret;
2559 op2->args[1] = ret;
2560 }
2561}
2562
Richard Hendersonc63ff552021-08-24 09:35:30 -07002563static bool fold_setcond(OptContext *ctx, TCGOp *op)
2564{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002565 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002566 &op->args[2], &op->args[3]);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002567 if (i >= 0) {
2568 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2569 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002570
Richard Henderson95eb2292024-12-08 20:47:59 -06002571 i = fold_setcond_zmask(ctx, op, false);
2572 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002573 return true;
2574 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002575 if (i == 0) {
2576 fold_setcond_tst_pow2(ctx, op, false);
2577 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002578
Richard Henderson2c8a2832024-12-08 20:50:37 -06002579 return fold_masks_z(ctx, op, 1);
Richard Hendersonc63ff552021-08-24 09:35:30 -07002580}
2581
Richard Henderson36355022023-08-04 23:24:04 +00002582static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
2583{
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002584 int i = do_constant_folding_cond1(ctx, op, op->args[0], &op->args[1],
Richard Henderson246c4b72023-10-24 16:36:50 -07002585 &op->args[2], &op->args[3]);
Richard Henderson36355022023-08-04 23:24:04 +00002586 if (i >= 0) {
2587 return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
2588 }
Richard Henderson8d65cda2024-03-26 16:00:40 -10002589
Richard Henderson95eb2292024-12-08 20:47:59 -06002590 i = fold_setcond_zmask(ctx, op, true);
2591 if (i > 0) {
Richard Henderson8d65cda2024-03-26 16:00:40 -10002592 return true;
2593 }
Richard Henderson95eb2292024-12-08 20:47:59 -06002594 if (i == 0) {
2595 fold_setcond_tst_pow2(ctx, op, true);
2596 }
Richard Henderson36355022023-08-04 23:24:04 +00002597
2598 /* Value is {0,-1} so all bits are repetitions of the sign. */
Richard Henderson081cf082024-12-08 20:50:58 -06002599 return fold_masks_s(ctx, op, -1);
Richard Henderson36355022023-08-04 23:24:04 +00002600}
2601
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002602static bool fold_setcond2(OptContext *ctx, TCGOp *op)
2603{
Richard Henderson7e64b112023-10-24 16:53:56 -07002604 TCGCond cond;
Richard Henderson7a2f7082021-08-26 07:06:39 -07002605 int i, inv = 0;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002606
Richard Hendersonfb04ab72024-01-10 18:21:58 +11002607 i = do_constant_folding_cond2(ctx, op, &op->args[1]);
Richard Henderson7e64b112023-10-24 16:53:56 -07002608 cond = op->args[5];
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002609 if (i >= 0) {
2610 goto do_setcond_const;
2611 }
2612
2613 switch (cond) {
2614 case TCG_COND_LT:
2615 case TCG_COND_GE:
2616 /*
2617 * Simplify LT/GE comparisons vs zero to a single compare
2618 * vs the high word of the input.
2619 */
Richard Henderson27cdb852023-10-23 11:38:00 -07002620 if (arg_is_const_val(op->args[3], 0) &&
2621 arg_is_const_val(op->args[4], 0)) {
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002622 goto do_setcond_high;
2623 }
2624 break;
2625
2626 case TCG_COND_NE:
2627 inv = 1;
2628 QEMU_FALLTHROUGH;
2629 case TCG_COND_EQ:
2630 /*
2631 * Simplify EQ/NE comparisons where one of the pairs
2632 * can be simplified.
2633 */
Richard Henderson67f84c92021-08-25 08:00:20 -07002634 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002635 op->args[3], cond);
2636 switch (i ^ inv) {
2637 case 0:
2638 goto do_setcond_const;
2639 case 1:
2640 goto do_setcond_high;
2641 }
2642
Richard Henderson67f84c92021-08-25 08:00:20 -07002643 i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002644 op->args[4], cond);
2645 switch (i ^ inv) {
2646 case 0:
2647 goto do_setcond_const;
2648 case 1:
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002649 goto do_setcond_low;
2650 }
2651 break;
2652
2653 case TCG_COND_TSTEQ:
2654 case TCG_COND_TSTNE:
Richard Hendersona71d9df2024-06-30 19:46:23 -07002655 if (arg_is_const_val(op->args[3], 0)) {
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002656 goto do_setcond_high;
2657 }
2658 if (arg_is_const_val(op->args[4], 0)) {
2659 goto do_setcond_low;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002660 }
2661 break;
2662
2663 default:
2664 break;
2665
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002666 do_setcond_low:
2667 op->args[2] = op->args[3];
2668 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002669 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002670 return fold_setcond(ctx, op);
2671
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002672 do_setcond_high:
2673 op->args[1] = op->args[2];
2674 op->args[2] = op->args[4];
2675 op->args[3] = cond;
Richard Hendersona363e1e2025-01-10 09:26:44 -08002676 op->opc = INDEX_op_setcond;
Richard Hendersonceb9ee02023-10-23 23:44:27 -07002677 return fold_setcond(ctx, op);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002678 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002679
Richard Hendersona53502c2024-12-08 20:56:36 -06002680 return fold_masks_z(ctx, op, 1);
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07002681
2682 do_setcond_const:
2683 return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2684}
2685
Richard Hendersonb6617c82021-08-24 10:44:53 -07002686static bool fold_sextract(OptContext *ctx, TCGOp *op)
2687{
Richard Hendersonf4a818a2024-12-10 16:40:24 -06002688 uint64_t z_mask, o_mask, s_mask, a_mask;
Richard Hendersonbaff5072024-12-08 21:09:30 -06002689 TempOptInfo *t1 = arg_info(op->args[1]);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002690 int pos = op->args[2];
2691 int len = op->args[3];
Richard Hendersonfae450b2021-08-25 22:42:19 -07002692
Richard Hendersonbaff5072024-12-08 21:09:30 -06002693 if (ti_is_const(t1)) {
2694 return tcg_opt_gen_movi(ctx, op, op->args[0],
2695 sextract64(ti_const_val(t1), pos, len));
Richard Hendersonb6617c82021-08-24 10:44:53 -07002696 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002697
Richard Hendersonf4a818a2024-12-10 16:40:24 -06002698 s_mask = t1->s_mask >> pos;
Richard Hendersonbaff5072024-12-08 21:09:30 -06002699 s_mask |= -1ull << (len - 1);
Richard Hendersonf4a818a2024-12-10 16:40:24 -06002700 a_mask = pos ? -1 : s_mask & ~t1->s_mask;
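/*
 * With pos == 0 the extract is a no-op whenever the input is already
 * sign-extended from within the field: e.g. sextract(x, 0, 8) with
 * t1->s_mask covering bits 7..63 yields a_mask == 0, meaning no input
 * bit needs to change and the op can be reduced to a plain copy.
 */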
Richard Henderson57fe5c62021-08-26 12:04:46 -07002701
Richard Hendersonbaff5072024-12-08 21:09:30 -06002702 z_mask = sextract64(t1->z_mask, pos, len);
Richard Hendersonf4a818a2024-12-10 16:40:24 -06002703 o_mask = sextract64(t1->o_mask, pos, len);
2704
2705 return fold_masks_zosa(ctx, op, z_mask, o_mask, s_mask, a_mask);
Richard Hendersonb6617c82021-08-24 10:44:53 -07002706}
2707
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002708static bool fold_shift(OptContext *ctx, TCGOp *op)
2709{
Richard Henderson03329e32024-12-10 16:49:02 -06002710 uint64_t s_mask, z_mask, o_mask;
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002711 TempOptInfo *t1, *t2;
Richard Henderson93a967f2021-08-26 13:24:59 -07002712
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002713 if (fold_const2(ctx, op) ||
Richard Hendersonda48e272021-08-25 20:42:04 -07002714 fold_ix_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002715 fold_xi_to_x(ctx, op, 0)) {
2716 return true;
2717 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07002718
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002719 t1 = arg_info(op->args[1]);
2720 t2 = arg_info(op->args[2]);
2721 s_mask = t1->s_mask;
2722 z_mask = t1->z_mask;
Richard Henderson03329e32024-12-10 16:49:02 -06002723 o_mask = t1->o_mask;
Richard Henderson93a967f2021-08-26 13:24:59 -07002724
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002725 if (ti_is_const(t2)) {
2726 int sh = ti_const_val(t2);
Richard Henderson93a967f2021-08-26 13:24:59 -07002727
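/*
 * For a constant shift count the known-zero and known-one bits move
 * exactly as the value does, so run the masks through the same constant
 * folder: e.g. shl by 8 turns a z_mask of 0x00ff into 0xff00.  The
 * sign-repetition mask is shifted the same way.
 */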
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002728 z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
Richard Henderson03329e32024-12-10 16:49:02 -06002729 o_mask = do_constant_folding(op->opc, ctx->type, o_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002730 s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
Richard Henderson93a967f2021-08-26 13:24:59 -07002731
Richard Henderson03329e32024-12-10 16:49:02 -06002732 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002733 }
Richard Henderson93a967f2021-08-26 13:24:59 -07002734
2735 switch (op->opc) {
Richard Henderson3949f362025-01-08 08:05:18 -08002736 case INDEX_op_sar:
Richard Henderson93a967f2021-08-26 13:24:59 -07002737 /*
2738 * Arithmetic right shift will not reduce the number of
2739 * input sign repetitions.
2740 */
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002741 return fold_masks_s(ctx, op, s_mask);
Richard Henderson74dbd362025-01-07 22:52:10 -08002742 case INDEX_op_shr:
Richard Henderson93a967f2021-08-26 13:24:59 -07002743 /*
2744 * If the sign bit is known zero, then logical right shift
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002745 * will not reduce the number of input sign repetitions.
Richard Henderson93a967f2021-08-26 13:24:59 -07002746 */
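/*
 * With s_mask kept as a run of 1s down from the msb, -s_mask isolates
 * the lowest bit of that run; if z_mask shows that bit can never be 1,
 * the sign bit itself is known zero.
 */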
Richard Henderson4ed2ba32024-12-19 19:38:54 -08002747 if (~z_mask & -s_mask) {
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002748 return fold_masks_s(ctx, op, s_mask);
Richard Henderson93a967f2021-08-26 13:24:59 -07002749 }
2750 break;
2751 default:
2752 break;
2753 }
2754
Richard Henderson4e9ce6a2024-12-08 21:13:41 -06002755 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002756}
2757
Richard Henderson9caca882021-08-24 13:30:32 -07002758static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2759{
2760 TCGOpcode neg_op;
2761 bool have_neg;
2762
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08002763 if (!arg_is_const_val(op->args[1], 0)) {
Richard Henderson9caca882021-08-24 13:30:32 -07002764 return false;
2765 }
2766
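/*
 * Only "sub r, 0, x" qualifies: it becomes "neg r, x" whenever a
 * negation opcode is available for this type (always for the integer
 * types, backend-dependent for vectors).
 */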
2767 switch (ctx->type) {
2768 case TCG_TYPE_I32:
Richard Henderson9caca882021-08-24 13:30:32 -07002769 case TCG_TYPE_I64:
Richard Henderson69713582025-01-06 22:48:57 -08002770 neg_op = INDEX_op_neg;
Richard Hendersonb701f192023-10-25 21:14:04 -07002771 have_neg = true;
Richard Henderson9caca882021-08-24 13:30:32 -07002772 break;
2773 case TCG_TYPE_V64:
2774 case TCG_TYPE_V128:
2775 case TCG_TYPE_V256:
2776 neg_op = INDEX_op_neg_vec;
2777 have_neg = (TCG_TARGET_HAS_neg_vec &&
2778 tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2779 break;
2780 default:
2781 g_assert_not_reached();
2782 }
2783 if (have_neg) {
2784 op->opc = neg_op;
2785 op->args[1] = op->args[2];
Richard Hendersone25fe882024-04-04 20:53:50 +00002786 return fold_neg_no_const(ctx, op);
Richard Henderson9caca882021-08-24 13:30:32 -07002787 }
2788 return false;
2789}
2790
Richard Hendersonc578ff12021-12-16 06:07:25 -08002791/* We cannot as yet do_constant_folding with vectors. */
2792static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002793{
Richard Hendersonc578ff12021-12-16 06:07:25 -08002794 if (fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07002795 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson9caca882021-08-24 13:30:32 -07002796 fold_sub_to_neg(ctx, op)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07002797 return true;
2798 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002799 return finish_folding(ctx, op);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07002800}
2801
Richard Hendersonc578ff12021-12-16 06:07:25 -08002802static bool fold_sub(OptContext *ctx, TCGOp *op)
2803{
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002804 if (fold_const2(ctx, op) ||
2805 fold_xx_to_i(ctx, op, 0) ||
2806 fold_xi_to_x(ctx, op, 0) ||
2807 fold_sub_to_neg(ctx, op)) {
Richard Henderson6334a962023-10-25 18:39:43 -07002808 return true;
2809 }
2810
2811 /* Fold sub r,x,i to add r,x,-i */
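/* e.g. "sub r, x, 1" becomes "add r, x, -1". */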
2812 if (arg_is_const(op->args[2])) {
Richard Hendersonc1fa1b32025-02-17 15:17:47 -08002813 uint64_t val = arg_const_val(op->args[2]);
Richard Henderson6334a962023-10-25 18:39:43 -07002814
Richard Henderson79602f62025-01-06 09:11:39 -08002815 op->opc = INDEX_op_add;
Richard Henderson6334a962023-10-25 18:39:43 -07002816 op->args[2] = arg_new_constant(ctx, -val);
2817 }
Richard Hendersonfe1d0072024-12-08 21:15:22 -06002818 return finish_folding(ctx, op);
Richard Hendersonc578ff12021-12-16 06:07:25 -08002819}
2820
Richard Hendersonaeb35142025-01-14 18:28:15 -08002821static void squash_prev_borrowout(OptContext *ctx, TCGOp *op)
2822{
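/*
 * The borrow-in of the current op has just been resolved to a constant,
 * so the borrow-out computed by the immediately preceding sub*o opcode
 * is no longer needed; rewrite that op as the variant that does not
 * produce a borrow-out.
 */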
2823 TempOptInfo *t2;
2824
2825 op = QTAILQ_PREV(op, link);
2826 switch (op->opc) {
2827 case INDEX_op_subbo:
2828 op->opc = INDEX_op_sub;
2829 fold_sub(ctx, op);
2830 break;
2831 case INDEX_op_subbio:
2832 op->opc = INDEX_op_subbi;
2833 break;
2834 case INDEX_op_subb1o:
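        /*
         * subb1o computes t1 - t2 - 1 (borrow-in fixed at 1), which equals
         * t1 + (-(t2 + 1)) == t1 + ~t2: fold to a single add when t2 is
         * constant, otherwise keep the sub and append an add of -1.
         */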
2835 t2 = arg_info(op->args[2]);
2836 if (ti_is_const(t2)) {
2837 op->opc = INDEX_op_add;
2838 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2839 /* Perform other constant folding, if needed. */
2840 fold_add(ctx, op);
2841 } else {
2842 TCGArg ret = op->args[0];
2843 op->opc = INDEX_op_sub;
2844 op = opt_insert_after(ctx, op, INDEX_op_add, 3);
2845 op->args[0] = ret;
2846 op->args[1] = ret;
2847 op->args[2] = arg_new_constant(ctx, -1);
2848 }
2849 break;
2850 default:
2851 g_assert_not_reached();
2852 }
2853}
2854
2855static bool fold_subbi(OptContext *ctx, TCGOp *op)
2856{
2857 TempOptInfo *t2;
2858 int borrow_in = ctx->carry_state;
2859
2860 if (borrow_in < 0) {
2861 return finish_folding(ctx, op);
2862 }
2863 ctx->carry_state = -1;
2864
2865 squash_prev_borrowout(ctx, op);
2866 if (borrow_in == 0) {
2867 op->opc = INDEX_op_sub;
2868 return fold_sub(ctx, op);
2869 }
2870
2871 /*
2872 * Propagate the known borrow-in into any constant, then negate to
2873 * transform from sub to add. If there is no constant, emit a
2874 * separate add -1.
2875 */
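/* With a borrow-in of 1, x - c - 1 == x + (-(c + 1)) == x + ~c. */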
2876 t2 = arg_info(op->args[2]);
2877 if (ti_is_const(t2)) {
2878 op->args[2] = arg_new_constant(ctx, -(ti_const_val(t2) + 1));
2879 } else {
2880 TCGOp *op2 = opt_insert_before(ctx, op, INDEX_op_sub, 3);
2881
2882 op2->args[0] = op->args[0];
2883 op2->args[1] = op->args[1];
2884 op2->args[2] = op->args[2];
2885 fold_sub(ctx, op2);
2886
2887 op->args[1] = op->args[0];
2888 op->args[2] = arg_new_constant(ctx, -1);
2889 }
2890 op->opc = INDEX_op_add;
2891 return fold_add(ctx, op);
2892}
2893
2894static bool fold_subbio(OptContext *ctx, TCGOp *op)
2895{
2896 TempOptInfo *t1, *t2;
2897 int borrow_out = -1;
2898
2899 if (ctx->carry_state < 0) {
2900 return finish_folding(ctx, op);
2901 }
2902
2903 squash_prev_borrowout(ctx, op);
2904 if (ctx->carry_state == 0) {
2905 goto do_subbo;
2906 }
2907
2908 t1 = arg_info(op->args[1]);
2909 t2 = arg_info(op->args[2]);
2910
2911 /* Propagate the known borrow-in into a constant, if possible. */
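/*
 * x - v - 1 == x - (v + 1), so bump the constant unless it is already
 * all-ones; subtracting max + 1 leaves x unchanged modulo the operation
 * width but always produces a borrow.
 */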
2912 if (ti_is_const(t2)) {
2913 uint64_t max = ctx->type == TCG_TYPE_I32 ? UINT32_MAX : UINT64_MAX;
2914 uint64_t v = ti_const_val(t2) & max;
2915
2916 if (v < max) {
2917 op->args[2] = arg_new_constant(ctx, v + 1);
2918 goto do_subbo;
2919 }
2920 /* Subtracting max + 1 produces a known borrow out. */
2921 borrow_out = 1;
2922 }
2923 if (ti_is_const(t1)) {
2924 uint64_t v = ti_const_val(t1);
2925 if (v != 0) {
2926 op->args[2] = arg_new_constant(ctx, v - 1);
2927 goto do_subbo;
2928 }
2929 }
2930
2931 /* Adjust the opcode to remember the known carry-in. */
2932 op->opc = INDEX_op_subb1o;
2933 ctx->carry_state = borrow_out;
2934 return finish_folding(ctx, op);
2935
2936 do_subbo:
2937 op->opc = INDEX_op_subbo;
2938 return fold_subbo(ctx, op);
2939}
2940
2941static bool fold_subbo(OptContext *ctx, TCGOp *op)
2942{
2943 TempOptInfo *t1 = arg_info(op->args[1]);
2944 TempOptInfo *t2 = arg_info(op->args[2]);
2945 int borrow_out = -1;
2946
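/*
 * The borrow-out of x - y is known when y == 0 (never borrows) or when
 * both operands are constant (borrows iff x < y unsigned).
 */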
2947 if (ti_is_const(t2)) {
2948 uint64_t v2 = ti_const_val(t2);
2949 if (v2 == 0) {
2950 borrow_out = 0;
2951 } else if (ti_is_const(t1)) {
2952 uint64_t v1 = ti_const_val(t1);
2953 borrow_out = v1 < v2;
2954 }
2955 }
2956 ctx->carry_state = borrow_out;
2957 return finish_folding(ctx, op);
2958}
2959
Richard Hendersonfae450b2021-08-25 22:42:19 -07002960static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2961{
Richard Hendersond33e0f02024-12-09 08:53:20 -06002962 uint64_t z_mask = -1, s_mask = 0;
2963
Richard Hendersonfae450b2021-08-25 22:42:19 -07002964 /* We can't do any folding with a load, but we can record bits. */
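/*
 * e.g. ld8u can only set bits 0..7 (z_mask 0xff), while ld8s guarantees
 * that bits 7..63 all match the sign bit (s_mask INT8_MIN).
 */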
2965 switch (op->opc) {
Richard Hendersone9968042025-01-21 21:47:16 -08002966 case INDEX_op_ld8s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002967 s_mask = INT8_MIN;
Richard Henderson57fe5c62021-08-26 12:04:46 -07002968 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002969 case INDEX_op_ld8u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002970 z_mask = MAKE_64BIT_MASK(0, 8);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002971 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002972 case INDEX_op_ld16s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002973 s_mask = INT16_MIN;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002974 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002975 case INDEX_op_ld16u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002976 z_mask = MAKE_64BIT_MASK(0, 16);
Richard Henderson57fe5c62021-08-26 12:04:46 -07002977 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002978 case INDEX_op_ld32s:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002979 s_mask = INT32_MIN;
Richard Hendersonfae450b2021-08-25 22:42:19 -07002980 break;
Richard Hendersone9968042025-01-21 21:47:16 -08002981 case INDEX_op_ld32u:
Richard Hendersond33e0f02024-12-09 08:53:20 -06002982 z_mask = MAKE_64BIT_MASK(0, 32);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002983 break;
2984 default:
2985 g_assert_not_reached();
2986 }
Richard Hendersond33e0f02024-12-09 08:53:20 -06002987 return fold_masks_zs(ctx, op, z_mask, s_mask);
Richard Hendersonfae450b2021-08-25 22:42:19 -07002988}
2989
Richard Hendersonab84dc32023-08-23 23:04:24 -07002990static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2991{
2992 TCGTemp *dst, *src;
2993 intptr_t ofs;
2994 TCGType type;
2995
2996 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
Richard Henderson0fb5b752024-12-09 09:44:40 -06002997 return finish_folding(ctx, op);
Richard Hendersonab84dc32023-08-23 23:04:24 -07002998 }
2999
3000 type = ctx->type;
3001 ofs = op->args[2];
3002 dst = arg_temp(op->args[0]);
3003 src = find_mem_copy_for(ctx, type, ofs);
3004 if (src && src->base_type == type) {
3005 return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
3006 }
3007
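/*
 * No usable copy of this env slot is on record: the load stands, and its
 * destination now mirrors the slot, so remember it for later accesses.
 */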
3008 reset_ts(ctx, dst);
3009 record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
3010 return true;
3011}
3012
3013static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
3014{
3015 intptr_t ofs = op->args[2];
3016 intptr_t lm1;
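/* lm1 is the number of bytes stored minus 1, so that ofs + lm1 is the
   last byte overwritten. */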
3017
3018 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
3019 remove_mem_copy_all(ctx);
Richard Henderson082b3ef2024-12-08 20:34:57 -06003020 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07003021 }
3022
3023 switch (op->opc) {
Richard Hendersona28f1512025-01-22 13:28:55 -08003024 case INDEX_op_st8:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003025 lm1 = 0;
3026 break;
Richard Hendersona28f1512025-01-22 13:28:55 -08003027 case INDEX_op_st16:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003028 lm1 = 1;
3029 break;
Richard Hendersona28f1512025-01-22 13:28:55 -08003030 case INDEX_op_st32:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003031 lm1 = 3;
3032 break;
Richard Hendersona28f1512025-01-22 13:28:55 -08003033 case INDEX_op_st:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003034 case INDEX_op_st_vec:
3035 lm1 = tcg_type_size(ctx->type) - 1;
3036 break;
3037 default:
3038 g_assert_not_reached();
3039 }
3040 remove_mem_copy_in(ctx, ofs, ofs + lm1);
Richard Henderson082b3ef2024-12-08 20:34:57 -06003041 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07003042}
3043
3044static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
3045{
3046 TCGTemp *src;
3047 intptr_t ofs, last;
3048 TCGType type;
3049
3050 if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
Richard Henderson082b3ef2024-12-08 20:34:57 -06003051 return fold_tcg_st(ctx, op);
Richard Hendersonab84dc32023-08-23 23:04:24 -07003052 }
3053
3054 src = arg_temp(op->args[0]);
3055 ofs = op->args[2];
3056 type = ctx->type;
Richard Henderson3eaadae2023-08-23 23:13:06 -07003057
3058 /*
3059 * Eliminate duplicate stores of a constant.
3060 * This happens frequently when the target ISA zero-extends.
3061 */
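/*
 * Constant temps are interned, so an identical constant previously
 * recorded for this slot compares equal as a TCGTemp pointer.
 */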
3062 if (ts_is_const(src)) {
3063 TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
3064 if (src == prev) {
3065 tcg_op_remove(ctx->tcg, op);
3066 return true;
3067 }
3068 }
3069
Richard Hendersonab84dc32023-08-23 23:04:24 -07003070 last = ofs + tcg_type_size(type) - 1;
3071 remove_mem_copy_in(ctx, ofs, last);
3072 record_mem_copy(ctx, type, src, ofs, last);
Richard Henderson082b3ef2024-12-08 20:34:57 -06003073 return true;
Richard Hendersonab84dc32023-08-23 23:04:24 -07003074}
3075
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003076static bool fold_xor(OptContext *ctx, TCGOp *op)
3077{
Richard Henderson787190e2024-12-10 08:39:56 -06003078 uint64_t z_mask, o_mask, s_mask;
Richard Hendersonc890fd72024-12-08 21:39:01 -06003079 TempOptInfo *t1, *t2;
3080
Richard Henderson7a2f7082021-08-26 07:06:39 -07003081 if (fold_const2_commutative(ctx, op) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07003082 fold_xx_to_i(ctx, op, 0) ||
Richard Hendersona63ce0e2021-08-25 20:28:53 -07003083 fold_xi_to_x(ctx, op, 0) ||
Richard Henderson0e0a32b2021-08-24 13:18:01 -07003084 fold_xi_to_not(ctx, op, -1)) {
Richard Hendersoncbe42fb2021-08-25 13:02:00 -07003085 return true;
3086 }
Richard Hendersonfae450b2021-08-25 22:42:19 -07003087
Richard Hendersonc890fd72024-12-08 21:39:01 -06003088 t1 = arg_info(op->args[1]);
3089 t2 = arg_info(op->args[2]);
Richard Henderson787190e2024-12-10 08:39:56 -06003090
3091 z_mask = (t1->z_mask | t2->z_mask) & ~(t1->o_mask & t2->o_mask);
3092 o_mask = (t1->o_mask & ~t2->z_mask) | (t2->o_mask & ~t1->z_mask);
Richard Hendersonc890fd72024-12-08 21:39:01 -06003093 s_mask = t1->s_mask & t2->s_mask;
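/*
 * A result bit can be 1 only if it can be 1 in at least one input and
 * is not known-1 in both; it is known-1 when one input has it known-1
 * and the other known-0.  E.g. t1 == 0xff00 exactly and t2 with
 * z_mask 0x00ff, o_mask 0x000f give z_mask 0xffff, o_mask 0xff0f.
 */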
Richard Henderson787190e2024-12-10 08:39:56 -06003094
3095 return fold_masks_zos(ctx, op, z_mask, o_mask, s_mask);
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003096}
3097
Kirill Batuzov22613af2011-07-07 16:37:13 +04003098/* Propagate constants and copies, fold constant expressions. */
Aurelien Jarno36e60ef2015-06-04 21:53:27 +02003099void tcg_optimize(TCGContext *s)
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003100{
Richard Henderson5cf32be2021-08-24 08:17:08 -07003101 int nb_temps, i;
Richard Hendersond0ed5152021-08-24 07:38:39 -07003102 TCGOp *op, *op_next;
Richard Hendersondc849882021-08-24 07:13:45 -07003103 OptContext ctx = { .tcg = s };
Richard Henderson5d8f5362012-09-21 10:13:38 -07003104
Richard Hendersonab84dc32023-08-23 23:04:24 -07003105 QSIMPLEQ_INIT(&ctx.mem_free);
3106
Kirill Batuzov22613af2011-07-07 16:37:13 +04003107 /* Each temp has an associated TempOptInfo, allocated on demand via
3108 state_ptr. If the temp holds a constant, its value is recorded there.
Aurelien Jarnoe590d4e2012-09-11 12:31:21 +02003109 If the temp is a copy of other temps, the copies are linked to each
3110 other through a doubly linked circular list. */
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003111
3112 nb_temps = s->nb_temps;
Richard Henderson8f17a972020-03-30 19:52:02 -07003113 for (i = 0; i < nb_temps; ++i) {
3114 s->temps[i].state_ptr = NULL;
3115 }
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003116
Richard Henderson15fa08f2017-11-02 15:19:14 +01003117 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003118 TCGOpcode opc = op->opc;
Richard Henderson5cf32be2021-08-24 08:17:08 -07003119 const TCGOpDef *def;
Richard Henderson404a1482021-08-24 11:08:21 -07003120 bool done = false;
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003121
Richard Henderson5cf32be2021-08-24 08:17:08 -07003122 /* Calls are special. */
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003123 if (opc == INDEX_op_call) {
Richard Henderson5cf32be2021-08-24 08:17:08 -07003124 fold_call(&ctx, op);
3125 continue;
Richard Hendersoncf066672014-03-22 20:06:52 -07003126 }
Richard Henderson5cf32be2021-08-24 08:17:08 -07003127
3128 def = &tcg_op_defs[opc];
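/* Give every argument a TempOptInfo and substitute known-equal copies
   for the inputs before folding the op itself. */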
Richard Hendersonec5d4cb2021-08-24 08:20:27 -07003129 init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
3130 copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
Kirill Batuzov22613af2011-07-07 16:37:13 +04003131
Richard Henderson67f84c92021-08-25 08:00:20 -07003132 /* Pre-compute the type of the operation. */
Richard Henderson4d872212025-01-02 19:43:06 -08003133 ctx.type = TCGOP_TYPE(op);
Richard Henderson67f84c92021-08-25 08:00:20 -07003134
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003135 /*
3136 * Process each opcode.
3137 * Sorted alphabetically by opcode as much as possible.
3138 */
Richard Hendersonc45cb8b2014-09-19 13:49:15 -07003139 switch (opc) {
Richard Henderson79602f62025-01-06 09:11:39 -08003140 case INDEX_op_add:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003141 done = fold_add(&ctx, op);
3142 break;
Richard Hendersonc578ff12021-12-16 06:07:25 -08003143 case INDEX_op_add_vec:
3144 done = fold_add_vec(&ctx, op);
3145 break;
Richard Henderson76f42782025-01-14 13:58:39 -08003146 case INDEX_op_addci:
Richard Hendersonaeb35142025-01-14 18:28:15 -08003147 done = fold_addci(&ctx, op);
3148 break;
Richard Henderson76f42782025-01-14 13:58:39 -08003149 case INDEX_op_addcio:
Richard Hendersonaeb35142025-01-14 18:28:15 -08003150 done = fold_addcio(&ctx, op);
3151 break;
3152 case INDEX_op_addco:
3153 done = fold_addco(&ctx, op);
Richard Henderson76f42782025-01-14 13:58:39 -08003154 break;
Richard Hendersonc3b920b2025-01-06 10:32:44 -08003155 case INDEX_op_and:
3156 case INDEX_op_and_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003157 done = fold_and(&ctx, op);
3158 break;
Richard Henderson46f96bf2025-01-06 12:37:02 -08003159 case INDEX_op_andc:
3160 case INDEX_op_andc_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003161 done = fold_andc(&ctx, op);
3162 break;
Richard Hendersonb6d69fc2025-01-10 11:49:22 -08003163 case INDEX_op_brcond:
Richard Henderson079b0802021-08-24 09:30:59 -07003164 done = fold_brcond(&ctx, op);
3165 break;
Richard Henderson764d2ab2021-08-24 09:22:11 -07003166 case INDEX_op_brcond2_i32:
3167 done = fold_brcond2(&ctx, op);
3168 break;
Richard Henderson0dd07ee2025-01-10 18:51:16 -08003169 case INDEX_op_bswap16:
Richard Henderson7498d882025-01-10 19:53:51 -08003170 case INDEX_op_bswap32:
Richard Henderson3ad5d4c2025-01-10 21:54:44 -08003171 case INDEX_op_bswap64:
Richard Henderson09bacdc2021-08-24 11:58:12 -07003172 done = fold_bswap(&ctx, op);
3173 break;
Richard Henderson5a5bb0a2025-01-08 16:12:46 -08003174 case INDEX_op_clz:
Richard Hendersonc96447d2025-01-08 17:07:01 -08003175 case INDEX_op_ctz:
Richard Henderson30dd0bf2021-08-24 10:51:34 -07003176 done = fold_count_zeros(&ctx, op);
3177 break;
Richard Henderson97218ae2025-01-08 18:37:43 -08003178 case INDEX_op_ctpop:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003179 done = fold_ctpop(&ctx, op);
3180 break;
Richard Henderson4d137ff2025-01-12 20:48:57 -08003181 case INDEX_op_deposit:
Richard Henderson1b1907b2021-08-24 10:47:04 -07003182 done = fold_deposit(&ctx, op);
3183 break;
Richard Hendersonb2c514f2025-01-07 13:22:56 -08003184 case INDEX_op_divs:
Richard Henderson961b80a2025-01-07 14:27:19 -08003185 case INDEX_op_divu:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003186 done = fold_divide(&ctx, op);
3187 break;
Richard Henderson8cdb3fc2021-08-24 12:06:33 -07003188 case INDEX_op_dup_vec:
3189 done = fold_dup(&ctx, op);
3190 break;
3191 case INDEX_op_dup2_vec:
3192 done = fold_dup2(&ctx, op);
3193 break;
Richard Henderson5c0968a2025-01-06 15:47:53 -08003194 case INDEX_op_eqv:
3195 case INDEX_op_eqv_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003196 done = fold_eqv(&ctx, op);
3197 break;
Richard Henderson07d5d502025-01-11 09:01:46 -08003198 case INDEX_op_extract:
Richard Hendersonb6617c82021-08-24 10:44:53 -07003199 done = fold_extract(&ctx, op);
3200 break;
Richard Henderson61d6a872025-01-12 21:40:43 -08003201 case INDEX_op_extract2:
Richard Hendersondcd08992021-08-24 10:41:39 -07003202 done = fold_extract2(&ctx, op);
3203 break;
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003204 case INDEX_op_ext_i32_i64:
3205 done = fold_exts(&ctx, op);
3206 break;
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003207 case INDEX_op_extu_i32_i64:
3208 case INDEX_op_extrl_i64_i32:
3209 case INDEX_op_extrh_i64_i32:
3210 done = fold_extu(&ctx, op);
3211 break;
Richard Hendersone9968042025-01-21 21:47:16 -08003212 case INDEX_op_ld8s:
3213 case INDEX_op_ld8u:
3214 case INDEX_op_ld16s:
3215 case INDEX_op_ld16u:
3216 case INDEX_op_ld32s:
3217 case INDEX_op_ld32u:
Richard Hendersonfae450b2021-08-25 22:42:19 -07003218 done = fold_tcg_ld(&ctx, op);
3219 break;
Richard Hendersone9968042025-01-21 21:47:16 -08003220 case INDEX_op_ld:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003221 case INDEX_op_ld_vec:
3222 done = fold_tcg_ld_memcopy(&ctx, op);
3223 break;
Richard Hendersona28f1512025-01-22 13:28:55 -08003224 case INDEX_op_st8:
3225 case INDEX_op_st16:
3226 case INDEX_op_st32:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003227 done = fold_tcg_st(&ctx, op);
3228 break;
Richard Hendersona28f1512025-01-22 13:28:55 -08003229 case INDEX_op_st:
Richard Hendersonab84dc32023-08-23 23:04:24 -07003230 case INDEX_op_st_vec:
3231 done = fold_tcg_st_memcopy(&ctx, op);
3232 break;
Richard Henderson3eefdf22021-08-25 11:06:43 -07003233 case INDEX_op_mb:
3234 done = fold_mb(&ctx, op);
3235 break;
Richard Hendersonb5701262024-12-28 15:58:24 -08003236 case INDEX_op_mov:
3237 case INDEX_op_mov_vec:
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003238 done = fold_mov(&ctx, op);
3239 break;
Richard Hendersonea46c4b2025-01-10 13:41:25 -08003240 case INDEX_op_movcond:
Richard Henderson0c310a32021-08-24 10:37:24 -07003241 done = fold_movcond(&ctx, op);
3242 break;
Richard Hendersond2c3eca2025-01-07 09:32:18 -08003243 case INDEX_op_mul:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003244 done = fold_mul(&ctx, op);
3245 break;
Richard Hendersonc7428242025-01-07 11:19:29 -08003246 case INDEX_op_mulsh:
Richard Hendersonaa28c9e2025-01-07 10:36:24 -08003247 case INDEX_op_muluh:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003248 done = fold_mul_highpart(&ctx, op);
3249 break;
Richard Hendersonbfe96482025-01-09 07:24:32 -08003250 case INDEX_op_muls2:
Richard Hendersond7761982025-01-09 09:11:53 -08003251 case INDEX_op_mulu2:
Richard Henderson407112b2021-08-26 06:33:04 -07003252 done = fold_multiply2(&ctx, op);
Richard Henderson6b8ac0d2021-08-24 10:24:12 -07003253 break;
Richard Henderson59379a42025-01-06 20:32:54 -08003254 case INDEX_op_nand:
3255 case INDEX_op_nand_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003256 done = fold_nand(&ctx, op);
3257 break;
Richard Henderson69713582025-01-06 22:48:57 -08003258 case INDEX_op_neg:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003259 done = fold_neg(&ctx, op);
3260 break;
Richard Henderson3a8c4e92025-01-06 21:02:17 -08003261 case INDEX_op_nor:
3262 case INDEX_op_nor_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003263 done = fold_nor(&ctx, op);
3264 break;
Richard Henderson5c62d372025-01-06 23:46:47 -08003265 case INDEX_op_not:
3266 case INDEX_op_not_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003267 done = fold_not(&ctx, op);
3268 break;
Richard Henderson49bd7512025-01-06 14:00:40 -08003269 case INDEX_op_or:
3270 case INDEX_op_or_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003271 done = fold_or(&ctx, op);
3272 break;
Richard Henderson6aba25e2025-01-06 14:46:26 -08003273 case INDEX_op_orc:
3274 case INDEX_op_orc_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003275 done = fold_orc(&ctx, op);
3276 break;
Richard Hendersonaae24562025-02-09 12:55:15 -08003277 case INDEX_op_qemu_ld:
Richard Henderson6813be92024-12-08 20:33:30 -06003278 done = fold_qemu_ld_1reg(&ctx, op);
3279 break;
Richard Hendersonaae24562025-02-09 12:55:15 -08003280 case INDEX_op_qemu_ld2:
Richard Henderson6813be92024-12-08 20:33:30 -06003281 done = fold_qemu_ld_2reg(&ctx, op);
Richard Henderson3eefdf22021-08-25 11:06:43 -07003282 break;
Richard Hendersonaae24562025-02-09 12:55:15 -08003283 case INDEX_op_qemu_st:
3284 case INDEX_op_qemu_st2:
Richard Henderson3eefdf22021-08-25 11:06:43 -07003285 done = fold_qemu_st(&ctx, op);
3286 break;
Richard Henderson9a6bc182025-01-07 19:00:51 -08003287 case INDEX_op_rems:
Richard Hendersoncd9acd22025-01-07 20:25:14 -08003288 case INDEX_op_remu:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003289 done = fold_remainder(&ctx, op);
3290 break;
Richard Henderson005a87e2025-01-08 10:42:16 -08003291 case INDEX_op_rotl:
3292 case INDEX_op_rotr:
Richard Henderson3949f362025-01-08 08:05:18 -08003293 case INDEX_op_sar:
Richard Henderson6ca59452025-01-07 21:50:04 -08003294 case INDEX_op_shl:
Richard Henderson74dbd362025-01-07 22:52:10 -08003295 case INDEX_op_shr:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003296 done = fold_shift(&ctx, op);
3297 break;
Richard Hendersona363e1e2025-01-10 09:26:44 -08003298 case INDEX_op_setcond:
Richard Hendersonc63ff552021-08-24 09:35:30 -07003299 done = fold_setcond(&ctx, op);
3300 break;
Richard Hendersona363e1e2025-01-10 09:26:44 -08003301 case INDEX_op_negsetcond:
Richard Henderson36355022023-08-04 23:24:04 +00003302 done = fold_negsetcond(&ctx, op);
3303 break;
Richard Hendersonbc47b1a2021-08-24 09:09:35 -07003304 case INDEX_op_setcond2_i32:
3305 done = fold_setcond2(&ctx, op);
3306 break;
Richard Henderson1f106542024-09-06 12:22:41 -07003307 case INDEX_op_cmp_vec:
3308 done = fold_cmp_vec(&ctx, op);
3309 break;
3310 case INDEX_op_cmpsel_vec:
3311 done = fold_cmpsel_vec(&ctx, op);
3312 break;
Richard Hendersone58b9772024-09-06 22:30:01 -07003313 case INDEX_op_bitsel_vec:
3314 done = fold_bitsel_vec(&ctx, op);
3315 break;
Richard Hendersonfa361ee2025-01-12 11:50:09 -08003316 case INDEX_op_sextract:
Richard Hendersonb6617c82021-08-24 10:44:53 -07003317 done = fold_sextract(&ctx, op);
3318 break;
Richard Henderson60f34f52025-01-06 22:06:32 -08003319 case INDEX_op_sub:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003320 done = fold_sub(&ctx, op);
3321 break;
Richard Hendersonaeb35142025-01-14 18:28:15 -08003322 case INDEX_op_subbi:
3323 done = fold_subbi(&ctx, op);
3324 break;
3325 case INDEX_op_subbio:
3326 done = fold_subbio(&ctx, op);
3327 break;
3328 case INDEX_op_subbo:
3329 done = fold_subbo(&ctx, op);
3330 break;
Richard Hendersonc578ff12021-12-16 06:07:25 -08003331 case INDEX_op_sub_vec:
3332 done = fold_sub_vec(&ctx, op);
3333 break;
Richard Hendersonfffd3dc2025-01-06 15:18:35 -08003334 case INDEX_op_xor:
3335 case INDEX_op_xor_vec:
Richard Henderson2f9f08b2021-08-25 12:03:48 -07003336 done = fold_xor(&ctx, op);
Richard Hendersonb10f3832021-08-23 22:30:17 -07003337 break;
Richard Henderson15268552024-12-08 07:45:11 -06003338 case INDEX_op_set_label:
3339 case INDEX_op_br:
3340 case INDEX_op_exit_tb:
3341 case INDEX_op_goto_tb:
3342 case INDEX_op_goto_ptr:
3343 finish_ebb(&ctx);
3344 done = true;
3345 break;
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003346 default:
Richard Henderson0ae56422024-12-08 21:42:53 -06003347 done = finish_folding(&ctx, op);
Richard Henderson2cfac7f2021-08-25 13:05:43 -07003348 break;
Richard Hendersonb10f3832021-08-23 22:30:17 -07003349 }
Richard Henderson0ae56422024-12-08 21:42:53 -06003350 tcg_debug_assert(done);
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003351 }
Kirill Batuzov8f2e8c02011-07-07 16:37:12 +04003352}