Diffstat (limited to 'tcg/arm/tcg-target.c.inc')
-rw-r--r-- | tcg/arm/tcg-target.c.inc | 1322
1 file changed, 587 insertions(+), 735 deletions(-)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index d25e68b36b..6a04c73c76 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -23,6 +23,7 @@
  */
 
 #include "elf.h"
+#include "../tcg-ldst.c.inc"
 #include "../tcg-pool.c.inc"
 
 int arm_arch = __ARM_ARCH;
@@ -34,13 +35,6 @@ bool use_idiv_instructions;
 bool use_neon_instructions;
 #endif
 
-/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */
-#ifdef CONFIG_SOFTMMU
-# define USING_SOFTMMU 1
-#else
-# define USING_SOFTMMU 0
-#endif
-
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
@@ -85,12 +79,17 @@ static const int tcg_target_reg_alloc_order[] = {
 static const int tcg_target_call_iarg_regs[4] = {
     TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
 };
-static const int tcg_target_call_oarg_regs[2] = {
-    TCG_REG_R0, TCG_REG_R1
-};
+
+static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
+{
+    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
+    tcg_debug_assert(slot >= 0 && slot <= 3);
+    return TCG_REG_R0 + slot;
+}
 
 #define TCG_REG_TMP  TCG_REG_R12
 #define TCG_VEC_TMP  TCG_REG_Q15
+#define TCG_REG_GUEST_BASE  TCG_REG_R11
 
 typedef enum {
     COND_EQ = 0x0,
@@ -138,6 +137,8 @@ typedef enum {
     ARITH_BIC = 0xe << 21,
     ARITH_MVN = 0xf << 21,
 
+    INSN_B         = 0x0a000000,
+
     INSN_CLZ       = 0x016f0f10,
     INSN_RBIT      = 0x06ff0f30,
 
@@ -350,24 +351,11 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #define ALL_VECTOR_REGS  0xffff0000u
 
 /*
- * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
- * and r0-r1 doing the byte swapping, so don't use these.
- * r3 is removed for softmmu to avoid clashes with helper arguments.
+ * r0-r3 will be overwritten when reading the tlb entry (system-mode only);
+ * r14 will be overwritten by the BLNE branching to the slow path.
  */
-#ifdef CONFIG_SOFTMMU
-#define ALL_QLOAD_REGS \
-    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
-                          (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
-                          (1 << TCG_REG_R14)))
-#define ALL_QSTORE_REGS \
-    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
-                          (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
-                          ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
-#else
-#define ALL_QLOAD_REGS   ALL_GENERAL_REGS
-#define ALL_QSTORE_REGS \
-    (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
-#endif
+#define ALL_QLDST_REGS \
+    (ALL_GENERAL_REGS & ~((tcg_use_softmmu ? 0xf : 0) | (1 << TCG_REG_R14)))
 
 /*
  * ARM immediates for ALU instructions are made of an unsigned 8-bit
@@ -513,7 +501,8 @@ static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
  * mov operand2: values represented with x << (2 * y), x < 0x100
  * add, sub, eor...: ditto
  */
-static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+static bool tcg_target_const_match(int64_t val, int ct,
+                                   TCGType type, TCGCond cond, int vece)
 {
     if (ct & TCG_CT_CONST) {
         return 1;
@@ -549,7 +538,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 
 static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
 {
-    tcg_out32(s, (cond << 28) | 0x0a000000 |
+    tcg_out32(s, (cond << 28) | INSN_B |
               (((offset - 8) >> 2) & 0x00ffffff));
}
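A note for readers on the "unsigned 8-bit right-rotated" immediate format that the comment above refers to: the search the backend's encode_imm performs can be sketched standalone. Everything below (function name, test values) is illustrative, not taken from the file.

#include <assert.h>
#include <stdint.h>

/*
 * Sketch of ARM "operand2" immediate encoding: an 8-bit value rotated
 * right by an even amount.  Returns the 12-bit encoding, or -1 if the
 * value cannot be represented.  Illustrative only; the backend's own
 * encode_imm performs an equivalent search.
 */
static int encode_operand2(uint32_t v)
{
    for (int rot = 0; rot < 32; rot += 2) {
        /* Rotate v left by 'rot' to undo a right-rotation by 'rot'. */
        uint32_t u = rot ? ((v << rot) | (v >> (32 - rot))) : v;
        if (u <= 0xff) {
            return ((rot / 2) << 8) | u;
        }
    }
    return -1;
}

int main(void)
{
    assert(encode_operand2(0xff) == 0x0ff);    /* rotation 0 */
    assert(encode_operand2(0xff00) == 0xcff);  /* 0xff ror 24 */
    assert(encode_operand2(0x101) == -1);      /* not encodable */
    return 0;
}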
@@ -596,11 +585,7 @@ static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn)
      * Unless the C portion of QEMU is compiled as thumb, we don't need
      * true BX semantics; merely a branch to an address held in a register.
      */
-    if (use_armv5t_instructions) {
-        tcg_out_bx_reg(s, cond, rn);
-    } else {
-        tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
-    }
+    tcg_out_bx_reg(s, cond, rn);
 }
 
 static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
@@ -691,8 +676,8 @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
     tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
 }
 
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
-                           TCGReg rn, int imm8)
+static void __attribute__((unused))
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
 {
     tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
 }
@@ -927,17 +912,6 @@ static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
 static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd,
                           TCGReg rn, TCGReg rm)
 {
-    /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
-    if (!use_armv6_instructions && rd == rn) {
-        if (rd == rm) {
-            /* rd == rn == rm; copy an input to tmp first. */
-            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
-            rm = rn = TCG_REG_TMP;
-        } else {
-            rn = rm;
-            rm = rd;
-        }
-    }
     /* mul */
     tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
 }
@@ -945,17 +919,6 @@ static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd,
 static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0,
                             TCGReg rd1, TCGReg rn, TCGReg rm)
 {
-    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
-    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
-        if (rd0 == rm || rd1 == rm) {
-            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
-            rn = TCG_REG_TMP;
-        } else {
-            TCGReg t = rn;
-            rn = rm;
-            rm = t;
-        }
-    }
     /* umull */
     tcg_out32(s, (cond << 28) | 0x00800090 |
               (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
@@ -964,17 +927,6 @@ static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0,
 static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0,
                             TCGReg rd1, TCGReg rn, TCGReg rm)
 {
-    /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
-    if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
-        if (rd0 == rm || rd1 == rm) {
-            tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
-            rn = TCG_REG_TMP;
-        } else {
-            TCGReg t = rn;
-            rn = rm;
-            rm = t;
-        }
-    }
     /* smull */
     tcg_out32(s, (cond << 28) | 0x00c00090 |
               (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
@@ -992,134 +944,75 @@ static void tcg_out_udiv(TCGContext *s, ARMCond cond,
     tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 }
 
-static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
 {
-    if (use_armv6_instructions) {
-        /* sxtb */
-        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rn, SHIFT_IMM_LSL(24));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rd, SHIFT_IMM_ASR(24));
-    }
+    /* sxtb */
+    tcg_out32(s, 0x06af0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
-static void __attribute__((unused))
-tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
 {
-    tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
+    tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
}
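The sxtb instruction word used by the new tcg_out_ext8s above is just a fixed template OR'd with the condition and register fields. A standalone check of that assembly, with the opcode template and field positions taken from the tcg_out32() call in the diff and the helper name made up here:

#include <assert.h>
#include <stdint.h>

#define COND_AL 0xeu   /* ARM "always" condition code */

/* Assemble "sxtb rd, rn" exactly as the tcg_out32() call above does. */
static uint32_t sxtb(unsigned rd, unsigned rn)
{
    return 0x06af0070u | (COND_AL << 28) | (rd << 12) | rn;
}

int main(void)
{
    /* sxtb r0, r1 assembles to 0xe6af0071. */
    assert(sxtb(0, 1) == 0xe6af0071u);
    return 0;
}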
-static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
 {
-    if (use_armv6_instructions) {
-        /* sxth */
-        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rn, SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rd, SHIFT_IMM_ASR(16));
-    }
+    /* sxth */
+    tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
-static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
 {
-    if (use_armv6_instructions) {
-        /* uxth */
-        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rn, SHIFT_IMM_LSL(16));
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rd, SHIFT_IMM_LSR(16));
-    }
+    /* uxth */
+    tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
-static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
-                            TCGReg rd, TCGReg rn, int flags)
+static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
 {
-    if (use_armv6_instructions) {
-        if (flags & TCG_BSWAP_OS) {
-            /* revsh */
-            tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
-            return;
-        }
+    g_assert_not_reached();
+}
 
-        /* rev16 */
-        tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
-        if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
-            /* uxth */
-            tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
-        }
-        return;
-    }
+static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+    g_assert_not_reached();
+}
 
-    if (flags == 0) {
-        /*
-         * For stores, no input or output extension:
-         *   rn  = xxAB
-         *   lsr tmp, rn, #8           tmp = 0xxA
-         *   and tmp, tmp, #0xff       tmp = 000A
-         *   orr rd, tmp, rn, lsl #8   rd  = xABA
-         */
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
-        tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
-        tcg_out_dat_reg(s, cond, ARITH_ORR,
-                        rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
+                            TCGReg rd, TCGReg rn, int flags)
+{
+    if (flags & TCG_BSWAP_OS) {
+        /* revsh */
+        tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
         return;
     }
 
-    /*
-     * Byte swap, leaving the result at the top of the register.
-     * We will then shift down, zero or sign-extending.
-     */
-    if (flags & TCG_BSWAP_IZ) {
-        /*
-         *   rn  = 00AB
-         *   ror tmp, rn, #8               tmp = B00A
-         *   orr tmp, tmp, tmp, lsl #16    tmp = BA00
-         */
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, rn, SHIFT_IMM_ROR(8));
-        tcg_out_dat_reg(s, cond, ARITH_ORR,
-                        TCG_REG_TMP, TCG_REG_TMP, TCG_REG_TMP,
-                        SHIFT_IMM_LSL(16));
-    } else {
-        /*
-         *   rn  = xxAB
-         *   and tmp, rn, #0xff00          tmp = 00A0
-         *   lsl tmp, tmp, #8              tmp = 0A00
-         *   orr tmp, tmp, rn, lsl #24     tmp = BA00
-         */
-        tcg_out_dat_rI(s, cond, ARITH_AND, TCG_REG_TMP, rn, 0xff00, 1);
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSL(8));
-        tcg_out_dat_reg(s, cond, ARITH_ORR,
-                        TCG_REG_TMP, TCG_REG_TMP, rn, SHIFT_IMM_LSL(24));
+    /* rev16 */
+    tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+    if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+        /* uxth */
+        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
     }
-    tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, TCG_REG_TMP,
-                    (flags & TCG_BSWAP_OS
-                     ? SHIFT_IMM_ASR(8) : SHIFT_IMM_LSR(8)));
 }
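The TCG_BSWAP_* flag semantics that drive the revsh/rev16/uxth selection above, restated as portable C. The flag values below are placeholders; only the input/output-extension behaviour is the point of the sketch.

#include <assert.h>
#include <stdint.h>

#define TCG_BSWAP_IZ 1   /* input already zero-extended (placeholder value) */
#define TCG_BSWAP_OZ 2   /* output must be zero-extended */
#define TCG_BSWAP_OS 4   /* output must be sign-extended */

static uint32_t bswap16_flags(uint32_t rn, int flags)
{
    uint16_t swapped = (uint16_t)((rn << 8) | ((rn >> 8) & 0xff));

    if (flags & TCG_BSWAP_OS) {
        return (uint32_t)(int32_t)(int16_t)swapped;  /* revsh */
    }
    if (flags & TCG_BSWAP_OZ) {
        return swapped;                              /* rev16 + uxth */
    }
    /* Neither OZ nor OS: high bits are don't-care; keep input's here. */
    return (rn & 0xffff0000u) | swapped;
}

int main(void)
{
    assert(bswap16_flags(0x0080, TCG_BSWAP_OS) == 0xffff8000u);
    assert(bswap16_flags(0x1234, TCG_BSWAP_OZ) == 0x3412u);
    return 0;
}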
 
 static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
 {
-    if (use_armv6_instructions) {
-        /* rev */
-        tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_EOR,
-                        TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
-        tcg_out_dat_imm(s, cond, ARITH_BIC,
-                        TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
-        tcg_out_dat_reg(s, cond, ARITH_MOV,
-                        rd, 0, rn, SHIFT_IMM_ROR(8));
-        tcg_out_dat_reg(s, cond, ARITH_EOR,
-                        rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
-    }
+    /* rev */
+    tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
 }
 
 static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
@@ -1247,21 +1140,14 @@ static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr)
     }
 
     /* LDR is interworking from v5t. */
-    if (arm_mode || use_armv5t_instructions) {
-        tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
-        return;
-    }
-
-    /* else v4t */
-    tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
-    tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
+    tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
 }
 
 /*
  * The call case is mostly used for helpers - so it's not unreasonable
  * for them to be beyond branch range.
  */
-static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *addr)
 {
     intptr_t addri = (intptr_t)addr;
     ptrdiff_t disp = tcg_pcrel_diff(s, addr);
@@ -1270,26 +1156,20 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
     if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
         if (arm_mode) {
             tcg_out_bl_imm(s, COND_AL, disp);
-            return;
-        }
-        if (use_armv5t_instructions) {
+        } else {
             tcg_out_blx_imm(s, disp);
-            return;
         }
+        return;
     }
 
-    if (use_armv5t_instructions) {
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
-        tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
-    } else if (arm_mode) {
-        /* ??? Know that movi_pool emits exactly 1 insn. */
-        tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
-        tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
-    } else {
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
-        tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
-        tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
-    }
+    tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+    tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr,
+                         const TCGHelperInfo *info)
+{
+    tcg_out_call_int(s, addr);
 }
 
 static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l)
@@ -1306,11 +1186,38 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 {
     if (use_armv7_instructions) {
         tcg_out32(s, INSN_DMB_ISH);
-    } else if (use_armv6_instructions) {
+    } else {
         tcg_out32(s, INSN_DMB_MCR);
     }
 }
 
+static TCGCond tcg_out_cmp(TCGContext *s, TCGCond cond, TCGReg a,
+                           TCGArg b, int b_const)
+{
+    if (!is_tst_cond(cond)) {
+        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a, b, b_const);
+        return cond;
+    }
+
+    cond = tcg_tst_eqne_cond(cond);
+    if (b_const) {
+        int imm12 = encode_imm(b);
+
+        /*
+         * The compare constraints allow rIN, but TST does not support N.
+         * Be prepared to load the constant into a scratch register.
+         */
+        if (imm12 >= 0) {
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, a, imm12);
+            return cond;
+        }
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, b);
+        b = TCG_REG_TMP;
+    }
+    tcg_out_dat_reg(s, COND_AL, ARITH_TST, 0, a, b, SHIFT_IMM_LSL(0));
+    return cond;
+}
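What tcg_out_cmp arranges above, in plain C: a TSTEQ/TSTNE test "(a & b) ==/!= 0" is evaluated with ARM's TST (an AND that only sets flags), after which the ordinary EQ/NE condition codes apply. The helper name below is a stand-in for the backend's is_tst_cond/tcg_tst_eqne_cond pair.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool tst_cond_result(uint32_t a, uint32_t b, bool is_tsteq)
{
    bool z = (a & b) == 0;     /* Z flag after TST a, b */
    return is_tsteq ? z : !z;  /* TSTEQ maps to EQ, TSTNE to NE */
}

int main(void)
{
    assert(tst_cond_result(0xf0, 0x0f, true));   /* no common bits: EQ */
    assert(tst_cond_result(0xff, 0x10, false));  /* common bit set: NE */
    return 0;
}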
+
 static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
 {
@@ -1329,13 +1236,22 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
     case TCG_COND_LEU:
     case TCG_COND_GTU:
     case TCG_COND_GEU:
-        /* We perform a conditional comparision.  If the high half is
-           equal, then overwrite the flags with the comparison of the
-           low half.  The resulting flags cover the whole. */
+        /*
+         * We perform a conditional comparison.  If the high half is
+         * equal, then overwrite the flags with the comparison of the
+         * low half.  The resulting flags cover the whole.
+         */
         tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
         tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
         return cond;
 
+    case TCG_COND_TSTEQ:
+    case TCG_COND_TSTNE:
+        /* Similar, but with TST instead of CMP. */
+        tcg_out_dat_rI(s, COND_AL, ARITH_TST, 0, ah, bh, const_bh);
+        tcg_out_dat_rI(s, COND_EQ, ARITH_TST, 0, al, bl, const_bl);
+        return tcg_tst_eqne_cond(cond);
+
     case TCG_COND_LT:
     case TCG_COND_GE:
         /* We perform a double-word subtraction and examine the result.
@@ -1363,7 +1279,7 @@ static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
 
 /*
  * Note that TCGReg references Q-registers.
- * Q-regno = 2 * D-regno, so shift left by 1 whlie inserting.
+ * Q-regno = 2 * D-regno, so shift left by 1 while inserting.
  */
 static uint32_t encode_vd(TCGReg rd)
 {
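The conditional-comparison trick used by tcg_out_cmp2 above, modeled in C: compare the high halves first, and only if they are equal let the low-half comparison decide. Shown for unsigned less-than; an illustrative model, not backend code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* CMP ah, bh; CMPEQ al, bl -- the flags then answer a 64-bit LTU. */
static bool brcond2_ltu(uint32_t al, uint32_t ah,
                        uint32_t bl, uint32_t bh)
{
    if (ah != bh) {
        return ah < bh;    /* high halves decide */
    }
    return al < bl;        /* equal highs: low halves decide */
}

int main(void)
{
    assert(brcond2_ltu(0, 1, 0, 2));
    assert(brcond2_ltu(1, 7, 2, 7));
    assert(!brcond2_ltu(3, 7, 2, 7));
    return 0;
}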
@@ -1431,283 +1347,42 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
     tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
 }
 
-#ifdef CONFIG_SOFTMMU
-#include "../tcg-ldst.c.inc"
-
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
- *                                     int mmu_idx, uintptr_t ra)
- */
-static void * const qemu_ld_helpers[8] = {
-    [MO_UB] = helper_ret_ldub_mmu,
-    [MO_SB] = helper_ret_ldsb_mmu,
-#ifdef HOST_WORDS_BIGENDIAN
-    [MO_UW] = helper_be_lduw_mmu,
-    [MO_UL] = helper_be_ldul_mmu,
-    [MO_Q]  = helper_be_ldq_mmu,
-    [MO_SW] = helper_be_ldsw_mmu,
-    [MO_SL] = helper_be_ldul_mmu,
-#else
-    [MO_UW] = helper_le_lduw_mmu,
-    [MO_UL] = helper_le_ldul_mmu,
-    [MO_Q]  = helper_le_ldq_mmu,
-    [MO_SW] = helper_le_ldsw_mmu,
-    [MO_SL] = helper_le_ldul_mmu,
-#endif
-};
-
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
- *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
- */
-static void * const qemu_st_helpers[4] = {
-    [MO_8]  = helper_ret_stb_mmu,
-#ifdef HOST_WORDS_BIGENDIAN
-    [MO_16] = helper_be_stw_mmu,
-    [MO_32] = helper_be_stl_mmu,
-    [MO_64] = helper_be_stq_mmu,
-#else
-    [MO_16] = helper_le_stw_mmu,
-    [MO_32] = helper_le_stl_mmu,
-    [MO_64] = helper_le_stq_mmu,
-#endif
-};
-
-/* Helper routines for marshalling helper function arguments into
- * the correct registers and stack.
- * argreg is where we want to put this argument, arg is the argument itself.
- * Return value is the updated argreg ready for the next call.
- * Note that argreg 0..3 is real registers, 4+ on stack.
- *
- * We provide routines for arguments which are: immediate, 32 bit
- * value in register, 16 and 8 bit values in register (which must be zero
- * extended before use) and 64 bit value in a lo:hi register pair.
- */
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
-{                                                                          \
-    if (argreg < 4) {                                                      \
-        MOV_ARG(s, COND_AL, argreg, arg);                                  \
-    } else {                                                               \
-        int ofs = (argreg - 4) * 4;                                        \
-        EXT_ARG;                                                           \
-        tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
-        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
-    }                                                                      \
-    return argreg + 1;                                                     \
-}
-
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
-    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
-    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
-    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
-
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
-                                TCGReg arglo, TCGReg arghi)
-{
-    /* 64 bit arguments must go in even/odd register pairs
-     * and in 8-aligned stack slots.
-     */
-    if (argreg & 1) {
-        argreg++;
-    }
-    if (use_armv6_instructions && argreg >= 4
-        && (arglo & 1) == 0 && arghi == arglo + 1) {
-        tcg_out_strd_8(s, COND_AL, arglo,
-                       TCG_REG_CALL_STACK, (argreg - 4) * 4);
-        return argreg + 2;
-    } else {
-        argreg = tcg_out_arg_reg32(s, argreg, arglo);
-        argreg = tcg_out_arg_reg32(s, argreg, arghi);
-        return argreg;
-    }
-}
-
-#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
-
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV.  */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
-
-/* These offsets are built into the LDRD below.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
-
-/* Load and compare a TLB entry, leaving the flags set.  Returns the register
-   containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
+typedef struct {
+    ARMCond cond;
+    TCGReg base;
+    int index;
+    bool index_scratch;
+    TCGAtomAlign aa;
+} HostAddress;
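The HostAddress record introduced above centralizes the addressing-mode choice that the load/store emitters make: index < 0 selects the immediate-offset form [base, #0], otherwise the register-offset form [base, index]. A standalone mock-up of that decision, with placeholder types, for illustration only:

#include <assert.h>
#include <stdbool.h>
#include <string.h>

typedef int ARMCondSketch;
typedef int TCGRegSketch;

typedef struct {
    ARMCondSketch cond;
    TCGRegSketch base;
    int index;          /* -1: no index register */
    bool index_scratch; /* index may be clobbered (e.g. the TLB addend) */
} HostAddressSketch;

/* Pick between "ldr rt, [base]" and "ldr rt, [base, index]". */
static const char *addressing_form(HostAddressSketch h)
{
    return h.index < 0 ? "[base, #0]" : "[base, index]";
}

int main(void)
{
    HostAddressSketch direct  = { .cond = 0, .base = 3, .index = -1 };
    HostAddressSketch indexed = { .cond = 0, .base = 3, .index = 1,
                                  .index_scratch = true };
    assert(strcmp(addressing_form(direct), "[base, #0]") == 0);
    assert(strcmp(addressing_form(indexed), "[base, index]") == 0);
    return 0;
}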
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                               MemOp opc, int mem_index, bool is_load)
+bool tcg_target_has_memory_bswap(MemOp memop)
 {
-    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
-                   : offsetof(CPUTLBEntry, addr_write));
-    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
-    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
-    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
-    unsigned s_bits = opc & MO_SIZE;
-    unsigned a_bits = get_alignment_bits(opc);
-
-    /*
-     * We don't support inline unaligned acceses, but we can easily
-     * support overalignment checks.
-     */
-    if (a_bits < s_bits) {
-        a_bits = s_bits;
-    }
-
-    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
-    if (use_armv6_instructions) {
-        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
-    } else {
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R0, TCG_AREG0, mask_off);
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R1, TCG_AREG0, table_off);
-    }
-
-    /* Extract the tlb index from the address into R0. */
-    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
-                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
-    /*
-     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
-     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
-     */
-    if (cmp_off == 0) {
-        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
-        } else {
-            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
-        }
-    } else {
-        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
-        if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-        } else {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-        }
-    }
-    if (!use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_ld32_12(s, COND_AL, TCG_REG_R3, TCG_REG_R1, cmp_off + 4);
-    }
-
-    /* Load the tlb addend. */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
-                    offsetof(CPUTLBEntry, addend));
-
-    /*
-     * Check alignment, check comparators.
-     * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
-     * to reduce the number of sequential conditional instructions.
-     * Almost all guests have at least 4k pages, which means that we need
-     * to clear at least 9 bits even for an 8-byte memory, which means it
-     * isn't worth checking for an immediate operand for BIC.
-     */
-    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
-        tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
-
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
-        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
-                        addrlo, TCG_REG_TMP, 0);
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
-    } else {
-        if (a_bits) {
-            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
-                            (1 << a_bits) - 1);
-        }
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
-                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-        tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
-                        0, TCG_REG_R2, TCG_REG_TMP,
-                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
-    }
-
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
-    }
-
-    return TCG_REG_R1;
+    return false;
 }
 
-/* Record the context of a call to the out of line helper code for the slow
-   path for a load or store, so that we can later generate the correct
-   helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
-                                TCGReg datalo, TCGReg datahi, TCGReg addrlo,
-                                TCGReg addrhi, tcg_insn_unit *raddr,
-                                tcg_insn_unit *label_ptr)
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
 {
-    TCGLabelQemuLdst *label = new_ldst_label(s);
-
-    label->is_ld = is_ld;
-    label->oi = oi;
-    label->datalo_reg = datalo;
-    label->datahi_reg = datahi;
-    label->addrlo_reg = addrlo;
-    label->addrhi_reg = addrhi;
-    label->raddr = tcg_splitwx_to_rx(raddr);
-    label->label_ptr[0] = label_ptr;
+    /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
+    return TCG_REG_R14;
 }
 
+static const TCGLdstHelperParam ldst_helper_param = {
+    .ra_gen = ldst_ra_gen,
+    .ntmp = 1,
+    .tmp = { TCG_REG_TMP },
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    TCGReg argreg, datalo, datahi;
-    TCGMemOpIdx oi = lb->oi;
-    MemOp opc = get_memop(oi);
-    void *func;
+    MemOp opc = get_memop(lb->oi);
 
     if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
         return false;
     }
 
-    argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
-    if (TARGET_LONG_BITS == 64) {
-        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
-    } else {
-        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
-    }
-    argreg = tcg_out_arg_imm32(s, argreg, oi);
-    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
-
-    /* For armv6 we can use the canonical unsigned helpers and minimize
-       icache usage.  For pre-armv6, use the signed helpers since we do
-       not have a single insn sign-extend.  */
-    if (use_armv6_instructions) {
-        func = qemu_ld_helpers[opc & MO_SIZE];
-    } else {
-        func = qemu_ld_helpers[opc & MO_SSIZE];
-        if (opc & MO_SIGN) {
-            opc = MO_UL;
-        }
-    }
-    tcg_out_call(s, func);
-
-    datalo = lb->datalo_reg;
-    datahi = lb->datahi_reg;
-    switch (opc & MO_SSIZE) {
-    case MO_SB:
-        tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
-        break;
-    case MO_SW:
-        tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
-        break;
-    default:
-        tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
-        break;
-    case MO_Q:
-        if (datalo != TCG_REG_R1) {
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-        } else if (datahi != TCG_REG_R0) {
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
-        } else {
-            tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
-            tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
-            tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
-        }
-        break;
-    }
+    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
+    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
+    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 
     tcg_out_goto(s, COND_AL, lb->raddr);
     return true;
@@ -1715,200 +1390,324 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    TCGReg argreg, datalo, datahi;
-    TCGMemOpIdx oi = lb->oi;
-    MemOp opc = get_memop(oi);
+    MemOp opc = get_memop(lb->oi);
 
     if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
         return false;
     }
 
-    argreg = TCG_REG_R0;
-    argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-    if (TARGET_LONG_BITS == 64) {
-        argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
-    } else {
-        argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
-    }
-
-    datalo = lb->datalo_reg;
-    datahi = lb->datahi_reg;
-    switch (opc & MO_SIZE) {
-    case MO_8:
-        argreg = tcg_out_arg_reg8(s, argreg, datalo);
-        break;
-    case MO_16:
-        argreg = tcg_out_arg_reg16(s, argreg, datalo);
-        break;
-    case MO_32:
-    default:
-        argreg = tcg_out_arg_reg32(s, argreg, datalo);
-        break;
-    case MO_64:
-        argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
-        break;
-    }
-
-    argreg = tcg_out_arg_imm32(s, argreg, oi);
-    argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
+    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
 
     /* Tail-call to the helper, which will return to the fast path.  */
     tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
     return true;
 }
-#endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
-                                  TCGReg datalo, TCGReg datahi,
-                                  TCGReg addrlo, TCGReg addend)
+/* We expect to use an 9-bit sign-magnitude negative offset from ENV.  */
+#define MIN_TLB_MASK_TABLE_OFS  -256
+
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+                                           TCGReg addrlo, TCGReg addrhi,
+                                           MemOpIdx oi, bool is_ld)
 {
-    /* Byte swapping is left to middle-end expansion. */
-    tcg_debug_assert((opc & MO_BSWAP) == 0);
+    TCGLabelQemuLdst *ldst = NULL;
+    MemOp opc = get_memop(oi);
+    unsigned a_mask;
+
+    if (tcg_use_softmmu) {
+        *h = (HostAddress){
+            .cond = COND_AL,
+            .base = addrlo,
+            .index = TCG_REG_R1,
+            .index_scratch = true,
+        };
+    } else {
+        *h = (HostAddress){
+            .cond = COND_AL,
+            .base = addrlo,
+            .index = guest_base ? TCG_REG_GUEST_BASE : -1,
+            .index_scratch = false,
+        };
+    }
+
+    h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
+    a_mask = (1 << h->aa.align) - 1;
+
+    if (tcg_use_softmmu) {
+        int mem_index = get_mmuidx(oi);
+        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+                            : offsetof(CPUTLBEntry, addr_write);
+        int fast_off = tlb_mask_table_ofs(s, mem_index);
+        unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
+        TCGReg t_addr;
+
+        ldst = new_ldst_label(s);
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addrlo;
+        ldst->addrhi_reg = addrhi;
+
+        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {r0,r1}. */
+        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
 
-    switch (opc & MO_SSIZE) {
-    case MO_UB:
-        tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
-        break;
-    case MO_SB:
-        tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
-        break;
-    case MO_UW:
-        tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
-        break;
-    case MO_SW:
-        tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
-        break;
-    case MO_UL:
-        tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
-        break;
-    case MO_Q:
-        /* Avoid ldrd for user-only emulation, to handle unaligned. */
-        if (USING_SOFTMMU && use_armv6_instructions
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-            tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
-        } else if (datalo != addend) {
-            tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
-            tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
+        /* Extract the tlb index from the address into R0. */
+        tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+                        SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
+
+        /*
+         * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+         * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+         */
+        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
+        if (cmp_off == 0) {
+            if (s->addr_type == TCG_TYPE_I32) {
+                tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2,
+                                 TCG_REG_R1, TCG_REG_R0);
+            } else {
+                tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2,
+                                 TCG_REG_R1, TCG_REG_R0);
+            }
         } else {
-            tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
-                            addend, addrlo, SHIFT_IMM_LSL(0));
-            tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
-            tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
+            tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+                            TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+            if (s->addr_type == TCG_TYPE_I32) {
+                tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+            } else {
+                tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+            }
         }
-        break;
-    default:
-        g_assert_not_reached();
+
+        /* Load the tlb addend. */
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+                        offsetof(CPUTLBEntry, addend));
+
+        /*
+         * Check alignment, check comparators.
+         * Do this in 2-4 insns.  Use MOVW for v7, if possible,
+         * to reduce the number of sequential conditional instructions.
+         * Almost all guests have at least 4k pages, which means that we need
+         * to clear at least 9 bits even for an 8-byte memory, which means it
+         * isn't worth checking for an immediate operand for BIC.
+         *
+         * For unaligned accesses, test the page of the last unit of alignment.
+         * This leaves the least significant alignment bits unchanged, and of
+         * course must be zero.
+         */
+        t_addr = addrlo;
+        if (a_mask < s_mask) {
+            t_addr = TCG_REG_R0;
+            tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
+                            addrlo, s_mask - a_mask);
+        }
+        if (use_armv7_instructions && s->page_bits <= 16) {
+            tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
+            tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                            t_addr, TCG_REG_TMP, 0);
+            tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
+                            TCG_REG_R2, TCG_REG_TMP, 0);
+        } else {
+            if (a_mask) {
+                tcg_debug_assert(a_mask <= 0xff);
+                tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+            }
+            tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
+                            SHIFT_IMM_LSR(s->page_bits));
+            tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
+                            0, TCG_REG_R2, TCG_REG_TMP,
+                            SHIFT_IMM_LSL(s->page_bits));
+        }
+
+        if (s->addr_type != TCG_TYPE_I32) {
+            tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
+        }
+    } else if (a_mask) {
+        ldst = new_ldst_label(s);
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addrlo;
+        ldst->addrhi_reg = addrhi;
+
+        /* We are expecting alignment to max out at 7 */
+        tcg_debug_assert(a_mask <= 0xff);
+        /* tst addr, #mask */
+        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
     }
+
+    return ldst;
 }
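The fast-path check built by prepare_host_addr above, modeled in C. The constants are illustrative (4k pages, 32-bit guest); page_bits, s_mask and a_mask play the same roles as in the diff. For an access that may be unaligned, the page of the last byte is checked by first adding s_mask - a_mask, so a page-straddling access fails the compare and takes the slow path.

#include <assert.h>
#include <stdint.h>

enum { PAGE_BITS = 12 };   /* illustrative: 4k guest pages */

static int tlb_fast_path_hits(uint32_t addr, uint32_t tlb_comparator,
                              unsigned s_mask, unsigned a_mask)
{
    uint32_t t_addr = addr + (s_mask - a_mask);   /* last unit accessed */
    uint32_t page = t_addr & ~((1u << PAGE_BITS) - 1);

    /* Alignment bits must be zero and the page must match the TLB. */
    return (addr & a_mask) == 0 && page == tlb_comparator;
}

int main(void)
{
    /* 4-byte load at the end of a page: its last byte is on the next
       page, so the TLB compare must fail and the slow path runs. */
    assert(!tlb_fast_path_hits(0x1ffe, 0x1000, 3, 0));
    /* The same load fully inside the page hits. */
    assert(tlb_fast_path_hits(0x1ff0, 0x1000, 3, 0));
    return 0;
}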
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
-                                   TCGReg datahi, TCGReg addrlo)
+                                   TCGReg datahi, HostAddress h)
 {
+    TCGReg base;
+
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((opc & MO_BSWAP) == 0);
 
     switch (opc & MO_SSIZE) {
     case MO_UB:
-        tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
+        if (h.index < 0) {
+            tcg_out_ld8_12(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_ld8_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_SB:
-        tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
+        if (h.index < 0) {
+            tcg_out_ld8s_8(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_ld8s_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_UW:
-        tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
+        if (h.index < 0) {
+            tcg_out_ld16u_8(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_ld16u_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_SW:
-        tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
+        if (h.index < 0) {
+            tcg_out_ld16s_8(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_ld16s_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_UL:
-        tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
-        break;
-    case MO_Q:
-        /* Avoid ldrd for user-only emulation, to handle unaligned. */
-        if (USING_SOFTMMU && use_armv6_instructions
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-            tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
-        } else if (datalo == addrlo) {
-            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
-            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
+        if (h.index < 0) {
+            tcg_out_ld32_12(s, h.cond, datalo, h.base, 0);
         } else {
-            tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
-            tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
+            tcg_out_ld32_r(s, h.cond, datalo, h.base, h.index);
         }
         break;
+    case MO_UQ:
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
+        /* LDRD requires alignment; double-check that. */
+        if (get_alignment_bits(opc) >= MO_64) {
+            if (h.index < 0) {
+                tcg_out_ldrd_8(s, h.cond, datalo, h.base, 0);
+                break;
+            }
+            /*
+             * Rm (the second address op) must not overlap Rt or Rt + 1.
+             * Since datalo is aligned, we can simplify the test via alignment.
+             * Flip the two address arguments if that works.
+             */
+            if ((h.index & ~1) != datalo) {
+                tcg_out_ldrd_r(s, h.cond, datalo, h.base, h.index);
+                break;
+            }
+            if ((h.base & ~1) != datalo) {
+                tcg_out_ldrd_r(s, h.cond, datalo, h.index, h.base);
+                break;
+            }
+        }
+        if (h.index < 0) {
+            base = h.base;
+            if (datalo == h.base) {
+                tcg_out_mov_reg(s, h.cond, TCG_REG_TMP, base);
+                base = TCG_REG_TMP;
+            }
+        } else if (h.index_scratch) {
+            tcg_out_ld32_rwb(s, h.cond, datalo, h.index, h.base);
+            tcg_out_ld32_12(s, h.cond, datahi, h.index, 4);
+            break;
+        } else {
+            tcg_out_dat_reg(s, h.cond, ARITH_ADD, TCG_REG_TMP,
+                            h.base, h.index, SHIFT_IMM_LSL(0));
+            base = TCG_REG_TMP;
+        }
+        tcg_out_ld32_12(s, h.cond, datalo, base, 0);
+        tcg_out_ld32_12(s, h.cond, datahi, base, 4);
+        break;
     default:
         g_assert_not_reached();
     }
 }
-#endif
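The LDRD operand rule encoded in the MO_UQ case above, checked in plain C: for "ldrd Rt, Rt+1, [Rn, Rm]", Rm must not be Rt or Rt + 1. Because the backend allocates datalo as an aligned even/odd pair, comparing Rm with the pair base after clearing bit 0 covers both registers at once. Names here are illustrative.

#include <assert.h>

static int ldrd_index_ok(int datalo, int rm)
{
    /* datalo is even; (rm & ~1) == datalo iff rm is datalo or datalo+1. */
    return (rm & ~1) != datalo;
}

int main(void)
{
    assert(!ldrd_index_ok(4, 4));  /* Rm == Rt */
    assert(!ldrd_index_ok(4, 5));  /* Rm == Rt + 1 */
    assert(ldrd_index_ok(4, 6));   /* disjoint: usable */
    return 0;
}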
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
+                            TCGReg addrlo, TCGReg addrhi,
+                            MemOpIdx oi, TCGType data_type)
 {
-    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-    TCGMemOpIdx oi;
-    MemOp opc;
-#ifdef CONFIG_SOFTMMU
-    int mem_index;
-    TCGReg addend;
-    tcg_insn_unit *label_ptr;
-#endif
+    MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
 
-    datalo = *args++;
-    datahi = (is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-    oi = *args++;
-    opc = get_memop(oi);
-
-#ifdef CONFIG_SOFTMMU
-    mem_index = get_mmuidx(oi);
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
-
-    /* This a conditional BL only to load a pointer within this opcode into LR
-       for the slow path.  We will not be using the value for a tail call.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
-
-    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
-
-    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
-                        s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
-    if (guest_base) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
+
+        /*
+         * This a conditional BL only to load a pointer within this
+         * opcode into LR for the slow path.  We will not be using
+         * the value for a tail call.
+         */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out_bl_imm(s, COND_NE, 0);
+
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
     } else {
-        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
     }
-#endif
 }
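The "BLNE 0" placeholder emitted above is later resolved in the spirit of the backend's reloc_pc24: compute the word-scaled displacement from pc+8 and patch it into the low 24 bits of the instruction. The function name and bounds handling below are illustrative, not the file's.

#include <assert.h>
#include <stdint.h>

static int patch_pc24(uint32_t *insn, int64_t target, int64_t insn_addr)
{
    int64_t disp = (target - (insn_addr + 8)) >> 2;

    if (disp < -(1 << 23) || disp >= (1 << 23)) {
        return 0;                       /* out of 24-bit signed range */
    }
    *insn = (*insn & 0xff000000u) | ((uint32_t)disp & 0x00ffffffu);
    return 1;
}

int main(void)
{
    uint32_t insn = 0x1b000000;         /* blne, offset placeholder 0 */
    assert(patch_pc24(&insn, 0x1000, 0x0ff0));
    assert((insn & 0x00ffffff) == 2);   /* (0x1000 - 0x0ff8) >> 2 */
    return 0;
}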
 
-static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
-                                  TCGReg datalo, TCGReg datahi,
-                                  TCGReg addrlo, TCGReg addend)
+static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
+                                   TCGReg datahi, HostAddress h)
 {
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((opc & MO_BSWAP) == 0);
 
     switch (opc & MO_SIZE) {
     case MO_8:
-        tcg_out_st8_r(s, cond, datalo, addrlo, addend);
+        if (h.index < 0) {
+            tcg_out_st8_12(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_st8_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_16:
-        tcg_out_st16_r(s, cond, datalo, addrlo, addend);
+        if (h.index < 0) {
+            tcg_out_st16_8(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_st16_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_32:
-        tcg_out_st32_r(s, cond, datalo, addrlo, addend);
+        if (h.index < 0) {
+            tcg_out_st32_12(s, h.cond, datalo, h.base, 0);
+        } else {
+            tcg_out_st32_r(s, h.cond, datalo, h.base, h.index);
+        }
         break;
     case MO_64:
-        /* Avoid strd for user-only emulation, to handle unaligned. */
-        if (USING_SOFTMMU && use_armv6_instructions
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-            tcg_out_strd_r(s, cond, datalo, addrlo, addend);
+        /* We used pair allocation for datalo, so already should be aligned. */
+        tcg_debug_assert((datalo & 1) == 0);
+        tcg_debug_assert(datahi == datalo + 1);
+        /* STRD requires alignment; double-check that. */
+        if (get_alignment_bits(opc) >= MO_64) {
+            if (h.index < 0) {
+                tcg_out_strd_8(s, h.cond, datalo, h.base, 0);
+            } else {
+                tcg_out_strd_r(s, h.cond, datalo, h.base, h.index);
+            }
+        } else if (h.index < 0) {
+            tcg_out_st32_12(s, h.cond, datalo, h.base, 0);
+            tcg_out_st32_12(s, h.cond, datahi, h.base, 4);
+        } else if (h.index_scratch) {
+            tcg_out_st32_rwb(s, h.cond, datalo, h.index, h.base);
+            tcg_out_st32_12(s, h.cond, datahi, h.index, 4);
         } else {
-            tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
-            tcg_out_st32_12(s, cond, datahi, addend, 4);
+            tcg_out_dat_reg(s, h.cond, ARITH_ADD, TCG_REG_TMP,
+                            h.base, h.index, SHIFT_IMM_LSL(0));
+            tcg_out_st32_12(s, h.cond, datalo, TCG_REG_TMP, 0);
+            tcg_out_st32_12(s, h.cond, datahi, TCG_REG_TMP, 4);
         }
         break;
     default:
@@ -1916,81 +1715,89 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
     }
 }
 
-#ifndef CONFIG_SOFTMMU
-static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
-                                   TCGReg datahi, TCGReg addrlo)
+static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
+                            TCGReg addrlo, TCGReg addrhi,
+                            MemOpIdx oi, TCGType data_type)
 {
-    /* Byte swapping is left to middle-end expansion. */
-    tcg_debug_assert((opc & MO_BSWAP) == 0);
-
-    switch (opc & MO_SIZE) {
-    case MO_8:
-        tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
-        break;
-    case MO_16:
-        tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
-        break;
-    case MO_32:
-        tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
-        break;
-    case MO_64:
-        /* Avoid strd for user-only emulation, to handle unaligned. */
-        if (USING_SOFTMMU && use_armv6_instructions
-            && (datalo & 1) == 0 && datahi == datalo + 1) {
-            tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
-        } else {
-            tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
-            tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
-        }
-        break;
-    default:
-        g_assert_not_reached();
+    MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
+    HostAddress h;
+
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
+
+        h.cond = COND_EQ;
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+
+        /* The conditional call is last, as we're going to return here. */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out_bl_imm(s, COND_NE, 0);
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    } else {
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
     }
 }
-#endif
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_epilogue(TCGContext *s);
+
+static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
 {
-    TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-    TCGMemOpIdx oi;
-    MemOp opc;
-#ifdef CONFIG_SOFTMMU
-    int mem_index;
-    TCGReg addend;
-    tcg_insn_unit *label_ptr;
-#endif
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, arg);
+    tcg_out_epilogue(s);
+}
 
-    datalo = *args++;
-    datahi = (is64 ? *args++ : 0);
-    addrlo = *args++;
-    addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-    oi = *args++;
-    opc = get_memop(oi);
-
-#ifdef CONFIG_SOFTMMU
-    mem_index = get_mmuidx(oi);
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
-
-    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
-
-    /* The conditional call must come last, as we're going to return here.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
-
-    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
-                        s->code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
-    if (guest_base) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-        tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
-                              datahi, addrlo, TCG_REG_TMP);
+static void tcg_out_goto_tb(TCGContext *s, int which)
+{
+    uintptr_t i_addr;
+    intptr_t i_disp;
+
+    /* Direct branch will be patched by tb_target_set_jmp_target. */
+    set_jmp_insn_offset(s, which);
+    tcg_out32(s, INSN_NOP);
+
+    /* When branch is out of range, fall through to indirect. */
+    i_addr = get_jmp_target_addr(s, which);
+    i_disp = tcg_pcrel_diff(s, (void *)i_addr) - 8;
+    tcg_debug_assert(i_disp < 0);
+    if (i_disp >= -0xfff) {
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, i_disp);
     } else {
-        tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
+        /*
+         * The TB is close, but outside the 12 bits addressable by
+         * the load.  We can extend this to 20 bits with a sub of a
+         * shifted immediate from pc.
+         */
+        int h = -i_disp;
+        int l = -(h & 0xfff);
+
+        h = encode_imm_nofail(h + l);
+        tcg_out_dat_imm(s, COND_AL, ARITH_SUB, TCG_REG_R0, TCG_REG_PC, h);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, l);
     }
-#endif
+    set_jmp_reset_offset(s, which);
 }
 
-static void tcg_out_epilogue(TCGContext *s);
+void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
+                              uintptr_t jmp_rx, uintptr_t jmp_rw)
+{
+    uintptr_t addr = tb->jmp_target_addr[n];
+    ptrdiff_t offset = addr - (jmp_rx + 8);
+    tcg_insn_unit insn;
+
+    /* Either directly branch, or fall through to indirect branch. */
+    if (offset == sextract64(offset, 0, 26)) {
+        /* B <addr> */
+        insn = deposit32((COND_AL << 28) | INSN_B, 0, 24, offset >> 2);
+    } else {
+        insn = INSN_NOP;
+    }
+
+    qatomic_set((uint32_t *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
+}
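The patching decision in tb_target_set_jmp_target above, standalone: write a direct B when the displacement fits the branch's 26-bit span (24-bit field, word-scaled), else leave a NOP so execution falls through to the indirect load. INSN_B matches the diff; the NOP encoding below is an assumption for illustration.

#include <assert.h>
#include <stdint.h>

#define INSN_B   0x0a000000u
#define INSN_NOP 0xe320f000u            /* assumed ARM NOP encoding */

static uint32_t jmp_insn(int64_t target, int64_t jmp_rx)
{
    int64_t offset = target - (jmp_rx + 8);

    if (offset >= -(1LL << 25) && offset < (1LL << 25)) {
        return (0xeu << 28) | INSN_B | (((uint32_t)offset >> 2) & 0xffffff);
    }
    return INSN_NOP;
}

int main(void)
{
    /* Branch to 0x100 from 0: the field holds (0x100 - 8) >> 2 = 0x3e. */
    assert(jmp_insn(0x100, 0) == 0xea00003eu);
    return 0;
}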
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        const TCGArg args[TCG_MAX_OP_ARGS],
@@ -2000,33 +1807,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     int c;
 
     switch (opc) {
-    case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
-        tcg_out_epilogue(s);
-        break;
-    case INDEX_op_goto_tb:
-        {
-            /* Indirect jump method */
-            intptr_t ptr, dif, dil;
-            TCGReg base = TCG_REG_PC;
-
-            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
-            ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
-            dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
-            dil = sextract32(dif, 0, 12);
-            if (dif != dil) {
-                /* The TB is close, but outside the 12 bits addressable by
-                   the load.  We can extend this to 20 bits with a sub of a
-                   shifted immediate from pc.  In the vastly unlikely event
-                   the code requires more than 1MB, we'll use 2 insns and
-                   be no worse off.  */
-                base = TCG_REG_R0;
-                tcg_out_movi32(s, COND_AL, base, ptr - dil);
-            }
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
-            set_jmp_reset_offset(s, args[0]);
-        }
-        break;
     case INDEX_op_goto_ptr:
         tcg_out_b_reg(s, COND_AL, args[0]);
         break;
@@ -2063,9 +1843,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         /* Constraints mean that v2 is always in the same register as dest,
          * so we only need to do "if condition passed, move v1 to dest".
          */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[2], const_args[2]);
-        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
+        c = tcg_out_cmp(s, args[5], args[1], args[2], const_args[2]);
+        tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[c], ARITH_MOV,
                         ARITH_MVN, args[0], 0, args[3], const_args[3]);
         break;
     case INDEX_op_add_i32:
@@ -2215,17 +1994,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_brcond_i32:
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[0], args[1], const_args[1]);
-        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
-                           arg_label(args[3]));
+        c = tcg_out_cmp(s, args[2], args[0], args[1], const_args[1]);
+        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[3]));
         break;
     case INDEX_op_setcond_i32:
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[2], const_args[2]);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
+        c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c],
                         ARITH_MOV, args[0], 0, 1);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
+                        ARITH_MOV, args[0], 0, 0);
+        break;
+    case INDEX_op_negsetcond_i32:
+        c = tcg_out_cmp(s, args[3], args[1], args[2], const_args[2]);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c],
+                        ARITH_MVN, args[0], 0, 0);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                         ARITH_MOV, args[0], 0, 0);
         break;
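Semantics of the new negsetcond_i32 case: produce -1 when the condition holds and 0 otherwise. The MVN/MOV pair above writes ~0 under the condition and 0 under its inverse; this is the portable equivalent, shown for unsigned less-than:

#include <assert.h>
#include <stdint.h>

static uint32_t negsetcond_ltu(uint32_t a, uint32_t b)
{
    return a < b ? -1u : 0u;   /* all-ones on true, zero on false */
}

int main(void)
{
    assert(negsetcond_ltu(1, 2) == 0xffffffffu);
    assert(negsetcond_ltu(2, 1) == 0);
    return 0;
}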
@@ -2240,17 +2023,36 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                         ARITH_MOV, args[0], 0, 0);
         break;
 
-    case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args, 0);
+    case INDEX_op_qemu_ld_a32_i32:
+        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args, 1);
+    case INDEX_op_qemu_ld_a64_i32:
+        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
+                        args[3], TCG_TYPE_I32);
         break;
-    case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args, 0);
+    case INDEX_op_qemu_ld_a32_i64:
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
+                        args[3], TCG_TYPE_I64);
         break;
-    case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args, 1);
+    case INDEX_op_qemu_ld_a64_i64:
+        tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
+                        args[4], TCG_TYPE_I64);
+        break;
+
+    case INDEX_op_qemu_st_a32_i32:
+        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
+        break;
+    case INDEX_op_qemu_st_a64_i32:
+        tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
+                        args[3], TCG_TYPE_I32);
+        break;
+    case INDEX_op_qemu_st_a32_i64:
+        tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
+                        args[3], TCG_TYPE_I64);
+        break;
+    case INDEX_op_qemu_st_a64_i64:
+        tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
+                        args[4], TCG_TYPE_I64);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -2260,16 +2062,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_bswap32(s, COND_AL, args[0], args[1]);
         break;
 
-    case INDEX_op_ext8s_i32:
-        tcg_out_ext8s(s, COND_AL, args[0], args[1]);
-        break;
-    case INDEX_op_ext16s_i32:
-        tcg_out_ext16s(s, COND_AL, args[0], args[1]);
-        break;
-    case INDEX_op_ext16u_i32:
-        tcg_out_ext16u(s, COND_AL, args[0], args[1]);
-        break;
-
     case INDEX_op_deposit_i32:
         tcg_out_deposit(s, COND_AL, args[0], args[2],
                         args[3], args[4], const_args[2]);
@@ -2315,8 +2107,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
+    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
+    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
+    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
+    case INDEX_op_ext8u_i32:
+    case INDEX_op_ext16s_i32:
+    case INDEX_op_ext16u_i32:
     default:
-        tcg_abort();
+        g_assert_not_reached();
     }
 }
 
@@ -2350,6 +2148,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_add_i32:
     case INDEX_op_sub_i32:
    case INDEX_op_setcond_i32:
+    case INDEX_op_negsetcond_i32:
        return C_O1_I2(r, r, rIN);
 
     case INDEX_op_and_i32:
@@ -2395,14 +2194,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_setcond2_i32:
         return C_O1_I4(r, r, r, rI, rI);
 
-    case INDEX_op_qemu_ld_i32:
-        return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
-    case INDEX_op_qemu_ld_i64:
-        return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
-    case INDEX_op_qemu_st_i32:
-        return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
-    case INDEX_op_qemu_st_i64:
-        return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
+    case INDEX_op_qemu_ld_a32_i32:
+        return C_O1_I1(r, q);
+    case INDEX_op_qemu_ld_a64_i32:
+        return C_O1_I2(r, q, q);
+    case INDEX_op_qemu_ld_a32_i64:
+        return C_O2_I1(e, p, q);
+    case INDEX_op_qemu_ld_a64_i64:
+        return C_O2_I2(e, p, q, q);
+    case INDEX_op_qemu_st_a32_i32:
+        return C_O0_I2(q, q);
+    case INDEX_op_qemu_st_a64_i32:
+        return C_O0_I3(q, q, q);
+    case INDEX_op_qemu_st_a32_i64:
+        return C_O0_I3(Q, p, q);
+    case INDEX_op_qemu_st_a64_i64:
+        return C_O0_I4(Q, p, q, q);
 
     case INDEX_op_st_vec:
         return C_O0_I2(w, r);
@@ -2474,6 +2281,11 @@ static void tcg_target_init(TCGContext *s)
         if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
             arm_arch = pl[1] - '0';
         }
+
+        if (arm_arch < 6) {
+            error_report("TCG: ARMv%d is unsupported; exiting", arm_arch);
+            exit(EXIT_FAILURE);
+        }
     }
 
     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
@@ -2523,8 +2335,13 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
         tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
         return;
     case TCG_TYPE_V128:
-        /* regs 2; size 8; align 16 */
-        tcg_out_vldst(s, INSN_VLD1 | 0xae0, arg, arg1, arg2);
+        /*
+         * We have only 8-byte alignment for the stack per the ABI.
+         * Rather than dynamically re-align the stack, it's easier
+         * to simply not request alignment beyond that.  So:
+         * regs 2; size 8; align 8
+         */
+        tcg_out_vldst(s, INSN_VLD1 | 0xad0, arg, arg1, arg2);
         return;
     default:
         g_assert_not_reached();
@@ -2543,8 +2360,8 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
         tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
         return;
     case TCG_TYPE_V128:
-        /* regs 2; size 8; align 16 */
-        tcg_out_vldst(s, INSN_VST1 | 0xae0, arg, arg1, arg2);
+        /* See tcg_out_ld re alignment: regs 2; size 8; align 8 */
+        tcg_out_vldst(s, INSN_VST1 | 0xad0, arg, arg1, arg2);
        return;
    default:
        g_assert_not_reached();
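The 0xae0 -> 0xad0 change above adjusts the VLD1/VST1 "align" field. In the A32 encoding the low byte packs the size (bits 7:6) and the alignment hint (bits 5:4): align=2 requests 128-bit alignment, align=1 only 64-bit. A sketch under that reading of the ARM ARM; the field layout is the only assumption made here.

#include <assert.h>
#include <stdint.h>

/* Pack the low byte of a VLD1/VST1 encoding: size, align, Rm. */
static uint32_t vld1_low_byte(unsigned size, unsigned align, unsigned rm)
{
    return (size << 6) | (align << 4) | rm;   /* rm = 0xf: no writeback */
}

int main(void)
{
    assert(vld1_low_byte(3, 2, 0) == 0xe0);   /* size 8, align 16 */
    assert(vld1_low_byte(3, 1, 0) == 0xd0);   /* size 8, align 8 */
    return 0;
}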
@@ -2589,6 +2406,31 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     tcg_out_movi32(s, COND_AL, ret, arg);
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+    return false;
+}
+
+static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
+                             tcg_target_long imm)
+{
+    int enc, opc = ARITH_ADD;
+
+    /* All of the easiest immediates to encode are positive. */
+    if (imm < 0) {
+        imm = -imm;
+        opc = ARITH_SUB;
+    }
+    enc = encode_imm(imm);
+    if (enc >= 0) {
+        tcg_out_dat_imm(s, COND_AL, opc, rd, rs, enc);
+    } else {
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, imm);
+        tcg_out_dat_reg(s, COND_AL, opc, rd, rs,
+                        TCG_REG_TMP, SHIFT_IMM_LSL(0));
+    }
+}
+
 /* Type is always V128, with I64 elements. */
 static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
 {
@@ -3115,6 +2957,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
+    if (!tcg_use_softmmu && guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+    }
+
     tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
 
     /*
@@ -3139,6 +2986,11 @@ static void tcg_out_epilogue(TCGContext *s)
                    (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
 }
 
+static void tcg_out_tb_start(TCGContext *s)
+{
+    /* nothing to do */
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];