From 1e262b49b5331441f697461e4305fe06719758a7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 18 Mar 2019 12:02:54 -0700 Subject: tcg/i386: Implement tcg_out_dupm_vec At the same time, improve tcg_out_dupi_vec wrt broadcast from the constant pool. Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.inc.c | 57 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 14 deletions(-) (limited to 'tcg') diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index f04933bc19..f4bd00e24f 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVBE_MyGy (0xf1 | P_EXT38) #define OPC_MOVD_VyEy (0x6e | P_EXT | P_DATA16) #define OPC_MOVD_EyVy (0x7e | P_EXT | P_DATA16) -#define OPC_MOVDDUP (0x12 | P_EXT | P_SIMDF2) #define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16) #define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16) #define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3) @@ -458,6 +457,10 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_UD2 (0x0b | P_EXT) #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) +#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) +#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) +#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) +#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) @@ -855,16 +858,17 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) return true; } +static const int avx2_dup_insn[4] = { + OPC_VPBROADCASTB, OPC_VPBROADCASTW, + OPC_VPBROADCASTD, OPC_VPBROADCASTQ, +}; + static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg a) { if (have_avx2) { - static const int dup_insn[4] = { - OPC_VPBROADCASTB, OPC_VPBROADCASTW, - OPC_VPBROADCASTD, OPC_VPBROADCASTQ, - }; int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); - tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a); + tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a); } else { switch (vece) { case MO_8: @@ -894,10 +898,35 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg r, TCGReg base, intptr_t offset) { - return false; + if (have_avx2) { + int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0); + tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l, + r, 0, base, offset); + } else { + switch (vece) { + case MO_64: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset); + break; + case MO_32: + tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset); + break; + case MO_16: + tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + case MO_8: + tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset); + tcg_out8(s, 0); /* imm8 */ + tcg_out_dup_vec(s, type, vece, r, r); + break; + default: + g_assert_not_reached(); + } + } + return true; } - static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { @@ -918,16 +947,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, } else if (have_avx2) { tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret); + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret); } new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4); - } else if (have_avx2) { - tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret); - new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); } else { - tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret); + if (have_avx2) { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret); + } else { + tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret); + } new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0); - tcg_out_dup_vec(s, type, MO_32, ret, ret); } } -- cgit v1.2.3