aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- target-i386/ops_sse.h 147
-rw-r--r-- target-i386/ops_sse_header.h 20
-rw-r--r-- target-i386/translate.c 109
3 files changed, 272 insertions, 4 deletions
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 7568681835..4fa8e06624 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -1,5 +1,5 @@
/*
- * MMX/3DNow!/SSE/SSE2/SSE3/PNI support
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support
*
* Copyright (c) 2005 Fabrice Bellard
*
@@ -1275,6 +1275,151 @@ void helper_pswapd(MMXReg *d, MMXReg *s)
}
#endif
+/* SSSE3 op helpers */
+/*
+ * Byte shuffle: every byte of *s is a control byte for the result byte at
+ * the same position.  A control byte with bit 7 set yields 0; otherwise
+ * its low bits pick one byte out of the previous contents of *d.
+ */
+void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s)
+{
+    /* Byte lanes handled per call: 8 when SHIFT is 0, 16 when SHIFT is 1. */
+    const int lanes = 8 << SHIFT;
+    Reg shuffled;
+    int lane;
+
+    for (lane = 0; lane < lanes; lane++) {
+        if (s->B(lane) & 0x80) {
+            shuffled.B(lane) = 0;
+        } else {
+            shuffled.B(lane) = d->B(s->B(lane) & (lanes - 1));
+        }
+    }
+
+    /* Stored only now, so every lookup above read the original *d. */
+    *d = shuffled;
+}
+
+/*
+ * Horizontal add of adjacent 16-bit words: the lower half of the result
+ * receives the pairwise sums taken from *d, the upper half those taken
+ * from *s.
+ *
+ * The sums are gathered in a temporary and stored at the end.  Updating
+ * *d in place would overwrite words that are still to be read through s
+ * when both pointers name the same register (e.g. "phaddw %xmm0, %xmm0").
+ */
+void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
+    r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
+    XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
+    XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
+    /* First word of the upper half: index 2 for SHIFT 0, 4 for SHIFT 1. */
+    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
+    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
+    XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
+    XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
+
+    *d = r;
+}
+
+/*
+ * Horizontal add of adjacent 32-bit lanes: the lower half of the result
+ * receives the pairwise sums taken from *d, the upper half those taken
+ * from *s.
+ *
+ * The result is built in a temporary so that it is also correct when d
+ * and s point at the same register; an in-place update would replace
+ * lanes of *s before they are read.
+ */
+void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
+    /* First lane of the upper half: index 1 for SHIFT 0, 2 for SHIFT 1. */
+    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
+
+    *d = r;
+}
+
+/*
+ * Horizontal add of adjacent 16-bit words with each sum passed through
+ * satsw(): the lower half of the result comes from the word pairs of *d,
+ * the upper half from the word pairs of *s.
+ *
+ * The result is built in a temporary so that it is also correct when d
+ * and s point at the same register; an in-place update would replace
+ * words of *s before they are read.
+ */
+void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
+    r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
+    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
+    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
+    /* First word of the upper half: index 2 for SHIFT 0, 4 for SHIFT 1. */
+    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
+    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
+    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
+    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
+
+    *d = r;
+}
+
+/*
+ * For every 16-bit result word, multiply the two bytes of *s (taken as
+ * signed) by the matching two bytes of *d (taken as unsigned), add both
+ * products and store the sum through satsw().
+ */
+void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s)
+{
+    int word;
+
+    /* 4 result words when SHIFT is 0, 8 when SHIFT is 1. */
+    for (word = 0; word < (4 << SHIFT); word++) {
+        int lo = 2 * word;
+        int hi = lo + 1;
+
+        /*
+         * Writing word N only touches bytes 2N and 2N+1 of *d, which are
+         * not needed by any later iteration.
+         */
+        d->W(word) = satsw((int8_t)s->B(lo) * (uint8_t)d->B(lo) +
+                           (int8_t)s->B(hi) * (uint8_t)d->B(hi));
+    }
+}
+
+/*
+ * Horizontal subtract of adjacent 16-bit words (even word minus the odd
+ * word that follows it): the lower half of the result comes from the
+ * word pairs of *d, the upper half from the word pairs of *s.
+ *
+ * The result is built in a temporary so that it is also correct when d
+ * and s point at the same register; an in-place update would replace
+ * words of *s before they are read.
+ */
+void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = (int16_t)d->W(0) - (int16_t)d->W(1);
+    r.W(1) = (int16_t)d->W(2) - (int16_t)d->W(3);
+    XMM_ONLY(r.W(2) = (int16_t)d->W(4) - (int16_t)d->W(5));
+    XMM_ONLY(r.W(3) = (int16_t)d->W(6) - (int16_t)d->W(7));
+    /* First word of the upper half: index 2 for SHIFT 0, 4 for SHIFT 1. */
+    r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1);
+    r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3);
+    XMM_ONLY(r.W(6) = (int16_t)s->W(4) - (int16_t)s->W(5));
+    XMM_ONLY(r.W(7) = (int16_t)s->W(6) - (int16_t)s->W(7));
+
+    *d = r;
+}
+
+/*
+ * Horizontal subtract of adjacent 32-bit lanes (even lane minus the odd
+ * lane that follows it): the lower half of the result comes from the
+ * lane pairs of *d, the upper half from the lane pairs of *s.
+ *
+ * The result is built in a temporary so that it is also correct when d
+ * and s point at the same register; an in-place update would replace
+ * lanes of *s before they are read.
+ */
+void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.L(0) = (int32_t)d->L(0) - (int32_t)d->L(1);
+    XMM_ONLY(r.L(1) = (int32_t)d->L(2) - (int32_t)d->L(3));
+    /* First lane of the upper half: index 1 for SHIFT 0, 2 for SHIFT 1. */
+    r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1);
+    XMM_ONLY(r.L(3) = (int32_t)s->L(2) - (int32_t)s->L(3));
+
+    *d = r;
+}
+
+/*
+ * Horizontal subtract of adjacent 16-bit words (even word minus the odd
+ * word that follows it) with each difference passed through satsw(): the
+ * lower half of the result comes from *d, the upper half from *s.
+ *
+ * The result is built in a temporary so that it is also correct when d
+ * and s point at the same register; an in-place update would replace
+ * words of *s before they are read.
+ */
+void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s)
+{
+    Reg r;
+
+    r.W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1));
+    r.W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3));
+    XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5)));
+    XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7)));
+    /* First word of the upper half: index 2 for SHIFT 0, 4 for SHIFT 1. */
+    r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1));
+    r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3));
+    XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5)));
+    XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7)));
+
+    *d = r;
+}
+
+/*
+ * Per-lane absolute value.  A lane that compares greater than the signed
+ * maximum of its width is treated as negative and negated (this relies on
+ * the lane accessors yielding unsigned values -- confirm against the Reg
+ * definition).  The first macro argument is unused.
+ *
+ * Arguments and expansions are parenthesized so the macros expand safely
+ * whatever expression they are handed or embedded in.  FABSL negates in
+ * unsigned arithmetic because -(int32_t)x overflows for x == 0x80000000;
+ * the narrower variants are promoted to int first and cannot overflow.
+ */
+#define FABSB(_, x) ((x) > INT8_MAX  ? -(int8_t)(x)   : (x))
+#define FABSW(_, x) ((x) > INT16_MAX ? -(int16_t)(x)  : (x))
+#define FABSL(_, x) ((x) > INT32_MAX ? -(uint32_t)(x) : (x))
+SSE_HELPER_B(helper_pabsb, FABSB)
+SSE_HELPER_W(helper_pabsw, FABSW)
+SSE_HELPER_L(helper_pabsd, FABSL)
+
+/*
+ * Multiply two lanes as signed 16-bit values, add 0x4000 (half of
+ * 1 << 15) to the product for rounding, then shift right by 15.
+ * Arguments and the whole expansion are parenthesized so the macro is
+ * safe inside any surrounding expression.
+ */
+#define FMULHRSW(d, s) (((int16_t)(d) * (int16_t)(s) + 0x4000) >> 15)
+SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
+
+/*
+ * Per-lane sign transfer: the result is d when s is positive, 0 when s is
+ * zero, and d negated when s compares greater than the signed maximum of
+ * its width, i.e. is negative (this relies on the lane accessors yielding
+ * unsigned values -- confirm against the Reg definition).
+ *
+ * Arguments and expansions are parenthesized so the macros expand safely
+ * whatever expression they are handed or embedded in.  FSIGNL negates in
+ * unsigned arithmetic because -(int32_t)d overflows for d == 0x80000000;
+ * the narrower variants are promoted to int first and cannot overflow.
+ */
+#define FSIGNB(d, s) ((s) <= INT8_MAX  ? ((s) ? (d) : 0) : -(int8_t)(d))
+#define FSIGNW(d, s) ((s) <= INT16_MAX ? ((s) ? (d) : 0) : -(int16_t)(d))
+#define FSIGNL(d, s) ((s) <= INT32_MAX ? ((s) ? (d) : 0) : -(uint32_t)(d))
+SSE_HELPER_B(helper_psignb, FSIGNB)
+SSE_HELPER_W(helper_psignw, FSIGNW)
+SSE_HELPER_L(helper_psignd, FSIGNL)
+
+/*
+ * Treat *d (upper part) and *s (lower part) as one double-width value,
+ * shift it right by `shift` units of 8 bits and keep the low Reg-sized
+ * part in *d.  A shift of 16 << SHIFT or more leaves only zeros.
+ */
+void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift)
+{
+    Reg r;
+    int32_t bits;
+
+    /* XXX could be checked during translation */
+    if (shift >= (16 << SHIFT)) {
+        r.Q(0) = 0;
+        XMM_ONLY(r.Q(1) = 0);
+        *d = r;
+        return;
+    }
+
+    /* The selection below works on bit counts, hence the scaling by 8. */
+    bits = shift << 3;
+
+/* v >> n for 0 < n < 64, v << -n for -64 < n <= 0, and 0 otherwise. */
+#define SHR(v, n) ((n) >= 64 || (n) <= -64 ? 0 : (n) > 0 ? (v) >> (n) : (v) << -(n))
+#if SHIFT == 0
+    r.Q(0) = SHR(s->Q(0), bits) |
+             SHR(d->Q(0), bits - 64);
+#else
+    /* Each output quadword ORs together the contribution of all four inputs. */
+    r.Q(0) = SHR(s->Q(0), bits) |
+             SHR(s->Q(1), bits - 64) |
+             SHR(d->Q(0), bits - 128) |
+             SHR(d->Q(1), bits - 192);
+    r.Q(1) = SHR(s->Q(0), bits + 64) |
+             SHR(s->Q(1), bits) |
+             SHR(d->Q(0), bits - 64) |
+             SHR(d->Q(1), bits - 128);
+#endif
+#undef SHR
+
+    /* r is a temporary, so d and s may refer to the same register. */
+    *d = r;
+}
+
#undef SHIFT
#undef XMM_ONLY
#undef Reg
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 442e12cd42..22b77b7a79 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -1,5 +1,5 @@
/*
- * MMX/3DNow!/SSE/SSE2/SSE3/PNI support
+ * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support
*
* Copyright (c) 2005 Fabrice Bellard
*
@@ -251,6 +251,24 @@ DEF_HELPER(void, helper_pfsubr, (MMXReg *d, MMXReg *s))
DEF_HELPER(void, helper_pswapd, (MMXReg *d, MMXReg *s))
#endif
+/* SSSE3 op helpers
+ *
+ * One declaration for each helper that the SSSE3 section of ops_sse.h
+ * defines.  glue(name, SUFFIX) appends the suffix of the current
+ * inclusion to the helper name (presumably _mmx or _xmm -- confirm
+ * against where SUFFIX is defined).  Every helper takes the destination
+ * register pointer d and the source register pointer s; palignr takes the
+ * shift count as an additional third argument.
+ */
+DEF_HELPER(void, glue(helper_phaddw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_phaddd, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_phaddsw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_phsubw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_phsubd, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_phsubsw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pabsb, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pabsw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pabsd, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pmaddubsw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pmulhrsw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_pshufb, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_psignb, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_psignw, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_psignd, SUFFIX), (Reg *d, Reg *s))
+DEF_HELPER(void, glue(helper_palignr, SUFFIX), (Reg *d, Reg *s, int32_t shift))
+
+
#undef SHIFT
#undef Reg
#undef SUFFIX
diff --git a/target-i386/translate.c b/target-i386/translate.c
index c4a119543a..c739d690ad 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2770,6 +2770,9 @@ static void *sse_op_table1[256][4] = {
[0xc2] = SSE_FOP(cmpeq),
[0xc6] = { helper_shufps, helper_shufpd },
+ [0x38] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */
+ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */
+
/* MMX ops and their SSE extensions */
[0x60] = MMX_OP2(punpcklbw),
[0x61] = MMX_OP2(punpcklwd),
@@ -2921,6 +2924,28 @@ static void *sse_op_table5[256] = {
[0xbf] = helper_pavgb_mmx /* pavgusb */
};
+static void *sse_op_table6[256][2] = { /* two-operand SSSE3 helpers; gen_sse indexes it as [byte following the 0x38 opcode byte][b1] */
+ [0x00] = MMX_OP2(pshufb), /* MMX_OP2 presumably supplies the MMX and XMM helper pair -- confirm against its definition */
+ [0x01] = MMX_OP2(phaddw),
+ [0x02] = MMX_OP2(phaddd),
+ [0x03] = MMX_OP2(phaddsw),
+ [0x04] = MMX_OP2(pmaddubsw),
+ [0x05] = MMX_OP2(phsubw),
+ [0x06] = MMX_OP2(phsubd),
+ [0x07] = MMX_OP2(phsubsw),
+ [0x08] = MMX_OP2(psignb),
+ [0x09] = MMX_OP2(psignw),
+ [0x0a] = MMX_OP2(psignd),
+ [0x0b] = MMX_OP2(pmulhrsw),
+ [0x1c] = MMX_OP2(pabsb),
+ [0x1d] = MMX_OP2(pabsw),
+ [0x1e] = MMX_OP2(pabsd),
+}; /* slots without an initializer stay NULL, which gen_sse rejects with illegal_op */
+
+static void *sse_op_table7[256][2] = { /* SSSE3 helpers taking an extra immediate byte; gen_sse indexes it as [byte following the 0x3a opcode byte][b1] */
+ [0x0f] = MMX_OP2(palignr), /* called with a third argument: the byte read after the operands */
+}; /* slots without an initializer stay NULL, which gen_sse rejects with illegal_op */
+
static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
{
int b1, op1_offset, op2_offset, is_xmm, val, ot;
@@ -2960,7 +2985,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
return;
}
if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
- goto illegal_op;
+ if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
+ goto illegal_op;
if (b == 0x0e) {
if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
goto illegal_op;
@@ -3482,6 +3508,84 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
reg = ((modrm >> 3) & 7) | rex_r;
gen_op_mov_reg_T0(OT_LONG, reg);
break;
+ case 0x038:
+ case 0x138:
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3))
+ goto illegal_op;
+
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+
+ sse_op2 = sse_op_table6[b][b1];
+ if (!sse_op2)
+ goto illegal_op;
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1);
+ break;
+ case 0x03a:
+ case 0x13a:
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3))
+ goto illegal_op;
+
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ rm = modrm & 7;
+ reg = ((modrm >> 3) & 7) | rex_r;
+ mod = (modrm >> 6) & 3;
+
+ sse_op2 = sse_op_table7[b][b1];
+ if (!sse_op2)
+ goto illegal_op;
+
+ if (b1) {
+ op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+ } else {
+ op2_offset = offsetof(CPUX86State,xmm_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldo_env_A0(s->mem_index, op2_offset);
+ }
+ } else {
+ op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+ if (mod == 3) {
+ op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+ } else {
+ op2_offset = offsetof(CPUX86State,mmx_t0);
+ gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+ gen_ldq_env_A0(s->mem_index, op2_offset);
+ }
+ }
+ val = ldub_code(s->pc++);
+
+ tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+ tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+ tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+ break;
default:
goto illegal_op;
}
@@ -6987,7 +7091,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
gen_eob(s);
}
break;
- /* MMX/3DNow!/SSE/SSE2/SSE3 support */
+ /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3 support */
case 0x1c3: /* MOVNTI reg, mem */
if (!(s->cpuid_features & CPUID_SSE2))
goto illegal_op;
@@ -7100,6 +7204,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
case 0x110 ... 0x117:
case 0x128 ... 0x12f:
+ case 0x138 ... 0x13a:
case 0x150 ... 0x177:
case 0x17c ... 0x17f:
case 0x1c2: