diff options
author | Alexander Gilday <alexander.gilday@arm.com> | 2018-04-05 13:25:17 +0100 |
---|---|---|
committer | Alexander Gilday <alexander.gilday@arm.com> | 2018-04-19 09:29:09 +0000 |
commit | 560332df277a0e143763e5f5038fbb539c57453b (patch) | |
tree | 6a5326476cd1c52e1abcaa78d322261e99dad2a2 /src | |
parent | 4e5bad9e4915ba673bfe016dbdced31fe3cb7687 (diff) |
Add support for dot product instructions.
Add support for the Armv8.2 signed and unsigned dot product instructions:
Sdot and Udot.
Change-Id: I19d0ad1ad962e6c2a72efe8f62a4512e430b5e33
Diffstat (limited to 'src')
-rw-r--r-- | src/aarch64/assembler-aarch64.cc | 46 | ||||
-rw-r--r-- | src/aarch64/assembler-aarch64.h | 18 | ||||
-rw-r--r-- | src/aarch64/constants-aarch64.h | 8 | ||||
-rw-r--r-- | src/aarch64/decoder-aarch64.cc | 4 | ||||
-rw-r--r-- | src/aarch64/disasm-aarch64.cc | 32 | ||||
-rw-r--r-- | src/aarch64/logic-aarch64.cc | 69 | ||||
-rw-r--r-- | src/aarch64/macro-assembler-aarch64.h | 4 | ||||
-rw-r--r-- | src/aarch64/operands-aarch64.h | 8 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.cc | 14 | ||||
-rw-r--r-- | src/aarch64/simulator-aarch64.h | 23 |
10 files changed, 214 insertions, 12 deletions
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc index 147bb86d..8525aa72 100644 --- a/src/aarch64/assembler-aarch64.cc +++ b/src/aarch64/assembler-aarch64.cc @@ -2871,6 +2871,26 @@ void Assembler::sqrdmlsh(const VRegister& vd, } +void Assembler::sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | NEON_SDOT | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | NEON_UDOT | Rm(vm) | Rn(vn) | Rd(vd)); +} + + void Assembler::faddp(const VRegister& vd, const VRegister& vn) { VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D())); Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd)); @@ -3093,6 +3113,32 @@ void Assembler::NEONByElementL(const VRegister& vd, } +void Assembler::sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + + int index_num_bits = 2; + Emit(VFormat(vd) | NEON_SDOT_byelement | + ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + + int index_num_bits = 2; + Emit(VFormat(vd) | NEON_UDOT_byelement | + ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + // clang-format off #define NEON_BYELEMENT_LIST(V) \ V(mul, NEON_MUL_byelement, vn.IsVector()) \ diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h index 0c7abcbc..09613bb6 100644 --- 
a/src/aarch64/assembler-aarch64.h +++ b/src/aarch64/assembler-aarch64.h @@ -2479,10 +2479,16 @@ class Assembler : public vixl::internal::AssemblerBase { // Signed saturating rounding doubling multiply returning high half. void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Signed dot product [Armv8.2]. + void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Signed saturating rounding doubling multiply accumulate returning high // half [Armv8.1]. void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Unsigned dot product [Armv8.2]. + void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + // Signed saturating rounding doubling multiply subtract returning high half // [Armv8.1]. void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm); @@ -2499,6 +2505,12 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int vm_index); + // Signed dot product by element [Armv8.2]. + void sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + // Signed saturating rounding doubling multiply accumulate element returning // high half [Armv8.1]. void sqrdmlah(const VRegister& vd, @@ -2506,6 +2518,12 @@ class Assembler : public vixl::internal::AssemblerBase { const VRegister& vm, int vm_index); + // Unsigned dot product by element [Armv8.2]. + void udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + // Signed saturating rounding doubling multiply subtract element returning // high half [Armv8.1]. 
void sqrdmlsh(const VRegister& vd, diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h index 64a42469..24e1d88a 100644 --- a/src/aarch64/constants-aarch64.h +++ b/src/aarch64/constants-aarch64.h @@ -1587,6 +1587,8 @@ enum NEON3SameExtraOp { NEON3SameExtraMask = 0xBE20FC00, NEON_SQRDMLAH = NEON3SameExtraFixed | NEON3SameExtraUBit, NEON_SQRDMLSH = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00000800, + NEON_SDOT = NEON3SameExtraFixed | 0x00001000, + NEON_UDOT = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00001000, /* v8.3 Complex Numbers */ NEON3SameExtraFCFixed = 0x2E00C400, @@ -1698,6 +1700,10 @@ enum NEONByIndexedElementOp { NEON_SQDMLSL_byelement = NEONByIndexedElementFixed | 0x00007000, NEON_SQDMULH_byelement = NEONByIndexedElementFixed | 0x0000C000, NEON_SQRDMULH_byelement = NEONByIndexedElementFixed | 0x0000D000, + NEON_SDOT_byelement = NEONByIndexedElementFixed | 0x0000E000, + NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000, + NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000, + NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000, // Floating point instructions. 
NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000, @@ -1707,8 +1713,6 @@ enum NEONByIndexedElementOp { NEON_FMUL_byelement = NEONByIndexedElementFPFixed | 0x00009000, NEON_FMULX_byelement = NEONByIndexedElementFPFixed | 0x20009000, NEON_FCMLA_byelement = NEONByIndexedElementFixed | 0x20001000, - NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000, - NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000, // Complex instruction(s) this is necessary because 'rot' encoding moves into the NEONByIndex..Mask space NEONByIndexedElementFPComplexMask = 0xBF009400 diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc index fe1cb08c..054fd93e 100644 --- a/src/aarch64/decoder-aarch64.cc +++ b/src/aarch64/decoder-aarch64.cc @@ -758,10 +758,8 @@ void Decoder::DecodeNEONVectorDataProcessing(const Instruction* instr) { VisitUnallocated(instr); } else if ((instr->ExtractBits(14, 11) <= 0xE && instr->ExtractBits(14, 11) >= 0x8) || - instr->ExtractBits(14, 12) == 0x0) { + instr->ExtractBits(14, 11) <= 0x2) { VisitNEON3SameExtra(instr); - } else if (instr->ExtractBits(13, 11) < 0x4) { - VisitUnimplemented(instr); } else { VisitUnallocated(instr); } diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc index bc52fb87..98c7fb45 100644 --- a/src/aarch64/disasm-aarch64.cc +++ b/src/aarch64/disasm-aarch64.cc @@ -2286,8 +2286,10 @@ void Disassembler::VisitNEON3Same(const Instruction *instr) { } void Disassembler::VisitNEON3SameExtra(const Instruction *instr) { + static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}}; + const char *mnemonic = "unimplemented"; - const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCN"; + const char *form = "(NEON3SameExtra)"; NEONFormatDecoder nfd(instr); @@ -2301,21 +2303,26 @@ void Disassembler::VisitNEON3SameExtra(const Instruction *instr) { mnemonic = "fcadd"; form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVFCNA"; break; - default: - form = "(NEON3SameExtra)"; } } else { + form = 
"'Vd.%s, 'Vn.%s, 'Vm.%s"; switch (instr->Mask(NEON3SameExtraMask)) { + case NEON_SDOT: + mnemonic = "sdot"; + nfd.SetFormatMap(1, &map_usdot); + nfd.SetFormatMap(2, &map_usdot); + break; case NEON_SQRDMLAH: mnemonic = "sqrdmlah"; - form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + break; + case NEON_UDOT: + mnemonic = "udot"; + nfd.SetFormatMap(1, &map_usdot); + nfd.SetFormatMap(2, &map_usdot); break; case NEON_SQRDMLSH: mnemonic = "sqrdmlsh"; - form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; break; - default: - form = "(NEON3SameExtra)"; } } @@ -2507,6 +2514,7 @@ void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) { static const NEONFormatMap map_cn = {{23, 22, 30}, {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}}; + static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}}; NEONFormatDecoder nfd(instr, &map_ta, @@ -2565,9 +2573,19 @@ void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) { case NEON_SQRDMULH_byelement: mnemonic = "sqrdmulh"; break; + case NEON_SDOT_byelement: + mnemonic = "sdot"; + form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]"; + nfd.SetFormatMap(1, &map_usdot); + break; case NEON_SQRDMLAH_byelement: mnemonic = "sqrdmlah"; break; + case NEON_UDOT_byelement: + mnemonic = "udot"; + form = "'Vd.%s, 'Vn.%s, 'Ve.4b['IVByElemIndex]"; + nfd.SetFormatMap(1, &map_usdot); + break; case NEON_SQRDMLSH_byelement: mnemonic = "sqrdmlsh"; break; diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc index d8198fa4..20d4c002 100644 --- a/src/aarch64/logic-aarch64.cc +++ b/src/aarch64/logic-aarch64.cc @@ -823,6 +823,17 @@ LogicVRegister Simulator::sqrdmulh(VectorFormat vform, } +LogicVRegister Simulator::sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + LogicVRegister 
Simulator::sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -834,6 +845,17 @@ LogicVRegister Simulator::sqrdmlah(VectorFormat vform, } +LogicVRegister Simulator::udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return udot(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, @@ -3270,6 +3292,53 @@ LogicVRegister Simulator::sqrdmulh(VectorFormat vform, } +LogicVRegister Simulator::dot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_signed) { + VectorFormat quarter_vform = + VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); + + dst.ClearForWrite(vform); + for (int e = 0; e < LaneCountFromFormat(vform); e++) { + int64_t result = 0; + int64_t element1, element2; + for (int i = 0; i < 4; i++) { + int index = 4 * e + i; + if (is_signed) { + element1 = src1.Int(quarter_vform, index); + element2 = src2.Int(quarter_vform, index); + } else { + element1 = src1.Uint(quarter_vform, index); + element2 = src2.Uint(quarter_vform, index); + } + result += element1 * element2; + } + + result += dst.Int(vform, e); + dst.SetInt(vform, e, result); + } + return dst; +} + + +LogicVRegister Simulator::sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, false); +} + + LogicVRegister Simulator::sqrdmlash(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, diff --git a/src/aarch64/macro-assembler-aarch64.h 
b/src/aarch64/macro-assembler-aarch64.h index e0789306..29a29556 100644 --- a/src/aarch64/macro-assembler-aarch64.h +++ b/src/aarch64/macro-assembler-aarch64.h @@ -2284,7 +2284,9 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(sqdmull, Sqdmull) \ V(sqdmull2, Sqdmull2) \ V(sqrdmulh, Sqrdmulh) \ + V(sdot, Sdot) \ V(sqrdmlah, Sqrdmlah) \ + V(udot, Udot) \ V(sqrdmlsh, Sqrdmlsh) \ V(sqrshl, Sqrshl) \ V(sqshl, Sqshl) \ @@ -2460,7 +2462,9 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface { V(mls, Mls) \ V(sqdmulh, Sqdmulh) \ V(sqrdmulh, Sqrdmulh) \ + V(sdot, Sdot) \ V(sqrdmlah, Sqrdmlah) \ + V(udot, Udot) \ V(sqrdmlsh, Sqrdmlsh) \ V(sqdmull, Sqdmull) \ V(sqdmull2, Sqdmull2) \ diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h index 2fb1f1d3..5b154eaa 100644 --- a/src/aarch64/operands-aarch64.h +++ b/src/aarch64/operands-aarch64.h @@ -207,6 +207,10 @@ class CPURegister { bool IsD() const { return IsV() && Is64Bits(); } bool IsQ() const { return IsV() && Is128Bits(); } + // Semantic type for sdot and udot instructions. + bool IsS4B() const { return IsS(); } + const VRegister& S4B() const { return S(); } + const Register& W() const; const Register& X() const; const VRegister& V() const; @@ -392,6 +396,10 @@ class VRegister : public CPURegister { return Is32Bits(); } + // Semantic type for sdot and udot instructions. 
+ bool Is1S4B() const { return Is1S(); } + + bool IsLaneSizeB() const { return GetLaneSizeInBits() == kBRegSize; } bool IsLaneSizeH() const { return GetLaneSizeInBits() == kHRegSize; } bool IsLaneSizeS() const { return GetLaneSizeInBits() == kSRegSize; } diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc index 5432c1f3..3abc7800 100644 --- a/src/aarch64/simulator-aarch64.cc +++ b/src/aarch64/simulator-aarch64.cc @@ -3670,9 +3670,15 @@ void Simulator::VisitNEON3SameExtra(const Instruction* instr) { } } else { switch (instr->Mask(NEON3SameExtraMask)) { + case NEON_SDOT: + sdot(vf, rd, rn, rm); + break; case NEON_SQRDMLAH: sqrdmlah(vf, rd, rn, rm); break; + case NEON_UDOT: + udot(vf, rd, rn, rm); + break; case NEON_SQRDMLSH: sqrdmlsh(vf, rd, rn, rm); break; @@ -3952,10 +3958,18 @@ void Simulator::VisitNEONByIndexedElement(const Instruction* instr) { Op = &Simulator::sqrdmulh; vf = vf_r; break; + case NEON_SDOT_byelement: + Op = &Simulator::sdot; + vf = vf_r; + break; case NEON_SQRDMLAH_byelement: Op = &Simulator::sqrdmlah; vf = vf_r; break; + case NEON_UDOT_byelement: + Op = &Simulator::udot; + vf = vf_r; + break; case NEON_SQRDMLSH_byelement: Op = &Simulator::sqrdmlsh; vf = vf_r; diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h index 7c84cb38..4de98294 100644 --- a/src/aarch64/simulator-aarch64.h +++ b/src/aarch64/simulator-aarch64.h @@ -1985,11 +1985,21 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); LogicVRegister sqrdmlah(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, const LogicVRegister& src2, int index); + LogicVRegister udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); LogicVRegister sqrdmlsh(VectorFormat vform, 
LogicVRegister dst, const LogicVRegister& src1, @@ -2577,6 +2587,19 @@ class Simulator : public DecoderVisitor { const LogicVRegister& src1, const LogicVRegister& src2, bool round = true); + LogicVRegister dot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_signed); + LogicVRegister sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); LogicVRegister sqrdmlash(VectorFormat vform, LogicVRegister dst, const LogicVRegister& src1, |