Support SHA-1 accelerating instructions (#102)
Add support for the six Neon SHA-1 accelerating instructions (sha1c, sha1h, sha1m, sha1p, sha1su0 and sha1su1) in the assembler, disassembler, simulator and CPU features auditor.
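The six instructions surface through the MacroAssembler as Sha1c, Sha1h, Sha1m, Sha1p, Sha1su0 and Sha1su1. A minimal usage sketch (register choices are illustrative assumptions, mirroring the disassembly tests added below):

    #include "aarch64/macro-assembler-aarch64.h"

    using namespace vixl::aarch64;

    // Not a complete SHA-1 block; just a demonstration of the new calls.
    void EmitSha1Fragment(MacroAssembler* masm) {
      masm->Sha1su0(v19.V4S(), v9.V4S(), v27.V4S());  // schedule update, part 1
      masm->Sha1su1(v19.V4S(), v4.V4S());             // schedule update, part 2
      masm->Sha1h(s12, s0);                           // fixed rotate: ROL(Sn, 30)
      masm->Sha1c(q0, s12, v20.V4S());                // four 'choose' hash rounds
    }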
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 009b08c..89fd169 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -5909,6 +5909,54 @@
Emit(0xce608c00 | Rd(vd) | Rn(vn) | Rm(vm));
}
+void Assembler::sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e000000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1h(const VRegister& sd, const VRegister& sn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(sd.IsS() && sn.IsS());
+
+ Emit(0x5e280800 | Rd(sd) | Rn(sn));
+}
+
+void Assembler::sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e002000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e001000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
+
+ Emit(0x5e003000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1su1(const VRegister& vd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S());
+
+ Emit(0x5e281800 | Rd(vd) | Rn(vn));
+}
+
// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
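Each of these emissions ORs fixed opcode bits with the standard Rd, Rn and Rm fields (bits 4:0, 9:5 and 20:16). A standalone spot check, not part of the patch, against an encoding exercised by the simulator tests below:

    #include <cstdint>

    // Hand-assemble SHA1C: fixed bits | Rm << 16 | Rn << 5 | Rd.
    constexpr uint32_t Sha1cEncoding(unsigned rd, unsigned rn, unsigned rm) {
      return 0x5e000000 | (rm << 16) | (rn << 5) | rd;
    }

    // Agrees with "__ dci(0x5e1f02bd);  // sha1c q29, s21, v31.4s" below.
    static_assert(Sha1cEncoding(29, 21, 31) == 0x5e1f02bd, "sha1c encoding");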
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index bbba5db..b0f4d42 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -3642,6 +3642,24 @@
// Rotate and Exclusive-OR
void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+ // SHA1 hash update (choose).
+ void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 fixed rotate.
+ void sha1h(const VRegister& sd, const VRegister& sn);
+
+ // SHA1 hash update (majority).
+ void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 hash update (parity).
+ void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 schedule update 0.
+ void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 schedule update 1.
+ void sha1su1(const VRegister& vd, const VRegister& vn);
+
// Scalable Vector Extensions.
// Absolute value (predicated).
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index a85587b..2d41453 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -247,11 +247,13 @@
void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
USE(instr);
}
void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
USE(instr);
}
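Recording kNEON and kSHA1 lets the auditor report the exact feature set a code sequence requires. Callers would typically also gate execution on the host CPU; a sketch using VIXL's runtime query (assuming the usual entry point):

    #include "cpu-features.h"

    bool HostSupportsNeonSha1() {
      // InferFromOS() queries the host, e.g. via getauxval on Linux.
      return vixl::CPUFeatures::InferFromOS().Has(vixl::CPUFeatures::kNEON,
                                                  vixl::CPUFeatures::kSHA1);
    }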
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 5a8241d..fd9879f 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -2172,12 +2172,20 @@
void Disassembler::VisitCrypto2RegSHA(const Instruction *instr) {
- VisitUnimplemented(instr);
+ const char *form = "'Vd.4s, 'Vn.4s";
+ if (form_hash_ == "sha1h_ss_cryptosha2"_h) {
+ form = "'Sd, 'Sn";
+ }
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {
- VisitUnimplemented(instr);
+ const char *form = "'Qd, 'Sn, 'Vm.4s";
+ if (form_hash_ == "sha1su0_vvv_cryptosha3"_h) {
+ form = "'Vd.4s, 'Vn.4s, 'Vm.4s";
+ }
+ FormatWithDecodedMnemonic(instr, form);
}
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index 43b2071..e5d1625 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -2513,6 +2513,7 @@
LogicVRegister dst,
const LogicVRegister& src,
int rotation) {
+ dst.ClearForWrite(vform);
int width = LaneSizeInBitsFromFormat(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
uint64_t value = src.Uint(vform, i);
@@ -2521,6 +2522,14 @@
return dst;
}
+LogicVRegister Simulator::rol(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation) {
+ int ror_equivalent = LaneSizeInBitsFromFormat(vform) - rotation;
+ return ror(vform, dst, src, ror_equivalent);
+}
+
LogicVRegister Simulator::ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -7885,6 +7894,21 @@
return dst;
}
+template <>
+uint64_t SHA1Operation<"choose"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return ((y ^ z) & x) ^ z;
+}
+
+template <>
+uint64_t SHA1Operation<"majority"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return (x & y) | ((x | y) & z);
+}
+
+template <>
+uint64_t SHA1Operation<"parity"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return x ^ y ^ z;
+}
+
} // namespace aarch64
} // namespace vixl
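These specializations are the Ch, Maj and Parity functions from FIPS 180-4, with Ch and Maj in reduced forms that save an operation. A standalone check of the algebraic identities (illustrative only):

    #include <cassert>
    #include <cstdint>

    void CheckSha1Functions(uint64_t x, uint64_t y, uint64_t z) {
      // Ch(x, y, z) = (x & y) | (~x & z), rewritten with a single AND.
      assert((((y ^ z) & x) ^ z) == ((x & y) | (~x & z)));
      // Maj(x, y, z) = (x & y) | (x & z) | (y & z); (x | y) & z folds
      // the last two terms together.
      assert(((x & y) | ((x | y) & z)) == ((x & y) | (x & z) | (y & z)));
    }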
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 8878ef7..15e51a5 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -2800,6 +2800,10 @@
V(saddl2, Saddl2) \
V(saddw, Saddw) \
V(saddw2, Saddw2) \
+ V(sha1c, Sha1c) \
+ V(sha1m, Sha1m) \
+ V(sha1p, Sha1p) \
+ V(sha1su0, Sha1su0) \
V(shadd, Shadd) \
V(shsub, Shsub) \
V(smax, Smax) \
@@ -2944,6 +2948,8 @@
V(sadalp, Sadalp) \
V(saddlp, Saddlp) \
V(saddlv, Saddlv) \
+ V(sha1h, Sha1h) \
+ V(sha1su1, Sha1su1) \
V(smaxv, Smaxv) \
V(sminv, Sminv) \
V(sqabs, Sqabs) \
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index fdc7106..fbe5970 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -7171,12 +7171,59 @@
void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
- VisitUnimplemented(instr);
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ switch (form_hash_) {
+ case "sha1h_ss_cryptosha2"_h:
+ ror(kFormatS, rd, rn, 2);
+ break;
+ case "sha1su1_vv_cryptosha2"_h: {
+ SimVRegister temp;
+
+ // temp = srcdst ^ (src >> 32);
+ ext(kFormat16B, temp, rn, temp, 4);
+ eor(kFormat16B, temp, rd, temp);
+
+ // srcdst = ROL(temp, 1) ^ (ROL(temp, 2) << 96)
+ rol(kFormat4S, rd, temp, 1);
+ rol(kFormatS, temp, temp, 2); // kFormatS will zero bits <127:32>
+ ext(kFormat16B, temp, temp, temp, 4);
+ eor(kFormat16B, rd, rd, temp);
+ break;
+ }
+ case "sha256su0_vv_cryptosha2"_h:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
}
void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
- VisitUnimplemented(instr);
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+ switch (form_hash_) {
+ case "sha1c_qsv_cryptosha3"_h:
+ sha1<"choose"_h>(rd, rn, rm);
+ break;
+ case "sha1m_qsv_cryptosha3"_h:
+ sha1<"majority"_h>(rd, rn, rm);
+ break;
+ case "sha1p_qsv_cryptosha3"_h:
+ sha1<"parity"_h>(rd, rn, rm);
+ break;
+ case "sha1su0_vvv_cryptosha3"_h: {
+ SimVRegister temp;
+ ext(kFormat16B, temp, rd, rn, 8);
+ eor(kFormat16B, temp, temp, rd);
+ eor(kFormat16B, rd, temp, rm);
+ break;
+    }
+    default:
+      // sha256h, sha256h2 and sha256su1 decode here but are not yet supported.
+      VIXL_UNIMPLEMENTED();
+  }
}
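The ext/eor sequence for sha1su0 follows the Arm ARM pseudocode, in which the result starts as the middle 128 bits of the Vn:Vd concatenation. A scalar model of the same dataflow (a sketch for illustration; lane 0 holds bits 31:0):

    #include <array>
    #include <cstdint>

    using Vec4S = std::array<uint32_t, 4>;

    Vec4S Sha1Su0Model(const Vec4S& vd, const Vec4S& vn, const Vec4S& vm) {
      // ext(vd, vn, #8): the top half of vd followed by the bottom half of vn.
      Vec4S t = {vd[2], vd[3], vn[0], vn[1]};
      for (int i = 0; i < 4; i++) t[i] ^= vd[i] ^ vm[i];
      return t;
    }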
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 73277e4..09dc7e1 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1280,9 +1280,10 @@
uint32_t seed_;
};
-
class Debugger;
+template <uint32_t mode>
+uint64_t SHA1Operation(uint64_t x, uint64_t y, uint64_t z);
class Simulator : public DecoderVisitor {
public:
@@ -3768,6 +3769,10 @@
LogicVRegister dst,
const LogicVRegister& src,
int rotation);
+ LogicVRegister rol(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation);
LogicVRegister ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -4492,6 +4497,36 @@
LogicVRegister srcdst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+
+ template <uint32_t mode>
+ LogicVRegister sha1(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t y = src1.Uint(kFormat4S, 0);
+ uint64_t sd[4] = {};
+ srcdst.UintArray(kFormat4S, sd);
+
+ for (unsigned i = 0; i < ArrayLength(sd); i++) {
+ uint64_t t = SHA1Operation<mode>(sd[1], sd[2], sd[3]);
+
+ y += RotateLeft(sd[0], 5, kSRegSize) + t;
+ y += src2.Uint(kFormat4S, i);
+
+ sd[1] = RotateLeft(sd[1], 30, kSRegSize);
+
+ // y:sd = ROL(y:sd, 32)
+ uint64_t temp = sd[3];
+ sd[3] = sd[2];
+ sd[2] = sd[1];
+ sd[1] = sd[0];
+ sd[0] = y;
+ y = temp;
+ }
+
+ srcdst.SetUintArray(kFormat4S, sd);
+ return srcdst;
+ }
+
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
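Each sha1c/sha1p/sha1m instruction performs four of the eighty FIPS 180-4 rounds: a typical driver uses sha1c for rounds 0-19, sha1p for 20-39, sha1m for 40-59 and sha1p again for 60-79, adding the round constant to the schedule words itself since Vm carries W + K. A hedged fragment of such a driver (register roles are assumptions, not part of this patch):

    // v4 holds four schedule words, v16 the round constant K splatted to 4S.
    __ Add(v4.V4S(), v4.V4S(), v16.V4S());  // v4 = W + K
    __ Sha1h(s18, s0);                      // next e = ROL(a, 30), saved first
    __ Sha1c(q0, s17, v4.V4S());            // four 'choose' rounds; s17 = old e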
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index 281c5cf..f95f6c8 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -239,6 +239,11 @@
return value & width_mask;
}
+inline uint64_t RotateLeft(uint64_t value,
+ unsigned int rotate,
+ unsigned int width) {
+ return RotateRight(value, width - rotate, width);
+}
// Wrapper class for passing FP16 values through the assembler.
// This is purely to aid with type checking/casting.
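RotateLeft reuses the existing RotateRight via the identity rol(x, r) == ror(x, w - r) in a w-bit lane. A spot check (illustrative; the new callers only pass 0 < rotate < width):

    #include <cassert>
    #include "utils-vixl.h"

    void CheckRotateLeft() {
      assert(vixl::RotateLeft(0x80000001, 1, 32) == 0x00000003);
      assert(vixl::RotateLeft(1, 30, 32) == 0x40000000);
    }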
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index 8430d7f..56ec38f 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3794,5 +3794,16 @@
TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
+#define TEST_NEON_SHA1(NAME, ASM) \
+ TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA1), \
+ NEON_SHA1_##NAME, \
+ ASM)
+TEST_NEON_SHA1(sha1c_0, sha1c(q0, s12, v20.V4S()))
+TEST_NEON_SHA1(sha1m_0, sha1m(q22, s2, v13.V4S()))
+TEST_NEON_SHA1(sha1p_0, sha1p(q31, s5, v15.V4S()))
+TEST_NEON_SHA1(sha1su0_0, sha1su0(v19.V4S(), v9.V4S(), v27.V4S()))
+TEST_NEON_SHA1(sha1h_0, sha1h(s12, s0))
+TEST_NEON_SHA1(sha1su1_0, sha1su1(v2.V4S(), v4.V4S()))
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index 774114d..18d400e 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4530,6 +4530,20 @@
CLEANUP();
}
+TEST(neon_sha1) {
+ SETUP();
+
+ COMPARE_MACRO(Sha1c(q0, s12, v20.V4S()), "sha1c q0, s12, v20.4s");
+ COMPARE_MACRO(Sha1m(q22, s2, v13.V4S()), "sha1m q22, s2, v13.4s");
+ COMPARE_MACRO(Sha1p(q31, s5, v15.V4S()), "sha1p q31, s5, v15.4s");
+ COMPARE_MACRO(Sha1su0(v19.V4S(), v9.V4S(), v27.V4S()),
+ "sha1su0 v19.4s, v9.4s, v27.4s");
+ COMPARE_MACRO(Sha1h(s12, s0), "sha1h s12, s0");
+ COMPARE_MACRO(Sha1su1(v2.V4S(), v4.V4S()), "sha1su1 v2.4s, v4.4s");
+
+ CLEANUP();
+}
+
TEST(neon_unallocated_regression_test) {
SETUP();
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index bdd5c81..1ba1aee 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -394,6 +394,214 @@
}
}
+TEST_SVE(neon_sha1_2reg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA1);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e280800); // sha1h s0, s0
+ // vl128 state = 0xc388d4f8
+ __ dci(0x5e280a28); // sha1h s8, s17
+ // vl128 state = 0x5c88b904
+ __ dci(0x5e280a2a); // sha1h s10, s17
+ // vl128 state = 0x6f63c596
+ __ dci(0x5e281aae); // sha1su1 v14.4s, v21.4s
+ // vl128 state = 0x85e1119d
+ __ dci(0x5e281abe); // sha1su1 v30.4s, v21.4s
+ // vl128 state = 0x9b814260
+ __ dci(0x5e281a0e); // sha1su1 v14.4s, v16.4s
+ // vl128 state = 0x8ccca0ab
+ __ dci(0x5e281a0a); // sha1su1 v10.4s, v16.4s
+ // vl128 state = 0x42262836
+ __ dci(0x5e281acb); // sha1su1 v11.4s, v22.4s
+ // vl128 state = 0xabcde33d
+ __ dci(0x5e281acf); // sha1su1 v15.4s, v22.4s
+ // vl128 state = 0xdf44e7be
+ __ dci(0x5e281adf); // sha1su1 v31.4s, v22.4s
+ // vl128 state = 0x48c332a3
+ __ dci(0x5e280a9d); // sha1h s29, s20
+ // vl128 state = 0x56bafe13
+ __ dci(0x5e28188d); // sha1su1 v13.4s, v4.4s
+ // vl128 state = 0x218eb351
+ __ dci(0x5e2808cf); // sha1h s15, s6
+ // vl128 state = 0xc1720d9f
+ __ dci(0x5e2808cb); // sha1h s11, s6
+ // vl128 state = 0x67119e1c
+ __ dci(0x5e2808c9); // sha1h s9, s6
+ // vl128 state = 0x31f69637
+ __ dci(0x5e2808c1); // sha1h s1, s6
+ // vl128 state = 0x214a25ff
+ __ dci(0x5e280871); // sha1h s17, s3
+ // vl128 state = 0xa5e88b55
+ __ dci(0x5e280815); // sha1h s21, s0
+ // vl128 state = 0xc8c91e29
+ __ dci(0x5e28185d); // sha1su1 v29.4s, v2.4s
+ // vl128 state = 0x5582c6a8
+ __ dci(0x5e28185f); // sha1su1 v31.4s, v2.4s
+ // vl128 state = 0xd3288a61
+ __ dci(0x5e28087e); // sha1h s30, s3
+ // vl128 state = 0x350b39c2
+ __ dci(0x5e28093f); // sha1h s31, s9
+ // vl128 state = 0xbdc1ac98
+ __ dci(0x5e28093b); // sha1h s27, s9
+ // vl128 state = 0x62f828bf
+ __ dci(0x5e28092b); // sha1h s11, s9
+ // vl128 state = 0xc8f2f671
+ __ dci(0x5e2819bb); // sha1su1 v27.4s, v13.4s
+ // vl128 state = 0x24ec8c34
+ __ dci(0x5e281b93); // sha1su1 v19.4s, v28.4s
+ // vl128 state = 0x71e188de
+ __ dci(0x5e281b97); // sha1su1 v23.4s, v28.4s
+ // vl128 state = 0x22490375
+ __ dci(0x5e281b95); // sha1su1 v21.4s, v28.4s
+ // vl128 state = 0x016b70d1
+ __ dci(0x5e281b51); // sha1su1 v17.4s, v26.4s
+ // vl128 state = 0xa6252086
+ __ dci(0x5e2819d3); // sha1su1 v19.4s, v14.4s
+ // vl128 state = 0x78683885
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x78683885,
+ 0x59574c2a,
+ 0x449978bf,
+ 0x0ddab775,
+ 0x1a043ef3,
+ 0xf501e2e7,
+ 0xa219e725,
+ 0xf17f57c8,
+ 0x4ccdbf99,
+ 0x419d4fc3,
+ 0x7302571d,
+ 0xd6bee170,
+ 0x7d81c301,
+ 0xbaa7d729,
+ 0xf33f0bc4,
+ 0xff8b070a,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(neon_sha1_3reg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA1);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e1f02bd); // sha1c q29, s21, v31.4s
+ // vl128 state = 0xec2a37ad
+ __ dci(0x5e0810af); // sha1p q15, s5, v8.4s
+ // vl128 state = 0x3fe9252a
+ __ dci(0x5e122227); // sha1m q7, s17, v18.4s
+ // vl128 state = 0x4465789e
+ __ dci(0x5e0b039d); // sha1c q29, s28, v11.4s
+ // vl128 state = 0x2186488a
+ __ dci(0x5e1a03e9); // sha1c q9, s31, v26.4s
+ // vl128 state = 0x9eddf8e3
+ __ dci(0x5e0c138c); // sha1p q12, s28, v12.4s
+ // vl128 state = 0x0ca7cd3d
+ __ dci(0x5e1f1316); // sha1p q22, s24, v31.4s
+ // vl128 state = 0xb80a61c0
+ __ dci(0x5e052204); // sha1m q4, s16, v5.4s
+ // vl128 state = 0x941821ca
+ __ dci(0x5e0a00d6); // sha1c q22, s6, v10.4s
+ // vl128 state = 0x5e71ccae
+ __ dci(0x5e0e032e); // sha1c q14, s25, v14.4s
+ // vl128 state = 0x7ed4486a
+ __ dci(0x5e1d1098); // sha1p q24, s4, v29.4s
+ // vl128 state = 0x0978a637
+ __ dci(0x5e0400d9); // sha1c q25, s6, v4.4s
+ // vl128 state = 0x34c8609e
+ __ dci(0x5e1a330e); // sha1su0 v14.4s, v24.4s, v26.4s
+ // vl128 state = 0xcb078fad
+ __ dci(0x5e1e30f5); // sha1su0 v21.4s, v7.4s, v30.4s
+ // vl128 state = 0x885200be
+ __ dci(0x5e1e32e1); // sha1su0 v1.4s, v23.4s, v30.4s
+ // vl128 state = 0xabc6a188
+ __ dci(0x5e0733d3); // sha1su0 v19.4s, v30.4s, v7.4s
+ // vl128 state = 0x37a4fe6f
+ __ dci(0x5e0b22e6); // sha1m q6, s23, v11.4s
+ // vl128 state = 0x68b788d2
+ __ dci(0x5e011210); // sha1p q16, s16, v1.4s
+ // vl128 state = 0x6b36b092
+ __ dci(0x5e1702e1); // sha1c q1, s23, v23.4s
+ // vl128 state = 0x74ef56f5
+ __ dci(0x5e1e30f6); // sha1su0 v22.4s, v7.4s, v30.4s
+ // vl128 state = 0x5a150dfd
+ __ dci(0x5e1b3348); // sha1su0 v8.4s, v26.4s, v27.4s
+ // vl128 state = 0xe0a45d9c
+ __ dci(0x5e0a3041); // sha1su0 v1.4s, v2.4s, v10.4s
+ // vl128 state = 0x6ba02d02
+ __ dci(0x5e17119a); // sha1p q26, s12, v23.4s
+ // vl128 state = 0x3bf511fc
+ __ dci(0x5e0b32c7); // sha1su0 v7.4s, v22.4s, v11.4s
+ // vl128 state = 0xf5c513b6
+ __ dci(0x5e063016); // sha1su0 v22.4s, v0.4s, v6.4s
+ // vl128 state = 0x3eb44b28
+ __ dci(0x5e05323c); // sha1su0 v28.4s, v17.4s, v5.4s
+ // vl128 state = 0x7c2d3adf
+ __ dci(0x5e1d132a); // sha1p q10, s25, v29.4s
+ // vl128 state = 0x2b0963c4
+ __ dci(0x5e13003c); // sha1c q28, s1, v19.4s
+ // vl128 state = 0x4a582d00
+ __ dci(0x5e13322c); // sha1su0 v12.4s, v17.4s, v19.4s
+ // vl128 state = 0x7bb2cc8c
+ __ dci(0x5e032330); // sha1m q16, s25, v3.4s
+ // vl128 state = 0x2a8b4c0d
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x2a8b4c0d,
+ 0x114e25bb,
+ 0x4f035af9,
+ 0x23db7966,
+ 0x3d106b42,
+ 0x62651fcf,
+ 0x44c20879,
+ 0xadf71d73,
+ 0xe6858f82,
+ 0x93a74ae5,
+ 0xc270310e,
+ 0x3d07058c,
+ 0x69f83d0e,
+ 0x28c5813b,
+ 0xbb9de2c1,
+ 0xe06b94cd,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
TEST_SVE(neon_sha3) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
@@ -558,5 +766,6 @@
}
}
+
} // namespace aarch64
} // namespace vixl
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
index d29b39b..ae815b5 100644
--- a/tools/code_coverage.log
+++ b/tools/code_coverage.log
@@ -23,6 +23,7 @@
1693487542 82.91% 97.57% 94.87%
1694008240 82.72% 97.50% 94.95%
1697036303 82.87% 97.56% 94.76%
+1698228274 82.93% 97.68% 94.90%
1698330215 82.92% 97.57% 94.88%
1702052331 82.89% 97.59% 94.77%
1706691191 82.87% 97.59% 94.74%