Support SHA-1 accelerating instructions (#102)
Add support for the six Neon SHA-1 accelerating instructions (sha1c, sha1h, sha1m, sha1p, sha1su0 and sha1su1) in the assembler, disassembler, simulator and CPU features auditor.
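The six instructions surface through the MacroAssembler as Sha1c, Sha1h, Sha1m, Sha1p, Sha1su0 and Sha1su1. A minimal usage sketch (register choices are illustrative assumptions, mirroring the disassembly tests added below):

    #include "aarch64/macro-assembler-aarch64.h"

    using namespace vixl::aarch64;

    // Not a complete SHA-1 block; just a demonstration of the new calls.
    void EmitSha1Fragment(MacroAssembler* masm) {
      masm->Sha1su0(v19.V4S(), v9.V4S(), v27.V4S());  // schedule update, part 1
      masm->Sha1su1(v19.V4S(), v4.V4S());             // schedule update, part 2
      masm->Sha1h(s12, s0);                           // fixed rotate: ROL(Sn, 30)
      masm->Sha1c(q0, s12, v20.V4S());                // four 'choose' hash rounds
    }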
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 009b08c..89fd169 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -5909,6 +5909,54 @@
Emit(0xce608c00 | Rd(vd) | Rn(vn) | Rm(vm));
}
+void Assembler::sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e000000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1h(const VRegister& sd, const VRegister& sn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(sd.IsS() && sn.IsS());
+
+ Emit(0x5e280800 | Rd(sd) | Rn(sn));
+}
+
+void Assembler::sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e002000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.IsQ() && vn.IsS() && vm.Is4S());
+
+ Emit(0x5e001000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
+
+ Emit(0x5e003000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha1su1(const VRegister& vd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA1));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S());
+
+ Emit(0x5e281800 | Rd(vd) | Rn(vn));
+}
+
// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
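Each of these emissions ORs fixed opcode bits with the standard Rd, Rn and Rm fields (bits 4:0, 9:5 and 20:16). A standalone spot check, not part of the patch, against an encoding exercised by the simulator tests below:

    #include <cstdint>

    // Hand-assemble SHA1C: fixed bits | Rm << 16 | Rn << 5 | Rd.
    constexpr uint32_t Sha1cEncoding(unsigned rd, unsigned rn, unsigned rm) {
      return 0x5e000000 | (rm << 16) | (rn << 5) | rd;
    }

    // Agrees with "__ dci(0x5e1f02bd);  // sha1c q29, s21, v31.4s" below.
    static_assert(Sha1cEncoding(29, 21, 31) == 0x5e1f02bd, "sha1c encoding");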
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index bbba5db..b0f4d42 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -3642,6 +3642,24 @@
// Rotate and Exclusive-OR
void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+ // SHA1 hash update (choose).
+ void sha1c(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 fixed rotate.
+ void sha1h(const VRegister& sd, const VRegister& sn);
+
+ // SHA1 hash update (majority).
+ void sha1m(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 hash update (parity).
+ void sha1p(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 schedule update 0.
+ void sha1su0(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA1 schedule update 1.
+ void sha1su1(const VRegister& vd, const VRegister& vn);
+
// Scalable Vector Extensions.
// Absolute value (predicated).
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index a85587b..2d41453 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -247,11 +247,13 @@
void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
USE(instr);
}
void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
USE(instr);
}
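Recording kNEON and kSHA1 lets the auditor report the exact feature set a code sequence requires. Callers would typically also gate execution on the host CPU; a sketch using VIXL's runtime query (assuming the usual entry point):

    #include "cpu-features.h"

    bool HostSupportsNeonSha1() {
      // InferFromOS() queries the host, e.g. via getauxval on Linux.
      return vixl::CPUFeatures::InferFromOS().Has(vixl::CPUFeatures::kNEON,
                                                  vixl::CPUFeatures::kSHA1);
    }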
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 5a8241d..fd9879f 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -2172,12 +2172,20 @@
void Disassembler::VisitCrypto2RegSHA(const Instruction *instr) {
- VisitUnimplemented(instr);
+ const char *form = "'Vd.4s, 'Vn.4s";
+ if (form_hash_ == "sha1h_ss_cryptosha2"_h) {
+ form = "'Sd, 'Sn";
+ }
+ FormatWithDecodedMnemonic(instr, form);
}
void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {
- VisitUnimplemented(instr);
+ const char *form = "'Qd, 'Sn, 'Vm.4s";
+ if (form_hash_ == "sha1su0_vvv_cryptosha3"_h) {
+ form = "'Vd.4s, 'Vn.4s, 'Vm.4s";
+ }
+ FormatWithDecodedMnemonic(instr, form);
}
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index 43b2071..e5d1625 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -2513,6 +2513,7 @@
LogicVRegister dst,
const LogicVRegister& src,
int rotation) {
+ dst.ClearForWrite(vform);
int width = LaneSizeInBitsFromFormat(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
uint64_t value = src.Uint(vform, i);
@@ -2521,6 +2522,14 @@
return dst;
}
+LogicVRegister Simulator::rol(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation) {
+ int ror_equivalent = LaneSizeInBitsFromFormat(vform) - rotation;
+ return ror(vform, dst, src, ror_equivalent);
+}
+
LogicVRegister Simulator::ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -7885,6 +7894,21 @@
return dst;
}
+template <>
+uint64_t SHA1Operation<"choose"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return ((y ^ z) & x) ^ z;
+}
+
+template <>
+uint64_t SHA1Operation<"majority"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return (x & y) | ((x | y) & z);
+}
+
+template <>
+uint64_t SHA1Operation<"parity"_h>(uint64_t x, uint64_t y, uint64_t z) {
+ return x ^ y ^ z;
+}
+
} // namespace aarch64
} // namespace vixl
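These specializations are the Ch, Maj and Parity functions from FIPS 180-4, with Ch and Maj in reduced forms that save an operation. A standalone check of the algebraic identities (illustrative only):

    #include <cassert>
    #include <cstdint>

    void CheckSha1Functions(uint64_t x, uint64_t y, uint64_t z) {
      // Ch(x, y, z) = (x & y) | (~x & z), rewritten with a single AND.
      assert((((y ^ z) & x) ^ z) == ((x & y) | (~x & z)));
      // Maj(x, y, z) = (x & y) | (x & z) | (y & z); (x | y) & z folds
      // the last two terms together.
      assert(((x & y) | ((x | y) & z)) == ((x & y) | (x & z) | (y & z)));
    }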
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 8878ef7..15e51a5 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -2800,6 +2800,10 @@
V(saddl2, Saddl2) \
V(saddw, Saddw) \
V(saddw2, Saddw2) \
+ V(sha1c, Sha1c) \
+ V(sha1m, Sha1m) \
+ V(sha1p, Sha1p) \
+ V(sha1su0, Sha1su0) \
V(shadd, Shadd) \
V(shsub, Shsub) \
V(smax, Smax) \
@@ -2944,6 +2948,8 @@
V(sadalp, Sadalp) \
V(saddlp, Saddlp) \
V(saddlv, Saddlv) \
+ V(sha1h, Sha1h) \
+ V(sha1su1, Sha1su1) \
V(smaxv, Smaxv) \
V(sminv, Sminv) \
V(sqabs, Sqabs) \
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index fdc7106..fbe5970 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -7171,12 +7171,59 @@
void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
- VisitUnimplemented(instr);
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ switch (form_hash_) {
+ case "sha1h_ss_cryptosha2"_h:
+ ror(kFormatS, rd, rn, 2);
+ break;
+ case "sha1su1_vv_cryptosha2"_h: {
+ SimVRegister temp;
+
+ // temp = srcdst ^ (src >> 32);
+ ext(kFormat16B, temp, rn, temp, 4);
+ eor(kFormat16B, temp, rd, temp);
+
+ // srcdst = ROL(temp, 1) ^ (ROL(temp, 2) << 96)
+ rol(kFormat4S, rd, temp, 1);
+ rol(kFormatS, temp, temp, 2); // kFormatS will zero bits <127:32>
+ ext(kFormat16B, temp, temp, temp, 4);
+ eor(kFormat16B, rd, rd, temp);
+ break;
+ }
+ case "sha256su0_vv_cryptosha2"_h:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
}
void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
- VisitUnimplemented(instr);
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+ switch (form_hash_) {
+ case "sha1c_qsv_cryptosha3"_h:
+ sha1<"choose"_h>(rd, rn, rm);
+ break;
+ case "sha1m_qsv_cryptosha3"_h:
+ sha1<"majority"_h>(rd, rn, rm);
+ break;
+ case "sha1p_qsv_cryptosha3"_h:
+ sha1<"parity"_h>(rd, rn, rm);
+ break;
+ case "sha1su0_vvv_cryptosha3"_h: {
+ SimVRegister temp;
+ ext(kFormat16B, temp, rd, rn, 8);
+ eor(kFormat16B, temp, temp, rd);
+ eor(kFormat16B, rd, temp, rm);
+ break;
+    }
+    default:
+      // sha256h, sha256h2 and sha256su1 decode here but are not yet supported.
+      VIXL_UNIMPLEMENTED();
+  }
}
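The ext/eor sequence for sha1su0 follows the Arm ARM pseudocode, in which the result starts as the middle 128 bits of the Vn:Vd concatenation. A scalar model of the same dataflow (a sketch for illustration; lane 0 holds bits 31:0):

    #include <array>
    #include <cstdint>

    using Vec4S = std::array<uint32_t, 4>;

    Vec4S Sha1Su0Model(const Vec4S& vd, const Vec4S& vn, const Vec4S& vm) {
      // ext(vd, vn, #8): the top half of vd followed by the bottom half of vn.
      Vec4S t = {vd[2], vd[3], vn[0], vn[1]};
      for (int i = 0; i < 4; i++) t[i] ^= vd[i] ^ vm[i];
      return t;
    }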
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 73277e4..09dc7e1 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1280,9 +1280,10 @@
uint32_t seed_;
};
-
class Debugger;
+template <uint32_t mode>
+uint64_t SHA1Operation(uint64_t x, uint64_t y, uint64_t z);
class Simulator : public DecoderVisitor {
public:
@@ -3768,6 +3769,10 @@
LogicVRegister dst,
const LogicVRegister& src,
int rotation);
+ LogicVRegister rol(VectorFormat vform,
+ LogicVRegister dst,
+ const LogicVRegister& src,
+ int rotation);
LogicVRegister ext(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src1,
@@ -4492,6 +4497,36 @@
LogicVRegister srcdst,
const LogicVRegister& src1,
const LogicVRegister& src2);
+
+ template <uint32_t mode>
+ LogicVRegister sha1(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t y = src1.Uint(kFormat4S, 0);
+ uint64_t sd[4] = {};
+ srcdst.UintArray(kFormat4S, sd);
+
+ for (unsigned i = 0; i < ArrayLength(sd); i++) {
+ uint64_t t = SHA1Operation<mode>(sd[1], sd[2], sd[3]);
+
+ y += RotateLeft(sd[0], 5, kSRegSize) + t;
+ y += src2.Uint(kFormat4S, i);
+
+ sd[1] = RotateLeft(sd[1], 30, kSRegSize);
+
+ // y:sd = ROL(y:sd, 32)
+ uint64_t temp = sd[3];
+ sd[3] = sd[2];
+ sd[2] = sd[1];
+ sd[1] = sd[0];
+ sd[0] = y;
+ y = temp;
+ }
+
+ srcdst.SetUintArray(kFormat4S, sd);
+ return srcdst;
+ }
+
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
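Each sha1c/sha1p/sha1m instruction performs four of the eighty FIPS 180-4 rounds: a typical driver uses sha1c for rounds 0-19, sha1p for 20-39, sha1m for 40-59 and sha1p again for 60-79, adding the round constant to the schedule words itself since Vm carries W + K. A hedged fragment of such a driver (register roles are assumptions, not part of this patch):

    // v4 holds four schedule words, v16 the round constant K splatted to 4S.
    __ Add(v4.V4S(), v4.V4S(), v16.V4S());  // v4 = W + K
    __ Sha1h(s18, s0);                      // next e = ROL(a, 30), saved first
    __ Sha1c(q0, s17, v4.V4S());            // four 'choose' rounds; s17 = old e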
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index 281c5cf..f95f6c8 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -239,6 +239,11 @@
return value & width_mask;
}
+inline uint64_t RotateLeft(uint64_t value,
+ unsigned int rotate,
+ unsigned int width) {
+ return RotateRight(value, width - rotate, width);
+}
// Wrapper class for passing FP16 values through the assembler.
// This is purely to aid with type checking/casting.
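RotateLeft reuses the existing RotateRight via the identity rol(x, r) == ror(x, w - r) in a w-bit lane. A spot check (illustrative; the new callers only pass 0 < rotate < width):

    #include <cassert>
    #include "utils-vixl.h"

    void CheckRotateLeft() {
      assert(vixl::RotateLeft(0x80000001, 1, 32) == 0x00000003);
      assert(vixl::RotateLeft(1, 30, 32) == 0x40000000);
    }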
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index 8430d7f..56ec38f 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3794,5 +3794,16 @@
TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
+#define TEST_NEON_SHA1(NAME, ASM) \
+ TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA1), \
+ NEON_SHA1_##NAME, \
+ ASM)
+TEST_NEON_SHA1(sha1c_0, sha1c(q0, s12, v20.V4S()))
+TEST_NEON_SHA1(sha1m_0, sha1m(q22, s2, v13.V4S()))
+TEST_NEON_SHA1(sha1p_0, sha1p(q31, s5, v15.V4S()))
+TEST_NEON_SHA1(sha1su0_0, sha1su0(v19.V4S(), v9.V4S(), v27.V4S()))
+TEST_NEON_SHA1(sha1h_0, sha1h(s12, s0))
+TEST_NEON_SHA1(sha1su1_0, sha1su1(v2.V4S(), v4.V4S()))
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index 774114d..18d400e 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4530,6 +4530,20 @@
CLEANUP();
}
+TEST(neon_sha1) {
+ SETUP();
+
+ COMPARE_MACRO(Sha1c(q0, s12, v20.V4S()), "sha1c q0, s12, v20.4s");
+ COMPARE_MACRO(Sha1m(q22, s2, v13.V4S()), "sha1m q22, s2, v13.4s");
+ COMPARE_MACRO(Sha1p(q31, s5, v15.V4S()), "sha1p q31, s5, v15.4s");
+ COMPARE_MACRO(Sha1su0(v19.V4S(), v9.V4S(), v27.V4S()),
+ "sha1su0 v19.4s, v9.4s, v27.4s");
+ COMPARE_MACRO(Sha1h(s12, s0), "sha1h s12, s0");
+ COMPARE_MACRO(Sha1su1(v2.V4S(), v4.V4S()), "sha1su1 v2.4s, v4.4s");
+
+ CLEANUP();
+}
+
TEST(neon_unallocated_regression_test) {
SETUP();
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index bdd5c81..1ba1aee 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -394,6 +394,214 @@
}
}
+TEST_SVE(neon_sha1_2reg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA1);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e280800); // sha1h s0, s0
+ // vl128 state = 0xc388d4f8
+ __ dci(0x5e280a28); // sha1h s8, s17
+ // vl128 state = 0x5c88b904
+ __ dci(0x5e280a2a); // sha1h s10, s17
+ // vl128 state = 0x6f63c596
+ __ dci(0x5e281aae); // sha1su1 v14.4s, v21.4s
+ // vl128 state = 0x85e1119d
+ __ dci(0x5e281abe); // sha1su1 v30.4s, v21.4s
+ // vl128 state = 0x9b814260
+ __ dci(0x5e281a0e); // sha1su1 v14.4s, v16.4s
+ // vl128 state = 0x8ccca0ab
+ __ dci(0x5e281a0a); // sha1su1 v10.4s, v16.4s
+ // vl128 state = 0x42262836
+ __ dci(0x5e281acb); // sha1su1 v11.4s, v22.4s
+ // vl128 state = 0xabcde33d
+ __ dci(0x5e281acf); // sha1su1 v15.4s, v22.4s
+ // vl128 state = 0xdf44e7be
+ __ dci(0x5e281adf); // sha1su1 v31.4s, v22.4s
+ // vl128 state = 0x48c332a3
+ __ dci(0x5e280a9d); // sha1h s29, s20
+ // vl128 state = 0x56bafe13
+ __ dci(0x5e28188d); // sha1su1 v13.4s, v4.4s
+ // vl128 state = 0x218eb351
+ __ dci(0x5e2808cf); // sha1h s15, s6
+ // vl128 state = 0xc1720d9f
+ __ dci(0x5e2808cb); // sha1h s11, s6
+ // vl128 state = 0x67119e1c
+ __ dci(0x5e2808c9); // sha1h s9, s6
+ // vl128 state = 0x31f69637
+ __ dci(0x5e2808c1); // sha1h s1, s6
+ // vl128 state = 0x214a25ff
+ __ dci(0x5e280871); // sha1h s17, s3
+ // vl128 state = 0xa5e88b55
+ __ dci(0x5e280815); // sha1h s21, s0
+ // vl128 state = 0xc8c91e29
+ __ dci(0x5e28185d); // sha1su1 v29.4s, v2.4s
+ // vl128 state = 0x5582c6a8
+ __ dci(0x5e28185f); // sha1su1 v31.4s, v2.4s
+ // vl128 state = 0xd3288a61
+ __ dci(0x5e28087e); // sha1h s30, s3
+ // vl128 state = 0x350b39c2
+ __ dci(0x5e28093f); // sha1h s31, s9
+ // vl128 state = 0xbdc1ac98
+ __ dci(0x5e28093b); // sha1h s27, s9
+ // vl128 state = 0x62f828bf
+ __ dci(0x5e28092b); // sha1h s11, s9
+ // vl128 state = 0xc8f2f671
+ __ dci(0x5e2819bb); // sha1su1 v27.4s, v13.4s
+ // vl128 state = 0x24ec8c34
+ __ dci(0x5e281b93); // sha1su1 v19.4s, v28.4s
+ // vl128 state = 0x71e188de
+ __ dci(0x5e281b97); // sha1su1 v23.4s, v28.4s
+ // vl128 state = 0x22490375
+ __ dci(0x5e281b95); // sha1su1 v21.4s, v28.4s
+ // vl128 state = 0x016b70d1
+ __ dci(0x5e281b51); // sha1su1 v17.4s, v26.4s
+ // vl128 state = 0xa6252086
+ __ dci(0x5e2819d3); // sha1su1 v19.4s, v14.4s
+ // vl128 state = 0x78683885
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x78683885,
+ 0x59574c2a,
+ 0x449978bf,
+ 0x0ddab775,
+ 0x1a043ef3,
+ 0xf501e2e7,
+ 0xa219e725,
+ 0xf17f57c8,
+ 0x4ccdbf99,
+ 0x419d4fc3,
+ 0x7302571d,
+ 0xd6bee170,
+ 0x7d81c301,
+ 0xbaa7d729,
+ 0xf33f0bc4,
+ 0xff8b070a,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(neon_sha1_3reg) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA1);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e1f02bd); // sha1c q29, s21, v31.4s
+ // vl128 state = 0xec2a37ad
+ __ dci(0x5e0810af); // sha1p q15, s5, v8.4s
+ // vl128 state = 0x3fe9252a
+ __ dci(0x5e122227); // sha1m q7, s17, v18.4s
+ // vl128 state = 0x4465789e
+ __ dci(0x5e0b039d); // sha1c q29, s28, v11.4s
+ // vl128 state = 0x2186488a
+ __ dci(0x5e1a03e9); // sha1c q9, s31, v26.4s
+ // vl128 state = 0x9eddf8e3
+ __ dci(0x5e0c138c); // sha1p q12, s28, v12.4s
+ // vl128 state = 0x0ca7cd3d
+ __ dci(0x5e1f1316); // sha1p q22, s24, v31.4s
+ // vl128 state = 0xb80a61c0
+ __ dci(0x5e052204); // sha1m q4, s16, v5.4s
+ // vl128 state = 0x941821ca
+ __ dci(0x5e0a00d6); // sha1c q22, s6, v10.4s
+ // vl128 state = 0x5e71ccae
+ __ dci(0x5e0e032e); // sha1c q14, s25, v14.4s
+ // vl128 state = 0x7ed4486a
+ __ dci(0x5e1d1098); // sha1p q24, s4, v29.4s
+ // vl128 state = 0x0978a637
+ __ dci(0x5e0400d9); // sha1c q25, s6, v4.4s
+ // vl128 state = 0x34c8609e
+ __ dci(0x5e1a330e); // sha1su0 v14.4s, v24.4s, v26.4s
+ // vl128 state = 0xcb078fad
+ __ dci(0x5e1e30f5); // sha1su0 v21.4s, v7.4s, v30.4s
+ // vl128 state = 0x885200be
+ __ dci(0x5e1e32e1); // sha1su0 v1.4s, v23.4s, v30.4s
+ // vl128 state = 0xabc6a188
+ __ dci(0x5e0733d3); // sha1su0 v19.4s, v30.4s, v7.4s
+ // vl128 state = 0x37a4fe6f
+ __ dci(0x5e0b22e6); // sha1m q6, s23, v11.4s
+ // vl128 state = 0x68b788d2
+ __ dci(0x5e011210); // sha1p q16, s16, v1.4s
+ // vl128 state = 0x6b36b092
+ __ dci(0x5e1702e1); // sha1c q1, s23, v23.4s
+ // vl128 state = 0x74ef56f5
+ __ dci(0x5e1e30f6); // sha1su0 v22.4s, v7.4s, v30.4s
+ // vl128 state = 0x5a150dfd
+ __ dci(0x5e1b3348); // sha1su0 v8.4s, v26.4s, v27.4s
+ // vl128 state = 0xe0a45d9c
+ __ dci(0x5e0a3041); // sha1su0 v1.4s, v2.4s, v10.4s
+ // vl128 state = 0x6ba02d02
+ __ dci(0x5e17119a); // sha1p q26, s12, v23.4s
+ // vl128 state = 0x3bf511fc
+ __ dci(0x5e0b32c7); // sha1su0 v7.4s, v22.4s, v11.4s
+ // vl128 state = 0xf5c513b6
+ __ dci(0x5e063016); // sha1su0 v22.4s, v0.4s, v6.4s
+ // vl128 state = 0x3eb44b28
+ __ dci(0x5e05323c); // sha1su0 v28.4s, v17.4s, v5.4s
+ // vl128 state = 0x7c2d3adf
+ __ dci(0x5e1d132a); // sha1p q10, s25, v29.4s
+ // vl128 state = 0x2b0963c4
+ __ dci(0x5e13003c); // sha1c q28, s1, v19.4s
+ // vl128 state = 0x4a582d00
+ __ dci(0x5e13322c); // sha1su0 v12.4s, v17.4s, v19.4s
+ // vl128 state = 0x7bb2cc8c
+ __ dci(0x5e032330); // sha1m q16, s25, v3.4s
+ // vl128 state = 0x2a8b4c0d
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x2a8b4c0d,
+ 0x114e25bb,
+ 0x4f035af9,
+ 0x23db7966,
+ 0x3d106b42,
+ 0x62651fcf,
+ 0x44c20879,
+ 0xadf71d73,
+ 0xe6858f82,
+ 0x93a74ae5,
+ 0xc270310e,
+ 0x3d07058c,
+ 0x69f83d0e,
+ 0x28c5813b,
+ 0xbb9de2c1,
+ 0xe06b94cd,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
TEST_SVE(neon_sha3) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
@@ -558,5 +766,6 @@
}
}
+
} // namespace aarch64
} // namespace vixl
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
index d29b39b..ae815b5 100644
--- a/tools/code_coverage.log
+++ b/tools/code_coverage.log
@@ -23,6 +23,7 @@
1693487542 82.91% 97.57% 94.87%
1694008240 82.72% 97.50% 94.95%
1697036303 82.87% 97.56% 94.76%
+1698228274 82.93% 97.68% 94.90%
1698330215 82.92% 97.57% 94.88%
1702052331 82.89% 97.59% 94.77%
1706691191 82.87% 97.59% 94.74%