Support SHA-2 accelerating instructions (#103)
Add support for the four Neon SHA-2 accelerating instructions: sha256h,
sha256h2, sha256su0 and sha256su1.
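
The new instructions are exposed through the Assembler, the
MacroAssembler, the disassembler, the simulator and the CPU features
auditor, and they require CPUFeatures::kNEON plus CPUFeatures::kSHA2.

As a rough illustration (not part of this patch), one four-round step of
a SHA-256 compression loop could be emitted through the new
MacroAssembler entry points as below; masm, the helper name and the
register assignment are hypothetical:

  #include "aarch64/macro-assembler-aarch64.h"

  // Sketch only: assumes the target CPU reports kNEON and kSHA2.
  // q0:q1 hold the eight hash words {a,b,c,d}:{e,f,g,h}, v4 holds four
  // message-schedule words already summed with the round constants, and
  // v16-v19 hold message-schedule state.
  void EmitSha256Quad(vixl::aarch64::MacroAssembler* masm) {
    using namespace vixl::aarch64;
    masm->Sha256su0(v16.V4S(), v17.V4S());
    masm->Sha256su1(v16.V4S(), v18.V4S(), v19.V4S());
    masm->Mov(v2.V16B(), v0.V16B());  // Preserve q0 for the part 2 input.
    masm->Sha256h(q0, q1, v4.V4S());
    masm->Sha256h2(q1, q2, v4.V4S());
  }
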
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 89fd169..ef73402 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -5957,6 +5957,38 @@
Emit(0x5e281800 | Rd(vd) | Rn(vn));
}
+void Assembler::sha256h(const VRegister& vd,
+                        const VRegister& vn,
+                        const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
+ VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is4S());
+
+ Emit(0x5e004000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha256h2(const VRegister& vd,
+                         const VRegister& vn,
+                         const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
+ VIXL_ASSERT(vd.IsQ() && vn.IsQ() && vm.Is4S());
+
+ Emit(0x5e005000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
+void Assembler::sha256su0(const VRegister& vd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S());
+
+ Emit(0x5e282800 | Rd(vd) | Rn(vn));
+}
+
+void Assembler::sha256su1(const VRegister& vd,
+                          const VRegister& vn,
+                          const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA2));
+ VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
+
+ Emit(0x5e006000 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index b0f4d42..4f3093f 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -3660,6 +3660,18 @@
// SHA1 schedule update 1.
void sha1su1(const VRegister& vd, const VRegister& vn);
+ // SHA256 hash update (part 1).
+ void sha256h(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA256 hash update (part 2).
+ void sha256h2(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
+ // SHA256 schedule update 0.
+ void sha256su0(const VRegister& vd, const VRegister& vn);
+
+ // SHA256 schedule update 1.
+ void sha256su1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
// Scalable Vector Extensions.
// Absolute value (predicated).
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 2d41453..7d41511 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -247,13 +247,29 @@
void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
- scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+ if (form_hash_ == "sha256su0_vv_cryptosha2"_h) {
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
+ } else {
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+ }
USE(instr);
}
void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
- scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+ switch (form_hash_) {
+ case "sha1c_qsv_cryptosha3"_h:
+ case "sha1m_qsv_cryptosha3"_h:
+ case "sha1p_qsv_cryptosha3"_h:
+ case "sha1su0_vvv_cryptosha3"_h:
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA1);
+ break;
+ case "sha256h_qqv_cryptosha3"_h:
+ case "sha256h2_qqv_cryptosha3"_h:
+ case "sha256su1_vvv_cryptosha3"_h:
+ scope.Record(CPUFeatures::kNEON, CPUFeatures::kSHA2);
+ break;
+ }
USE(instr);
}
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index fd9879f..ec57da9 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -2182,8 +2182,15 @@
void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) {
const char *form = "'Qd, 'Sn, 'Vm.4s";
- if (form_hash_ == "sha1su0_vvv_cryptosha3"_h) {
- form = "'Vd.4s, 'Vn.4s, 'Vm.4s";
+ switch (form_hash_) {
+ case "sha1su0_vvv_cryptosha3"_h:
+ case "sha256su1_vvv_cryptosha3"_h:
+ form = "'Vd.4s, 'Vn.4s, 'Vm.4s";
+ break;
+ case "sha256h_qqv_cryptosha3"_h:
+ case "sha256h2_qqv_cryptosha3"_h:
+ form = "'Qd, 'Qn, 'Vm.4s";
+ break;
}
FormatWithDecodedMnemonic(instr, form);
}
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index e5d1625..ffd3bf8 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -7909,6 +7909,89 @@
return x ^ y ^ z;
}
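+// FIPS 180-4 "big sigma" functions: SHA2Sigma<A, B, C>(x) computes
+// ROTR(x, A) ^ ROTR(x, B) ^ ROTR(x, C) on a 32-bit value. <6, 11, 25>
+// gives Sigma1 and <2, 13, 22> gives Sigma0.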
+template <unsigned A, unsigned B, unsigned C>
+static uint64_t SHA2Sigma(uint64_t x) {
+ return RotateRight(x, A, kSRegSize) ^ RotateRight(x, B, kSRegSize) ^
+ RotateRight(x, C, kSRegSize);
+}
+
+LogicVRegister Simulator::sha2h(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool part1) {
+ uint64_t x[4] = {};
+ uint64_t y[4] = {};
+ if (part1) {
+ // Switch input order based on which part is being handled.
+ srcdst.UintArray(kFormat4S, x);
+ src1.UintArray(kFormat4S, y);
+ } else {
+ src1.UintArray(kFormat4S, x);
+ srcdst.UintArray(kFormat4S, y);
+ }
+
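+  // x holds the SHA-256 working variables {a, b, c, d} and y holds
+  // {e, f, g, h}. Each iteration below performs one round of the
+  // compression function, then rotates the variables ready for the
+  // next round.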
+ for (unsigned i = 0; i < ArrayLength(x); i++) {
+ uint64_t chs = SHA1Operation<"choose"_h>(y[0], y[1], y[2]);
+ uint64_t maj = SHA1Operation<"majority"_h>(x[0], x[1], x[2]);
+
+ uint64_t w = src2.Uint(kFormat4S, i);
+ uint64_t t = y[3] + SHA2Sigma<6, 11, 25>(y[0]) + chs + w;
+
+ x[3] += t;
+ y[3] = t + SHA2Sigma<2, 13, 22>(x[0]) + maj;
+
+ // y:x = ROL(y:x, 32)
+ SHARotateEltsLeftOne(x);
+ SHARotateEltsLeftOne(y);
+ std::swap(x[0], y[0]);
+ }
+
+ srcdst.SetUintArray(kFormat4S, part1 ? x : y);
+ return srcdst;
+}
+
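+// FIPS 180-4 "small sigma" functions: SHA2SURotate<A, B, C>(x) computes
+// ROTR(x, A) ^ ROTR(x, B) ^ SHR(x, C) on a 32-bit value. <7, 18, 3>
+// gives sigma0 and <17, 19, 10> gives sigma1.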
+template <unsigned A, unsigned B, unsigned C>
+static uint64_t SHA2SURotate(uint64_t x) {
+ return RotateRight(x, A, kSRegSize) ^ RotateRight(x, B, kSRegSize) ^
+ ((x & 0xffffffff) >> C);
+}
+
+LogicVRegister Simulator::sha2su0(LogicVRegister srcdst,
+ const LogicVRegister& src1) {
+ uint64_t w[4] = {};
+ uint64_t result[4];
+ srcdst.UintArray(kFormat4S, w);
+ uint64_t x = src1.Uint(kFormat4S, 0);
+
+ result[0] = SHA2SURotate<7, 18, 3>(w[1]) + w[0];
+ result[1] = SHA2SURotate<7, 18, 3>(w[2]) + w[1];
+ result[2] = SHA2SURotate<7, 18, 3>(w[3]) + w[2];
+ result[3] = SHA2SURotate<7, 18, 3>(x) + w[3];
+
+ srcdst.SetUintArray(kFormat4S, result);
+ return srcdst;
+}
+
+LogicVRegister Simulator::sha2su1(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2) {
+ uint64_t w[4] = {};
+ uint64_t x[4] = {};
+ uint64_t y[4] = {};
+ uint64_t result[4];
+ srcdst.UintArray(kFormat4S, w);
+ src1.UintArray(kFormat4S, x);
+ src2.UintArray(kFormat4S, y);
+
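+  // Completes W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],
+  // where srcdst already holds the last two terms (from sha256su0). For
+  // the upper two lanes, W[t-2] is the result just computed here.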
+ result[0] = SHA2SURotate<17, 19, 10>(y[2]) + w[0] + x[1];
+ result[1] = SHA2SURotate<17, 19, 10>(y[3]) + w[1] + x[2];
+ result[2] = SHA2SURotate<17, 19, 10>(result[0]) + w[2] + x[3];
+ result[3] = SHA2SURotate<17, 19, 10>(result[1]) + w[3] + y[0];
+
+ srcdst.SetUintArray(kFormat4S, result);
+ return srcdst;
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 15e51a5..d2c22ec 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -2804,6 +2804,9 @@
V(sha1m, Sha1m) \
V(sha1p, Sha1p) \
V(sha1su0, Sha1su0) \
+ V(sha256h, Sha256h) \
+ V(sha256h2, Sha256h2) \
+ V(sha256su1, Sha256su1) \
V(shadd, Shadd) \
V(shsub, Shsub) \
V(smax, Smax) \
@@ -2950,6 +2953,7 @@
V(saddlv, Saddlv) \
V(sha1h, Sha1h) \
V(sha1su1, Sha1su1) \
+ V(sha256su0, Sha256su0) \
V(smaxv, Smaxv) \
V(sminv, Sminv) \
V(sqabs, Sqabs) \
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index fbe5970..6a84f3f 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -7193,7 +7193,7 @@
break;
}
case "sha256su0_vv_cryptosha2"_h:
- VIXL_UNIMPLEMENTED();
+ sha2su0(rd, rn);
break;
}
}
@@ -7221,6 +7221,15 @@
eor(kFormat16B, rd, temp, rm);
break;
}
+ case "sha256h_qqv_cryptosha3"_h:
+ sha2h(rd, rn, rm, /* part1 = */ true);
+ break;
+ case "sha256h2_qqv_cryptosha3"_h:
+ sha2h(rd, rn, rm, /* part1 = */ false);
+ break;
+ case "sha256su1_vvv_cryptosha3"_h:
+ sha2su1(rd, rn, rm);
+ break;
}
}
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 09dc7e1..ed209ea 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -4498,6 +4498,16 @@
const LogicVRegister& src1,
const LogicVRegister& src2);
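+  // Move each element of x up one index, wrapping the top element round
+  // to index 0. Viewing element 0 as the least-significant 32-bit lane
+  // of a 128-bit value, this is a rotate left by 32 bits.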
+ template <unsigned N>
+ static void SHARotateEltsLeftOne(uint64_t (&x)[N]) {
+ VIXL_STATIC_ASSERT(N == 4);
+ uint64_t temp = x[3];
+ x[3] = x[2];
+ x[2] = x[1];
+ x[1] = x[0];
+ x[0] = temp;
+ }
+
template <uint32_t mode>
LogicVRegister sha1(LogicVRegister srcdst,
const LogicVRegister& src1,
@@ -4515,18 +4525,23 @@
sd[1] = RotateLeft(sd[1], 30, kSRegSize);
// y:sd = ROL(y:sd, 32)
- uint64_t temp = sd[3];
- sd[3] = sd[2];
- sd[2] = sd[1];
- sd[1] = sd[0];
- sd[0] = y;
- y = temp;
+ SHARotateEltsLeftOne(sd);
+ std::swap(sd[0], y);
}
srcdst.SetUintArray(kFormat4S, sd);
return srcdst;
}
+ LogicVRegister sha2h(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2,
+ bool part1);
+ LogicVRegister sha2su0(LogicVRegister srcdst, const LogicVRegister& src1);
+ LogicVRegister sha2su1(LogicVRegister srcdst,
+ const LogicVRegister& src1,
+ const LogicVRegister& src2);
+
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index 56ec38f..d7e9f8c 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3805,5 +3805,15 @@
TEST_NEON_SHA1(sha1h_0, sha1h(s12, s0))
TEST_NEON_SHA1(sha1su1_0, sha1su1(v2.V4S(), v4.V4S()))
+#define TEST_FEAT(NAME, ASM) \
+ TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA2), \
+ NEON_SHA2_##NAME, \
+ ASM)
+TEST_FEAT(sha256h_0, sha256h(q0, q12, v20.V4S()))
+TEST_FEAT(sha256h2_0, sha256h2(q22, q2, v13.V4S()))
+TEST_FEAT(sha256su0_0, sha256su0(v2.V4S(), v4.V4S()))
+TEST_FEAT(sha256su1_0, sha256su1(v19.V4S(), v9.V4S(), v27.V4S()))
+#undef TEST_FEAT
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index 18d400e..9b00aff 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4544,6 +4544,18 @@
CLEANUP();
}
+TEST(neon_sha2) {
+ SETUP();
+
+ COMPARE_MACRO(Sha256h(q0, q12, v20.V4S()), "sha256h q0, q12, v20.4s");
+ COMPARE_MACRO(Sha256h2(q22, q2, v13.V4S()), "sha256h2 q22, q2, v13.4s");
+ COMPARE_MACRO(Sha256su0(v2.V4S(), v4.V4S()), "sha256su0 v2.4s, v4.4s");
+ COMPARE_MACRO(Sha256su1(v19.V4S(), v9.V4S(), v27.V4S()),
+ "sha256su1 v19.4s, v9.4s, v27.4s");
+
+ CLEANUP();
+}
+
TEST(neon_unallocated_regression_test) {
SETUP();
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index 1ba1aee..98450db 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -602,6 +602,318 @@
}
}
+TEST_SVE(neon_sha2h) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA2);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e0152a2); // sha256h2 q2, q21, v1.4s
+ // vl128 state = 0x6bda8984
+ __ dci(0x5e1552b2); // sha256h2 q18, q21, v21.4s
+ // vl128 state = 0xe985c68a
+ __ dci(0x5e055293); // sha256h2 q19, q20, v5.4s
+ // vl128 state = 0xab18a98b
+ __ dci(0x5e055297); // sha256h2 q23, q20, v5.4s
+ // vl128 state = 0x896bad28
+ __ dci(0x5e0752a7); // sha256h2 q7, q21, v7.4s
+ // vl128 state = 0x4e00ba08
+ __ dci(0x5e175223); // sha256h2 q3, q17, v23.4s
+ // vl128 state = 0x380f3893
+ __ dci(0x5e1f5262); // sha256h2 q2, q19, v31.4s
+ // vl128 state = 0xb431122d
+ __ dci(0x5e1f5272); // sha256h2 q18, q19, v31.4s
+ // vl128 state = 0x18140047
+ __ dci(0x5e1e4262); // sha256h q2, q19, v30.4s
+ // vl128 state = 0x721779be
+ __ dci(0x5e164363); // sha256h q3, q27, v22.4s
+ // vl128 state = 0x383ad878
+ __ dci(0x5e175361); // sha256h2 q1, q27, v23.4s
+ // vl128 state = 0xd985bd85
+ __ dci(0x5e115360); // sha256h2 q0, q27, v17.4s
+ // vl128 state = 0xfa5e77f3
+ __ dci(0x5e135270); // sha256h2 q16, q19, v19.4s
+ // vl128 state = 0x4fc1f5cc
+ __ dci(0x5e195260); // sha256h2 q0, q19, v25.4s
+ // vl128 state = 0x89435952
+ __ dci(0x5e1952c4); // sha256h2 q4, q22, v25.4s
+ // vl128 state = 0x93c60c86
+ __ dci(0x5e1a52c6); // sha256h2 q6, q22, v26.4s
+ // vl128 state = 0xedc42105
+ __ dci(0x5e1a52c4); // sha256h2 q4, q22, v26.4s
+ // vl128 state = 0xd5d638a8
+ __ dci(0x5e1a4285); // sha256h q5, q20, v26.4s
+ // vl128 state = 0x9f9da446
+ __ dci(0x5e1a428d); // sha256h q13, q20, v26.4s
+ // vl128 state = 0x87d49cfb
+ __ dci(0x5e1b42cf); // sha256h q15, q22, v27.4s
+ // vl128 state = 0xa6802b10
+ __ dci(0x5e1b43ed); // sha256h q13, q31, v27.4s
+ // vl128 state = 0x2e346937
+ __ dci(0x5e0b436f); // sha256h q15, q27, v11.4s
+ // vl128 state = 0x1005f372
+ __ dci(0x5e03433f); // sha256h q31, q25, v3.4s
+ // vl128 state = 0xd908918c
+ __ dci(0x5e13532f); // sha256h2 q15, q25, v19.4s
+ // vl128 state = 0x31c73fe0
+ __ dci(0x5e01533f); // sha256h2 q31, q25, v1.4s
+ // vl128 state = 0x84e35a20
+ __ dci(0x5e03523d); // sha256h2 q29, q17, v3.4s
+ // vl128 state = 0x40da34aa
+ __ dci(0x5e0b527c); // sha256h2 q28, q19, v11.4s
+ // vl128 state = 0x506a21d9
+ __ dci(0x5e0f5238); // sha256h2 q24, q17, v15.4s
+ // vl128 state = 0x6a67f033
+ __ dci(0x5e0d5210); // sha256h2 q16, q16, v13.4s
+ // vl128 state = 0x317e084c
+ __ dci(0x5e0d5214); // sha256h2 q20, q16, v13.4s
+ // vl128 state = 0xdd0eb379
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0xdd0eb379,
+ 0x15384d69,
+ 0x32bbc73a,
+ 0xc5879e77,
+ 0x9241294d,
+ 0xfc01bad8,
+ 0xf5e79af5,
+ 0xee66e696,
+ 0x535158e8,
+ 0x09cfa8b6,
+ 0x8cd83eae,
+ 0x93ff18b0,
+ 0x561444e4,
+ 0xa6249eea,
+ 0x830e4c73,
+ 0xb516eaae,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(neon_sha2su0) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA2);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e2828e3); // sha256su0 v3.4s, v7.4s
+ // vl128 state = 0xbc7a7764
+ __ dci(0x5e282be1); // sha256su0 v1.4s, v31.4s
+ // vl128 state = 0x6138a856
+ __ dci(0x5e282be9); // sha256su0 v9.4s, v31.4s
+ // vl128 state = 0x49c6be17
+ __ dci(0x5e282beb); // sha256su0 v11.4s, v31.4s
+ // vl128 state = 0xca658743
+ __ dci(0x5e2829bb); // sha256su0 v27.4s, v13.4s
+ // vl128 state = 0x1bf1d233
+ __ dci(0x5e2829ba); // sha256su0 v26.4s, v13.4s
+ // vl128 state = 0xafb0c6ae
+ __ dci(0x5e2829aa); // sha256su0 v10.4s, v13.4s
+ // vl128 state = 0x2182e90d
+ __ dci(0x5e282b2e); // sha256su0 v14.4s, v25.4s
+ // vl128 state = 0x401d297d
+ __ dci(0x5e282aaf); // sha256su0 v15.4s, v21.4s
+ // vl128 state = 0x6c01fefa
+ __ dci(0x5e282aad); // sha256su0 v13.4s, v21.4s
+ // vl128 state = 0x0f4c191d
+ __ dci(0x5e282a7d); // sha256su0 v29.4s, v19.4s
+ // vl128 state = 0xcf26aa1b
+ __ dci(0x5e282ad9); // sha256su0 v25.4s, v22.4s
+ // vl128 state = 0xae04081e
+ __ dci(0x5e282ac9); // sha256su0 v9.4s, v22.4s
+ // vl128 state = 0x08149009
+ __ dci(0x5e282acb); // sha256su0 v11.4s, v22.4s
+ // vl128 state = 0xa691e487
+ __ dci(0x5e282ac3); // sha256su0 v3.4s, v22.4s
+ // vl128 state = 0xd728e1b5
+ __ dci(0x5e282ac7); // sha256su0 v7.4s, v22.4s
+ // vl128 state = 0x120fac30
+ __ dci(0x5e282ac5); // sha256su0 v5.4s, v22.4s
+ // vl128 state = 0x88086f82
+ __ dci(0x5e282ac4); // sha256su0 v4.4s, v22.4s
+ // vl128 state = 0x625160b7
+ __ dci(0x5e282a65); // sha256su0 v5.4s, v19.4s
+ // vl128 state = 0x308feecd
+ __ dci(0x5e282a6d); // sha256su0 v13.4s, v19.4s
+ // vl128 state = 0x65f03097
+ __ dci(0x5e282a65); // sha256su0 v5.4s, v19.4s
+ // vl128 state = 0x44d9fbb6
+ __ dci(0x5e282a67); // sha256su0 v7.4s, v19.4s
+ // vl128 state = 0x694fe04a
+ __ dci(0x5e282a17); // sha256su0 v23.4s, v16.4s
+ // vl128 state = 0x3d5c139b
+ __ dci(0x5e282a13); // sha256su0 v19.4s, v16.4s
+ // vl128 state = 0x922f40a5
+ __ dci(0x5e282b3b); // sha256su0 v27.4s, v25.4s
+ // vl128 state = 0x4f9c34f2
+ __ dci(0x5e282ab9); // sha256su0 v25.4s, v21.4s
+ // vl128 state = 0x18a4f581
+ __ dci(0x5e282ab1); // sha256su0 v17.4s, v21.4s
+ // vl128 state = 0x69da3844
+ __ dci(0x5e282ab9); // sha256su0 v25.4s, v21.4s
+ // vl128 state = 0x57f8ce0b
+ __ dci(0x5e282a1d); // sha256su0 v29.4s, v16.4s
+ // vl128 state = 0xafa03001
+ __ dci(0x5e282ad5); // sha256su0 v21.4s, v22.4s
+ // vl128 state = 0x029b78a8
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x029b78a8,
+ 0x479a8911,
+ 0x6bdbdb48,
+ 0x5ef3718b,
+ 0x695ce173,
+ 0x586543d0,
+ 0xd00a22be,
+ 0xe63a91b9,
+ 0x42bb89a2,
+ 0xea48ee79,
+ 0x9788ac35,
+ 0x1e8599a3,
+ 0xd0d2d6ee,
+ 0xfe7aaaf7,
+ 0x77da6831,
+ 0xb93fb875,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+TEST_SVE(neon_sha2su1) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA2);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
+ __ dci(0x5e1e6146); // sha256su1 v6.4s, v10.4s, v30.4s
+ // vl128 state = 0x3bbf7782
+ __ dci(0x5e0f6144); // sha256su1 v4.4s, v10.4s, v15.4s
+ // vl128 state = 0xf8c83149
+ __ dci(0x5e0e6174); // sha256su1 v20.4s, v11.4s, v14.4s
+ // vl128 state = 0x3b8c353b
+ __ dci(0x5e0e6170); // sha256su1 v16.4s, v11.4s, v14.4s
+ // vl128 state = 0x1041e30e
+ __ dci(0x5e0a6131); // sha256su1 v17.4s, v9.4s, v10.4s
+ // vl128 state = 0xe4d81cd2
+ __ dci(0x5e0a6135); // sha256su1 v21.4s, v9.4s, v10.4s
+ // vl128 state = 0x24869db3
+ __ dci(0x5e0a6131); // sha256su1 v17.4s, v9.4s, v10.4s
+ // vl128 state = 0xfb093436
+ __ dci(0x5e0a6199); // sha256su1 v25.4s, v12.4s, v10.4s
+ // vl128 state = 0x0c7939ba
+ __ dci(0x5e0e639b); // sha256su1 v27.4s, v28.4s, v14.4s
+ // vl128 state = 0xa7e5c40a
+ __ dci(0x5e0663ab); // sha256su1 v11.4s, v29.4s, v6.4s
+ // vl128 state = 0xc4ae571c
+ __ dci(0x5e06619b); // sha256su1 v27.4s, v12.4s, v6.4s
+ // vl128 state = 0xf84ef221
+ __ dci(0x5e066199); // sha256su1 v25.4s, v12.4s, v6.4s
+ // vl128 state = 0x24f98d3c
+ __ dci(0x5e0e6118); // sha256su1 v24.4s, v8.4s, v14.4s
+ // vl128 state = 0xcdb43a3b
+ __ dci(0x5e0f601a); // sha256su1 v26.4s, v0.4s, v15.4s
+ // vl128 state = 0x85fd37e9
+ __ dci(0x5e096012); // sha256su1 v18.4s, v0.4s, v9.4s
+ // vl128 state = 0xabccd3f6
+ __ dci(0x5e0c601a); // sha256su1 v26.4s, v0.4s, v12.4s
+ // vl128 state = 0x8c0232e5
+ __ dci(0x5e1c602a); // sha256su1 v10.4s, v1.4s, v28.4s
+ // vl128 state = 0xcdcf37ba
+ __ dci(0x5e1e622e); // sha256su1 v14.4s, v17.4s, v30.4s
+ // vl128 state = 0x25129c9a
+ __ dci(0x5e1e623e); // sha256su1 v30.4s, v17.4s, v30.4s
+ // vl128 state = 0xd0a281b7
+ __ dci(0x5e1e630e); // sha256su1 v14.4s, v24.4s, v30.4s
+ // vl128 state = 0x3ed92f18
+ __ dci(0x5e1f639e); // sha256su1 v30.4s, v28.4s, v31.4s
+ // vl128 state = 0xda1056b9
+ __ dci(0x5e0f629f); // sha256su1 v31.4s, v20.4s, v15.4s
+ // vl128 state = 0x367274fa
+ __ dci(0x5e0f63bd); // sha256su1 v29.4s, v29.4s, v15.4s
+ // vl128 state = 0x46a79748
+ __ dci(0x5e0f63b5); // sha256su1 v21.4s, v29.4s, v15.4s
+ // vl128 state = 0xdc427315
+ __ dci(0x5e0b63f7); // sha256su1 v23.4s, v31.4s, v11.4s
+ // vl128 state = 0x91547f41
+ __ dci(0x5e0263e7); // sha256su1 v7.4s, v31.4s, v2.4s
+ // vl128 state = 0x1c233ffa
+ __ dci(0x5e0062f7); // sha256su1 v23.4s, v23.4s, v0.4s
+ // vl128 state = 0x8c2948a1
+ __ dci(0x5e1062c7); // sha256su1 v7.4s, v22.4s, v16.4s
+ // vl128 state = 0x8b72f498
+ __ dci(0x5e1062c6); // sha256su1 v6.4s, v22.4s, v16.4s
+ // vl128 state = 0x43d27746
+ __ dci(0x5e1063ee); // sha256su1 v14.4s, v31.4s, v16.4s
+ // vl128 state = 0xa864e589
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0xa864e589,
+ 0xc588dfe0,
+ 0x171add38,
+ 0x884ca9db,
+ 0x5f47fb6a,
+ 0x0bd024c5,
+ 0xa6921cce,
+ 0x01dc8899,
+ 0x0f5b4b19,
+ 0x948260c1,
+ 0x4d4faafe,
+ 0x76ee7ff7,
+ 0xd9a56156,
+ 0x63c8e138,
+ 0xe687f7c3,
+ 0x51785434,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
TEST_SVE(neon_sha3) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,