Support SM4 accelerating instructions (#123)
Add support for the two Neon SM4 acceleration instructions, SM4E and SM4EKEY.
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 31d5875..3ab0faa 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -6117,6 +6117,22 @@
Emit(0xce408c00 | Rd(vd) | Rn(vn) | Rm(vm) | i);
}
+// Emits SM4E. Both operands must be 4S vectors, and the NEON and SM4 CPU
+// features must be enabled; each requirement is checked by an assertion.
+void Assembler::sm4e(const VRegister& vd, const VRegister& vn) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kSM4));
+  VIXL_ASSERT(vd.Is4S() && vn.Is4S());
+
+  // Base encoding with the Rd and Rn register fields left clear.
+  const unsigned sm4e_bits = 0xcec08400;
+  Emit(sm4e_bits | Rd(vd) | Rn(vn));
+}
+
+// Emits SM4EKEY. All three operands must be 4S vectors, and the NEON and SM4
+// CPU features must be enabled; each requirement is checked by an assertion.
+void Assembler::sm4ekey(const VRegister& vd,
+                        const VRegister& vn,
+                        const VRegister& vm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+  VIXL_ASSERT(CPUHas(CPUFeatures::kSM4));
+  VIXL_ASSERT(vd.Is4S() && vn.Is4S() && vm.Is4S());
+
+  // Base encoding with the Rd, Rn and Rm register fields left clear.
+  const unsigned sm4ekey_bits = 0xce60c800;
+  Emit(sm4ekey_bits | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index c1e4e6a..441a528 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -3732,6 +3732,12 @@
const VRegister& vm,
int index);
+ // SM4 Encode.
+ // vd and vn must both be 4S vectors; requires the NEON and SM4 features.
+ // vd supplies the input state and receives the result of four rounds, with
+ // one word of vn consumed per round.
+ void sm4e(const VRegister& vd, const VRegister& vn);
+
+ // SM4 Key.
+ // vd, vn and vm must all be 4S vectors; requires the NEON and SM4 features.
+ // vn supplies the input state and vd receives the result of four rounds,
+ // with one word of vm consumed per round.
+ void sm4ekey(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
// Scalable Vector Extensions.
// Absolute value (predicated).
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 66d29f0..407ff98 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -285,6 +285,12 @@
USE(instr);
}
+// Records the CPU features needed by the SM4 instructions: NEON and SM4.
+void CPUFeaturesAuditor::VisitCryptoSM4(const Instruction* instr) {
+  // Every form routed to this visitor needs the same features, so the
+  // encoding itself is not inspected.
+  USE(instr);
+
+  RecordInstructionFeaturesScope scope(this);
+  scope.Record(CPUFeatures::kNEON, CPUFeatures::kSM4);
+}
+
void CPUFeaturesAuditor::VisitDataProcessing1Source(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
switch (instr->Mask(DataProcessing1SourceMask)) {
diff --git a/src/aarch64/cpu-features-auditor-aarch64.h b/src/aarch64/cpu-features-auditor-aarch64.h
index 7d5ca2f..489083a 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.h
+++ b/src/aarch64/cpu-features-auditor-aarch64.h
@@ -114,6 +114,7 @@
VISITOR_LIST(DECLARE)
#undef DECLARE
void VisitCryptoSM3(const Instruction* instr);
+ // Records the CPU features required by the SM4 instructions.
+ void VisitCryptoSM4(const Instruction* instr);
void LoadStoreHelper(const Instruction* instr);
void LoadStorePairHelper(const Instruction* instr);
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
index b5358ce..bda71ce 100644
--- a/src/aarch64/decoder-visitor-map-aarch64.h
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -2663,8 +2663,8 @@
{"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
{"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
{"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitCryptoSM3}, \
- {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
- {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
+ {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitCryptoSM4}, \
+ {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitCryptoSM4}, \
{"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
{"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
{"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index ec4dfc9..cc42709 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -2223,6 +2223,16 @@
FormatWithDecodedMnemonic(instr, form, suffix);
}
+// Disassembles SM4E (two register operands) and SM4EKEY (three register
+// operands). All operands are printed with the 4S arrangement.
+void Disassembler::VisitCryptoSM4(const Instruction *instr) {
+  VIXL_ASSERT((form_hash_ == "sm4ekey_vvv4_cryptosha512_3"_h) ||
+              (form_hash_ == "sm4e_vv4_cryptosha512_2"_h));
+
+  // Only SM4EKEY has a third register operand to print.
+  const bool has_vm = (form_hash_ != "sm4e_vv4_cryptosha512_2"_h);
+  const char *vm_suffix = has_vm ? ", 'Vm.4s" : NULL;
+
+  FormatWithDecodedMnemonic(instr, "'Vd.4s, 'Vn.4s", vm_suffix);
+}
+
void Disassembler::DisassembleSHA512(const Instruction *instr) {
const char *form = "'Qd, 'Qn, 'Vm.2d";
const char *suffix = NULL;
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index b139c4c..8f028b5 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -244,6 +244,7 @@
void Disassemble_Xd_XnSP_XmSP(const Instruction* instr);
void VisitCryptoSM3(const Instruction* instr);
+ // Formats the SM4 instructions (sm4e, sm4ekey).
+ void VisitCryptoSM4(const Instruction* instr);
void Format(const Instruction* instr,
const char* mnemonic,
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index 246ffe9..2d923cd 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -8477,6 +8477,77 @@
return srcdst;
}
+// Applies the SM4 S-box to each of the four bytes in the low 32 bits of x and
+// returns the substituted 32-bit word, zero-extended to 64 bits. Bits 32 and
+// above of x are ignored.
+static uint64_t SM4SBox(uint64_t x) {
+  // The table is indexed from the end: byte value b selects sbox[255 - b].
+  static const uint8_t sbox[256] = {
+      0x48, 0x39, 0xcb, 0xd7, 0x3e, 0x5f, 0xee, 0x79, 0x20, 0x4d, 0xdc, 0x3a,
+      0xec, 0x7d, 0xf0, 0x18, 0x84, 0xc6, 0x6e, 0xc5, 0x09, 0xf1, 0xb9, 0x65,
+      0x7e, 0x77, 0x96, 0x0c, 0x4a, 0x97, 0x69, 0x89, 0xb0, 0xb4, 0xe5, 0xb8,
+      0x12, 0xd0, 0x74, 0x2d, 0xbd, 0x7b, 0xcd, 0xa5, 0x88, 0x31, 0xc1, 0x0a,
+      0xd8, 0x5a, 0x10, 0x1f, 0x41, 0x5c, 0xd9, 0x11, 0x7f, 0xbc, 0xdd, 0xbb,
+      0x92, 0xaf, 0x1b, 0x8d, 0x51, 0x5b, 0x6c, 0x6d, 0x72, 0x6a, 0xff, 0x03,
+      0x2f, 0x8e, 0xfd, 0xde, 0x45, 0x37, 0xdb, 0xd5, 0x6f, 0x4e, 0x53, 0x0d,
+      0xab, 0x23, 0x29, 0xc0, 0x60, 0xca, 0x66, 0x82, 0x2e, 0xe2, 0xf6, 0x1d,
+      0xe3, 0xb1, 0x8c, 0xf5, 0x30, 0x32, 0x93, 0xad, 0x55, 0x1a, 0x34, 0x9b,
+      0xa4, 0x5d, 0xae, 0xe0, 0xa1, 0x15, 0x61, 0xf9, 0xce, 0xf2, 0xf7, 0xa3,
+      0xb5, 0x38, 0xc7, 0x40, 0xd2, 0x8a, 0xbf, 0xea, 0x9e, 0xc8, 0xc4, 0xa0,
+      0xe7, 0x02, 0x36, 0x4c, 0x52, 0x27, 0xd3, 0x9f, 0x57, 0x46, 0x00, 0xd4,
+      0x87, 0x78, 0x21, 0x01, 0x3b, 0x7c, 0x22, 0x25, 0xa2, 0xd1, 0x58, 0x63,
+      0x5e, 0x0e, 0x24, 0x1e, 0x35, 0x9d, 0x56, 0x70, 0x4b, 0x0f, 0xeb, 0xf8,
+      0x8b, 0xda, 0x64, 0x71, 0xb2, 0x81, 0x6b, 0x68, 0xa8, 0x4f, 0x85, 0xe6,
+      0x19, 0x3c, 0x59, 0x83, 0xba, 0x17, 0x73, 0xf3, 0xfc, 0xa7, 0x07, 0x47,
+      0xa6, 0x3f, 0x8f, 0x75, 0xfa, 0x94, 0xdf, 0x80, 0x95, 0xe8, 0x08, 0xc9,
+      0xa9, 0x1c, 0xb3, 0xe4, 0x62, 0xac, 0xcf, 0xed, 0x43, 0x0b, 0x54, 0x33,
+      0x7a, 0x98, 0xef, 0x91, 0xf4, 0x50, 0x42, 0x9c, 0x99, 0x06, 0x86, 0x49,
+      0x26, 0x13, 0x44, 0xaa, 0xc3, 0x04, 0xbe, 0x2a, 0x76, 0x9a, 0x67, 0x2b,
+      0x05, 0x2c, 0xfb, 0x28, 0xc2, 0x14, 0xb6, 0x16, 0xb7, 0x3d, 0xe1, 0xcc,
+      0xfe, 0xe9, 0x90, 0xd6,
+  };
+
+  // Walk the bytes from most to least significant so that each substituted
+  // byte lands in the position of the byte it replaces.
+  uint64_t substituted = 0;
+  for (int shift = 24; shift >= 0; shift -= 8) {
+    const uint8_t byte = (x >> shift) & 0xff;
+    substituted = (substituted << 8) | sbox[255 - byte];
+  }
+  return substituted;
+}
+
+// Simulates four SM4 rounds and writes the four resulting 32-bit words to
+// srcdst, which is also returned.
+//
+//   is_key == false: the state starts as srcdst and each round consumes one
+//                    word of src1; src2 is not used.
+//   is_key == true:  the state starts as src1 and each round consumes one
+//                    word of src2.
+LogicVRegister Simulator::sm4(LogicVRegister srcdst,
+                              const LogicVRegister& src1,
+                              const LogicVRegister& src2,
+                              bool is_key) {
+  using namespace std::placeholders;
+  auto ROL = std::bind(RotateLeft, _1, _2, kSRegSize);
+
+  const VectorFormat vform = kFormat4S;
+  const LogicVRegister& state_reg = is_key ? src1 : srcdst;
+  const LogicVRegister& input_reg = is_key ? src2 : src1;
+
+  // All inputs are read before srcdst is written, so the registers may alias.
+  uint64_t state[4] = {};
+  state_reg.UintArray(vform, state);
+
+  for (int round = 0; round < 4; round++) {
+    uint64_t word =
+        state[3] ^ state[2] ^ state[1] ^ input_reg.Uint(vform, round);
+    word = SM4SBox(word);
+
+    // The two variants differ only in this mixing step.
+    const uint64_t mixed =
+        is_key ? (ROL(word, 13) ^ ROL(word, 23))
+               : (ROL(word, 2) ^ ROL(word, 10) ^ ROL(word, 18) ^
+                  ROL(word, 24));
+    word ^= mixed;
+    word ^= state[0];
+
+    // Drop the oldest word and append the new one.
+    for (int i = 0; i < 3; i++) {
+      state[i] = state[i + 1];
+    }
+    state[3] = word;
+  }
+
+  srcdst.SetUintArray(vform, state);
+  return srcdst;
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index f2bd917..1763f49 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -2814,6 +2814,7 @@
V(shsub, Shsub) \
V(sm3partw1, Sm3partw1) \
V(sm3partw2, Sm3partw2) \
+ V(sm4ekey, Sm4ekey) \
V(smax, Smax) \
V(smaxp, Smaxp) \
V(smin, Smin) \
@@ -2964,6 +2965,7 @@
V(sha1su1, Sha1su1) \
V(sha256su0, Sha256su0) \
V(sha512su0, Sha512su0) \
+ V(sm4e, Sm4e) \
V(smaxv, Smaxv) \
V(sminv, Sminv) \
V(sqabs, Sqabs) \
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 070e630..1a2959b 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -7353,6 +7353,22 @@
}
}
+// Simulates SM4E and SM4EKEY. Any other form is ignored.
+void Simulator::VisitCryptoSM4(const Instruction* instr) {
+  SimVRegister& rd = ReadVRegister(instr->GetRd());
+  SimVRegister& rn = ReadVRegister(instr->GetRn());
+  // Vm is looked up for both forms, but sm4() only reads it when is_key is
+  // true.
+  SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+  const bool is_key = (form_hash_ == "sm4ekey_vvv4_cryptosha512_3"_h);
+  if (is_key || (form_hash_ == "sm4e_vv4_cryptosha512_2"_h)) {
+    sm4(rd, rn, rm, is_key);
+  }
+}
+
void Simulator::SimulateSHA512(const Instruction* instr) {
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index fa530bd..632b8ed 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1534,6 +1534,7 @@
void SimulateSHA512(const Instruction* instr);
void VisitCryptoSM3(const Instruction* instr);
+ // Simulates the SM4 instructions (sm4e, sm4ekey).
+ void VisitCryptoSM4(const Instruction* instr);
// Integer register accessors.
@@ -4587,6 +4588,11 @@
int index,
bool is_a);
+  // Simulate four SM4 rounds. With is_key false, srcdst supplies the initial
+  // state and src1 the per-round words (src2 is unused); with is_key true,
+  // src1 supplies the initial state and src2 the per-round words. The result
+  // is written to srcdst in both cases.
+  LogicVRegister sm4(LogicVRegister srcdst,
+                     const LogicVRegister& src1,
+                     const LogicVRegister& src2,
+                     bool is_key);
+
#define NEON_3VREG_LOGIC_LIST(V) \
V(addhn) \
V(addhn2) \
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index 5cc2a58..5b5e603 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3848,5 +3848,13 @@
TEST_FEAT(sm3tt2b_0, sm3tt2b(v30.V4S(), v29.V4S(), v9.V4S(), 0))
#undef TEST_FEAT
+// SM4 feature tests: each case is instantiated through TEST_TEMPLATE with the
+// feature set {kNEON, kSM4} and named NEON_SM4_<NAME>.
+#define TEST_FEAT(NAME, ASM) \
+ TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSM4), \
+ NEON_SM4_##NAME, \
+ ASM)
+TEST_FEAT(sm4e, sm4e(v12.V4S(), v13.V4S()))
+TEST_FEAT(sm4ekey, sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()))
+#undef TEST_FEAT
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index b4a674e..14a354b 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3813,9 +3813,8 @@
"sm3partw2"); // SM3PARTW2_VVV4_cryptosha512_3
// ARMv8.2 - SM4
- // COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); //
- // SM4EKEY_VVV4_cryptosha512_3
- // COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2
+ COMPARE_PREFIX(dci(0xce60c800), "sm4ekey"); // SM4EKEY_VVV4_cryptosha512_3
+ COMPARE_PREFIX(dci(0xcec08400), "sm4e"); // SM4E_VV4_cryptosha512_2
// ARMv8.2 - SPE
// COMPARE_PREFIX(dci(0xd503223f), "psb"); // PSB_HC_hints
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index 912ee22..f50e5a6 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4600,6 +4600,16 @@
CLEANUP();
}
+// Pairs the Sm4e and Sm4ekey macro-assembler calls with the disassembly text
+// expected for them (4S arrangement on every operand).
+TEST(neon_sm4) {
+ SETUP();
+
+ COMPARE_MACRO(Sm4e(v12.V4S(), v13.V4S()), "sm4e v12.4s, v13.4s");
+ COMPARE_MACRO(Sm4ekey(v12.V4S(), v13.V4S(), v14.V4S()),
+ "sm4ekey v12.4s, v13.4s, v14.4s");
+
+ CLEANUP();
+}
+
TEST(neon_unallocated_regression_test) {
SETUP();
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index 585c00c..6b5b958 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -1870,5 +1870,173 @@
}
}
+// Emits a fixed sequence of 20 SM4E encodings from an initial machine state,
+// then compares a hash of the final machine state with an expected value.
+// The instruction order must not change: the expected hashes depend on it.
+TEST_SVE(neon_sm4e) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSM4);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ // The scope is sized for exactly the 20 dci() words emitted below. The
+ // "vl128 state" notes are presumably the running state hash at VL = 128
+ // bits -- TODO confirm.
+ ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+ __ dci(0xcec08400); // sm4e v0.4s, v0.4s
+ // vl128 state = 0xa687bacc
+ __ dci(0xcec08628); // sm4e v8.4s, v17.4s
+ // vl128 state = 0xf174e346
+ __ dci(0xcec0862a); // sm4e v10.4s, v17.4s
+ // vl128 state = 0xab88f8ca
+ __ dci(0xcec08628); // sm4e v8.4s, v17.4s
+ // vl128 state = 0x000d3840
+ __ dci(0xcec08638); // sm4e v24.4s, v17.4s
+ // vl128 state = 0xd980ddc2
+ __ dci(0xcec08688); // sm4e v8.4s, v20.4s
+ // vl128 state = 0xd501f2c2
+ __ dci(0xcec0868c); // sm4e v12.4s, v20.4s
+ // vl128 state = 0x699d6b6f
+ __ dci(0xcec0864d); // sm4e v13.4s, v18.4s
+ // vl128 state = 0x67baf406
+ __ dci(0xcec08649); // sm4e v9.4s, v18.4s
+ // vl128 state = 0x178b048e
+ __ dci(0xcec08659); // sm4e v25.4s, v18.4s
+ // vl128 state = 0x552a70d9
+ __ dci(0xcec0865d); // sm4e v29.4s, v18.4s
+ // vl128 state = 0x3be534d1
+ __ dci(0xcec0865f); // sm4e v31.4s, v18.4s
+ // vl128 state = 0x396fdf70
+ __ dci(0xcec08657); // sm4e v23.4s, v18.4s
+ // vl128 state = 0x836c474b
+ __ dci(0xcec086e7); // sm4e v7.4s, v23.4s
+ // vl128 state = 0x71aebad7
+ __ dci(0xcec08683); // sm4e v3.4s, v20.4s
+ // vl128 state = 0xadfd515c
+ __ dci(0xcec08681); // sm4e v1.4s, v20.4s
+ // vl128 state = 0xf1465ab4
+ __ dci(0xcec087c0); // sm4e v0.4s, v30.4s
+ // vl128 state = 0x8555b40f
+ __ dci(0xcec087c4); // sm4e v4.4s, v30.4s
+ // vl128 state = 0x2cb3f99f
+ __ dci(0xcec087d4); // sm4e v20.4s, v30.4s
+ // vl128 state = 0x733336fd
+ __ dci(0xcec085fc); // sm4e v28.4s, v15.4s
+ // vl128 state = 0x11b138f9
+ }
+
+ // Hash the machine state into `state`, then load the hash into w0 so it
+ // can be checked after the run.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Indexed by the number of Q-sized SVE lanes minus one. Entry 0 equals the
+ // final "vl128 state" noted above.
+ uint32_t expected_hashes[] = {
+ 0x11b138f9,
+ 0x5993c196,
+ 0xb9eef6b5,
+ 0xf96d88cf,
+ 0x8e92bd49,
+ 0x04d27185,
+ 0x8833f291,
+ 0x77933d5b,
+ 0x135500cc,
+ 0xe5ca977f,
+ 0x3e4536af,
+ 0xb169aa9d,
+ 0xe0b4425b,
+ 0x35c1f76e,
+ 0x54e3448a,
+ 0x4dbf0c92,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
+// Emits a fixed sequence of 20 SM4EKEY encodings from an initial machine
+// state, then compares a hash of the final machine state with an expected
+// value. The instruction order must not change: the expected hashes depend on
+// it.
+TEST_SVE(neon_sm4ekey) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSM4);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ // The scope is sized for exactly the 20 dci() words emitted below. The
+ // "vl128 state" notes are presumably the running state hash at VL = 128
+ // bits -- TODO confirm.
+ ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
+ __ dci(0xce6fc9d4); // sm4ekey v20.4s, v14.4s, v15.4s
+ // vl128 state = 0x4bb7b396
+ __ dci(0xce6bc8d5); // sm4ekey v21.4s, v6.4s, v11.4s
+ // vl128 state = 0xf4354b26
+ __ dci(0xce6bc8c5); // sm4ekey v5.4s, v6.4s, v11.4s
+ // vl128 state = 0x0a331378
+ __ dci(0xce6bc8cd); // sm4ekey v13.4s, v6.4s, v11.4s
+ // vl128 state = 0x7ed4c2a7
+ __ dci(0xce6fc8e5); // sm4ekey v5.4s, v7.4s, v15.4s
+ // vl128 state = 0x38a433fd
+ __ dci(0xce6fc8e4); // sm4ekey v4.4s, v7.4s, v15.4s
+ // vl128 state = 0xc1ad0d76
+ __ dci(0xce6bcaec); // sm4ekey v12.4s, v23.4s, v11.4s
+ // vl128 state = 0x81660ce3
+ __ dci(0xce6bcae8); // sm4ekey v8.4s, v23.4s, v11.4s
+ // vl128 state = 0x79f3e5c1
+ __ dci(0xce7bcaaa); // sm4ekey v10.4s, v21.4s, v27.4s
+ // vl128 state = 0x231e0a79
+ __ dci(0xce72caa8); // sm4ekey v8.4s, v21.4s, v18.4s
+ // vl128 state = 0xd931c858
+ __ dci(0xce7ac8aa); // sm4ekey v10.4s, v5.4s, v26.4s
+ // vl128 state = 0x2476ef6a
+ __ dci(0xce7bc888); // sm4ekey v8.4s, v4.4s, v27.4s
+ // vl128 state = 0xd4a9ac83
+ __ dci(0xce7bc889); // sm4ekey v9.4s, v4.4s, v27.4s
+ // vl128 state = 0x149fd9b3
+ __ dci(0xce7bc9cd); // sm4ekey v13.4s, v14.4s, v27.4s
+ // vl128 state = 0xece67fce
+ __ dci(0xce79cbc5); // sm4ekey v5.4s, v30.4s, v25.4s
+ // vl128 state = 0xccb45863
+ __ dci(0xce71cac4); // sm4ekey v4.4s, v22.4s, v17.4s
+ // vl128 state = 0xafb23c9d
+ __ dci(0xce71c8e0); // sm4ekey v0.4s, v7.4s, v17.4s
+ // vl128 state = 0x5c808694
+ __ dci(0xce71c882); // sm4ekey v2.4s, v4.4s, v17.4s
+ // vl128 state = 0x6cea5132
+ __ dci(0xce73c803); // sm4ekey v3.4s, v0.4s, v19.4s
+ // vl128 state = 0x67e316db
+ __ dci(0xce71c847); // sm4ekey v7.4s, v2.4s, v17.4s
+ // vl128 state = 0x317aafac
+ }
+
+ // Hash the machine state into `state`, then load the hash into w0 so it
+ // can be checked after the run.
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ // Indexed by the number of Q-sized SVE lanes minus one. Entry 0 equals the
+ // final "vl128 state" noted above.
+ uint32_t expected_hashes[] = {
+ 0x317aafac,
+ 0xbacd34de,
+ 0x3e92f0b2,
+ 0x3043dbe3,
+ 0x6dda4d17,
+ 0x6e59ba0d,
+ 0xa29887cf,
+ 0x3bee1f56,
+ 0xacd43191,
+ 0x97ab7ada,
+ 0x39ebcf53,
+ 0xea7b411e,
+ 0xd8e1efe9,
+ 0x2b99fc57,
+ 0xf5f62e02,
+ 0xd50621d1,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
} // namespace aarch64
} // namespace vixl