Support SHA-3 accelerating instructions (#101)
Add support for the Neon BCAX, EOR3, RAX1 and XAR instructions, which are
used to accelerate SHA-3.
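
In terms of semantics (as mirrored by the simulator changes below): EOR3 is a
three-way XOR, BCAX computes vn ^ (vm & ~va), RAX1 computes vn ^ rol(vm, 1),
and XAR rotates the XOR of its two operands right by an immediate. As a rough
sketch only (not part of this patch), the fragment below shows how the new
MacroAssembler entry points might compose into the Keccak-f[1600] round
primitives behind SHA-3. EmitKeccakRoundFragment is a hypothetical helper, and
the register and rotation assignments are illustrative rather than a real
Keccak schedule.

    #include "aarch64/macro-assembler-aarch64.h"

    using namespace vixl;
    using namespace vixl::aarch64;

    void EmitKeccakRoundFragment(MacroAssembler* masm) {
      // The generated code needs NEON and SHA3; record that so the
      // assembler-side CPUHas() asserts are satisfied.
      masm->GetCPUFeatures()->Combine(CPUFeatures::kNEON, CPUFeatures::kSHA3);

      // Theta, column parity: c = a0 ^ a1 ^ a2. One EOR3 fuses two EORs.
      masm->Eor3(v25.V16B(), v0.V16B(), v5.V16B(), v10.V16B());

      // Theta, lane mix: d = c_prev ^ rol(c_next, 1), a single RAX1.
      masm->Rax1(v26.V2D(), v25.V2D(), v27.V2D());

      // Rho fused with the theta XOR: a' = ror(a ^ d, 64 - r), here r = 1.
      masm->Xar(v1.V2D(), v1.V2D(), v26.V2D(), 63);

      // Chi: a' = a ^ (b & ~c), a single BCAX instead of BIC + EOR.
      masm->Bcax(v0.V16B(), v0.V16B(), v2.V16B(), v1.V16B());
    }

A caller would typically gate this at run time on CPUFeatures::InferFromOS()
reporting both kNEON and kSHA3.
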
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index ad2e7c9..009b08c 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -5876,6 +5876,39 @@
Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm));
}

+void Assembler::bcax(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
+  VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());
+
+ Emit(0xce200000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
+}
+
+void Assembler::eor3(const VRegister& vd, const VRegister& vn, const VRegister& vm, const VRegister& va) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
+ VIXL_ASSERT(vd.Is16B() && vn.Is16B() && vm.Is16B() && va.Is16B());
+
+ Emit(0xce000000 | Rd(vd) | Rn(vn) | Rm(vm) | Ra(va));
+}
+
+void Assembler::xar(const VRegister& vd, const VRegister& vn, const VRegister& vm, int rotate) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
+ VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
+ VIXL_ASSERT(IsUint6(rotate));
+
+  Emit(0xce800000 | Rd(vd) | Rn(vn) | Rm(vm) | (rotate << 10));
+}
+
+void Assembler::rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON));
+ VIXL_ASSERT(CPUHas(CPUFeatures::kSHA3));
+ VIXL_ASSERT(vd.Is2D() && vn.Is2D() && vm.Is2D());
+
+ Emit(0xce608c00 | Rd(vd) | Rn(vn) | Rm(vm));
+}
+
// Note:
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 9bc7076..bbba5db 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -3621,6 +3621,27 @@
// Unsigned 8-bit integer matrix multiply-accumulate (vector).
void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm);

+  // Bit Clear and Exclusive-OR.
+ void bcax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // Three-way Exclusive-OR.
+ void eor3(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va);
+
+ // Exclusive-OR and Rotate.
+ void xar(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int rotate);
+
+  // Rotate and Exclusive-OR.
+ void rax1(const VRegister& vd, const VRegister& vn, const VRegister& vm);
+
// Scalable Vector Extensions.

// Absolute value (predicated).
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 3925ced..a85587b 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -1835,6 +1835,14 @@
{"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
{"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
{"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"bcax_vvv16_crypto4"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+ {"eor3_vvv16_crypto4"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+ {"rax1_vvv2_cryptosha512_3"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
+ {"xar_vvv2_crypto3_imm6"_h,
+ CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3)},
};
if (features.count(form_hash_) > 0) {
diff --git a/src/aarch64/decoder-visitor-map-aarch64.h b/src/aarch64/decoder-visitor-map-aarch64.h
index a0a6ef2..92131da 100644
--- a/src/aarch64/decoder-visitor-map-aarch64.h
+++ b/src/aarch64/decoder-visitor-map-aarch64.h
@@ -2638,7 +2638,6 @@
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
{"ret_64r_branch_reg"_h, \
&VISITORCLASS::VisitUnconditionalBranchToRegister}, \
- {"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \
@@ -2646,7 +2645,6 @@
{"bfmlal_asimdsame2_f"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \
{"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \
- {"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
@@ -2658,7 +2656,6 @@
{"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
{"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \
- {"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \
{"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \
@@ -2686,7 +2683,6 @@
{"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \
{"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
{"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \
- {"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \
{"bfdot_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 9f53e81..5a8241d 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -753,6 +753,10 @@
{"umax_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
{"umin_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
{"umin_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"bcax_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
+ {"eor3_vvv16_crypto4"_h, &Disassembler::DisassembleNEON4Same},
+ {"xar_vvv2_crypto3_imm6"_h, &Disassembler::DisassembleNEONXar},
+ {"rax1_vvv2_cryptosha512_3"_h, &Disassembler::DisassembleNEONRax1},
};
return &form_to_visitor;
} // NOLINT(readability/fn_size)
@@ -2430,6 +2434,17 @@
Format(instr, mnemonic, nfd.Substitute(form), suffix);
}

+void Disassembler::DisassembleNEON4Same(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Vd.16b, 'Vn.16b, 'Vm.16b, 'Va.16b");
+}
+
+void Disassembler::DisassembleNEONXar(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Vd.2d, 'Vn.2d, 'Vm.2d, #'u1510");
+}
+
+void Disassembler::DisassembleNEONRax1(const Instruction *instr) {
+ FormatWithDecodedMnemonic(instr, "'Vd.2d, 'Vn.2d, 'Vm.2d");
+}

void Disassembler::VisitNEON3Different(const Instruction *instr) {
const char *mnemonic = mnemonic_.c_str();
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index 7985383..0da49e4 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -229,6 +229,9 @@
void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr);
void DisassembleNEONFPScalar2RegMisc(const Instruction* instr);
void DisassembleNEONPolynomialMul(const Instruction* instr);
+ void DisassembleNEON4Same(const Instruction* instr);
+ void DisassembleNEONXar(const Instruction* instr);
+ void DisassembleNEONRax1(const Instruction* instr);
void DisassembleMTELoadTag(const Instruction* instr);
void DisassembleMTEStoreTag(const Instruction* instr);
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index f6fc4d7..8878ef7 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -2787,6 +2787,7 @@
V(pmull2, Pmull2) \
V(raddhn, Raddhn) \
V(raddhn2, Raddhn2) \
+ V(rax1, Rax1) \
V(rsubhn, Rsubhn) \
V(rsubhn2, Rsubhn2) \
V(saba, Saba) \
@@ -3152,6 +3153,14 @@
SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
#undef DEFINE_MACRO_ASM_FUNC

+  void Bcax(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ bcax(vd, vn, vm, va);
+ }
void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -3192,6 +3201,14 @@
SingleEmissionCheckScope guard(this);
dup(vd, rn);
}
+ void Eor3(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ const VRegister& va) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ eor3(vd, vn, vm, va);
+ }
void Ext(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
@@ -3498,6 +3515,14 @@
SingleEmissionCheckScope guard(this);
umov(rd, vn, vn_index);
}
+ void Xar(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ int rotate) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ xar(vd, vn, vm, rotate);
+ }
void Crc32b(const Register& rd, const Register& rn, const Register& rm) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 81bab07..fdc7106 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -507,6 +507,10 @@
{"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
{"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
{"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+ {"bcax_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
+ {"eor3_vvv16_crypto4"_h, &Simulator::SimulateNEONSHA3},
+ {"rax1_vvv2_cryptosha512_3"_h, &Simulator::SimulateNEONSHA3},
+ {"xar_vvv2_crypto3_imm6"_h, &Simulator::SimulateNEONSHA3},
};
return &form_to_visitor;
}
@@ -9926,6 +9930,35 @@
}
}

+void Simulator::SimulateNEONSHA3(const Instruction* instr) {
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+ SimVRegister& ra = ReadVRegister(instr->GetRa());
+ SimVRegister temp;
+
+ switch (form_hash_) {
+ case "bcax_vvv16_crypto4"_h:
+ bic(kFormat16B, temp, rm, ra);
+ eor(kFormat16B, rd, rn, temp);
+ break;
+ case "eor3_vvv16_crypto4"_h:
+ eor(kFormat16B, temp, rm, ra);
+ eor(kFormat16B, rd, rn, temp);
+ break;
+ case "rax1_vvv2_cryptosha512_3"_h:
+ ror(kFormat2D, temp, rm, 63); // rol(1) => ror(63)
+ eor(kFormat2D, rd, rn, temp);
+ break;
+    case "xar_vvv2_crypto3_imm6"_h: {
+      int rot = instr->ExtractBits(15, 10);
+      eor(kFormat2D, temp, rn, rm);
+      ror(kFormat2D, rd, temp, rot);
+      break;
+    }
+ }
+}
+
void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
SimVRegister& zd = ReadVRegister(instr->GetRd());
SimVRegister& zn = ReadVRegister(instr->GetRn());
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 760fa6c..73277e4 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1509,6 +1509,7 @@
void SimulateNEONFPMulByElementLong(const Instruction* instr);
void SimulateNEONComplexMulByElement(const Instruction* instr);
void SimulateNEONDotProdByElement(const Instruction* instr);
+ void SimulateNEONSHA3(const Instruction* instr);
void SimulateMTEAddSubTag(const Instruction* instr);
void SimulateMTETagMaskInsert(const Instruction* instr);
void SimulateMTESubPointer(const Instruction* instr);
diff --git a/test/aarch64/test-cpu-features-aarch64.cc b/test/aarch64/test-cpu-features-aarch64.cc
index 187bbd5..8430d7f 100644
--- a/test/aarch64/test-cpu-features-aarch64.cc
+++ b/test/aarch64/test-cpu-features-aarch64.cc
@@ -3785,5 +3785,14 @@
TEST_FEAT(pmull1q_0, pmull(v5.V1Q(), v6.V1D(), v7.V1D()))
#undef TEST_FEAT

+#define TEST_NEON_SHA3(NAME, ASM) \
+ TEST_TEMPLATE(CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kSHA3), \
+ NEON_SHA3_##NAME, \
+ ASM)
+TEST_NEON_SHA3(bcax_0, bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
+TEST_NEON_SHA3(eor3_0, eor3(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()))
+TEST_NEON_SHA3(xar_0, xar(v0.V2D(), v1.V2D(), v2.V2D(), 42))
+TEST_NEON_SHA3(rax1_0, rax1(v0.V2D(), v1.V2D(), v2.V2D()))
+
} // namespace aarch64
} // namespace vixl
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 7c8f2cc..5d8579f 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3789,10 +3789,10 @@
COMPARE_PREFIX(dci(0xd503221f), "esb");  // ESB_HI_hints

// ARMv8.2 - SHA3
- // COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
- // COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
- // COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
- // COMPARE_PREFIX(dci(0xce800000), "xar"); // XAR_VVV2_crypto3_imm6
+ COMPARE_PREFIX(dci(0xce000000), "eor3"); // EOR3_VVV16_crypto4
+ COMPARE_PREFIX(dci(0xce200000), "bcax"); // BCAX_VVV16_crypto4
+ COMPARE_PREFIX(dci(0xce608c00), "rax1"); // RAX1_VVV2_cryptosha512_3
+  COMPARE_PREFIX(dci(0xce800000), "xar");  // XAR_VVV2_crypto3_imm6

// ARMv8.2 - SHA512
// COMPARE_PREFIX(dci(0xce608000), "sha512h"); // SHA512H_QQV_cryptosha512_3
diff --git a/test/aarch64/test-disasm-neon-aarch64.cc b/test/aarch64/test-disasm-neon-aarch64.cc
index c2824c9..774114d 100644
--- a/test/aarch64/test-disasm-neon-aarch64.cc
+++ b/test/aarch64/test-disasm-neon-aarch64.cc
@@ -4516,6 +4516,20 @@
CLEANUP();
}

+TEST(neon_sha3) {
+ SETUP();
+
+ COMPARE_MACRO(Bcax(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B()),
+ "bcax v0.16b, v1.16b, v2.16b, v3.16b");
+ COMPARE_MACRO(Eor3(v10.V16B(), v11.V16B(), v12.V16B(), v13.V16B()),
+ "eor3 v10.16b, v11.16b, v12.16b, v13.16b");
+ COMPARE_MACRO(Xar(v20.V2D(), v21.V2D(), v22.V2D(), 42),
+ "xar v20.2d, v21.2d, v22.2d, #42");
+ COMPARE_MACRO(Rax1(v0.V2D(), v1.V2D(), v2.V2D()), "rax1 v0.2d, v1.2d, v2.2d");
+
+ CLEANUP();
+}
+
TEST(neon_unallocated_regression_test) {
SETUP();
diff --git a/test/aarch64/test-simulator-sve-aarch64.cc b/test/aarch64/test-simulator-sve-aarch64.cc
index 1ba7783..bdd5c81 100644
--- a/test/aarch64/test-simulator-sve-aarch64.cc
+++ b/test/aarch64/test-simulator-sve-aarch64.cc
@@ -394,5 +394,169 @@
}
}

+TEST_SVE(neon_sha3) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
+ CPUFeatures::kNEON,
+ CPUFeatures::kCRC32,
+ CPUFeatures::kSHA3);
+ START();
+
+ SetInitialMachineState(&masm);
+ // state = 0xe2bd2480
+
+ {
+ ExactAssemblyScope scope(&masm, 60 * kInstructionSize);
+ __ dci(0xce608c00); // rax1 v0.2d, v0.2d, v0.2d
+ // vl128 state = 0x960c2b9f
+ __ dci(0xce608e28); // rax1 v8.2d, v17.2d, v0.2d
+ // vl128 state = 0x89ea3f7b
+ __ dci(0xce618e6c); // rax1 v12.2d, v19.2d, v1.2d
+ // vl128 state = 0xa7801384
+ __ dci(0xce718e48); // rax1 v8.2d, v18.2d, v17.2d
+ // vl128 state = 0x4477d70d
+ __ dci(0xce738e60); // rax1 v0.2d, v19.2d, v19.2d
+ // vl128 state = 0xdee66854
+ __ dci(0xce6b8e61); // rax1 v1.2d, v19.2d, v11.2d
+ // vl128 state = 0x2e383dc2
+ __ dci(0xce6e8e60); // rax1 v0.2d, v19.2d, v14.2d
+ // vl128 state = 0xa022bb6d
+ __ dci(0xce6e8e62); // rax1 v2.2d, v19.2d, v14.2d
+ // vl128 state = 0x923f5d32
+ __ dci(0xce668e23); // rax1 v3.2d, v17.2d, v6.2d
+ // vl128 state = 0xc2c6ca00
+ __ dci(0xce260e33); // bcax v19.16b, v17.16b, v6.16b, v3.16b
+ // vl128 state = 0x517e85e9
+ __ dci(0xce260e23); // bcax v3.16b, v17.16b, v6.16b, v3.16b
+ // vl128 state = 0xbcf4c332
+ __ dci(0xce260e93); // bcax v19.16b, v20.16b, v6.16b, v3.16b
+ // vl128 state = 0x5d9d51ef
+ __ dci(0xce260a11); // bcax v17.16b, v16.16b, v6.16b, v2.16b
+ // vl128 state = 0x69ce0099
+ __ dci(0xce260a15); // bcax v21.16b, v16.16b, v6.16b, v2.16b
+ // vl128 state = 0x9a2cdc9f
+ __ dci(0xce244a11); // bcax v17.16b, v16.16b, v4.16b, v18.16b
+ // vl128 state = 0x27eeff29
+ __ dci(0xce304a10); // bcax v16.16b, v16.16b, v16.16b, v18.16b
+ // vl128 state = 0x6d586875
+ __ dci(0xce314b18); // bcax v24.16b, v24.16b, v17.16b, v18.16b
+ // vl128 state = 0xe38b6054
+ __ dci(0xce214b28); // bcax v8.16b, v25.16b, v1.16b, v18.16b
+ // vl128 state = 0x27a3f5f6
+ __ dci(0xce294f38); // bcax v24.16b, v25.16b, v9.16b, v19.16b
+ // vl128 state = 0x7d7ffa9b
+ __ dci(0xce214e39); // bcax v25.16b, v17.16b, v1.16b, v19.16b
+ // vl128 state = 0x936374f0
+ __ dci(0xce216a3d); // bcax v29.16b, v17.16b, v1.16b, v26.16b
+ // vl128 state = 0x1c5136d5
+ __ dci(0xce296b39); // bcax v25.16b, v25.16b, v9.16b, v26.16b
+ // vl128 state = 0x75cd7131
+ __ dci(0xce216338); // bcax v24.16b, v25.16b, v1.16b, v24.16b
+ // vl128 state = 0xcc747626
+ __ dci(0xce2163f9); // bcax v25.16b, v31.16b, v1.16b, v24.16b
+ // vl128 state = 0x9409c8bc
+ __ dci(0xce2043f1); // bcax v17.16b, v31.16b, v0.16b, v16.16b
+ // vl128 state = 0x8db3a0c8
+ __ dci(0xce2043f5); // bcax v21.16b, v31.16b, v0.16b, v16.16b
+ // vl128 state = 0xa55f8d7d
+ __ dci(0xce2043e5); // bcax v5.16b, v31.16b, v0.16b, v16.16b
+ // vl128 state = 0xe1960c7a
+ __ dci(0xce224be7); // bcax v7.16b, v31.16b, v2.16b, v18.16b
+ // vl128 state = 0xc9599bde
+ __ dci(0xce204bb7); // bcax v23.16b, v29.16b, v0.16b, v18.16b
+ // vl128 state = 0x7176d08d
+ __ dci(0xce004b9f); // eor3 v31.16b, v28.16b, v0.16b, v18.16b
+ // vl128 state = 0x10620821
+ __ dci(0xce000baf); // eor3 v15.16b, v29.16b, v0.16b, v2.16b
+ // vl128 state = 0x0aba0288
+ __ dci(0xce0a0bab); // eor3 v11.16b, v29.16b, v10.16b, v2.16b
+ // vl128 state = 0xe6517156
+ __ dci(0xce0e1baf); // eor3 v15.16b, v29.16b, v14.16b, v6.16b
+ // vl128 state = 0x6b7021fb
+ __ dci(0xce0e3fa7); // eor3 v7.16b, v29.16b, v14.16b, v15.16b
+ // vl128 state = 0x05761b1f
+ __ dci(0xce0e2fe5); // eor3 v5.16b, v31.16b, v14.16b, v11.16b
+ // vl128 state = 0xe01822c6
+ __ dci(0xce2e2fc7); // bcax v7.16b, v30.16b, v14.16b, v11.16b
+ // vl128 state = 0xdc6444d7
+ __ dci(0xce3e2dcf); // bcax v15.16b, v14.16b, v30.16b, v11.16b
+ // vl128 state = 0xa5ecad2e
+ __ dci(0xce3e3fdf); // bcax v31.16b, v30.16b, v30.16b, v15.16b
+ // vl128 state = 0x2124dc42
+ __ dci(0xce3a3ede); // bcax v30.16b, v22.16b, v26.16b, v15.16b
+ // vl128 state = 0x57f77204
+ __ dci(0xce3a2e9c); // bcax v28.16b, v20.16b, v26.16b, v11.16b
+ // vl128 state = 0x6e8d303d
+ __ dci(0xce3a2294); // bcax v20.16b, v20.16b, v26.16b, v8.16b
+ // vl128 state = 0xdb53d42c
+ __ dci(0xce38029c); // bcax v28.16b, v20.16b, v24.16b, v0.16b
+ // vl128 state = 0x258d49b8
+ __ dci(0xce38088c); // bcax v12.16b, v4.16b, v24.16b, v2.16b
+ // vl128 state = 0xe751a348
+ __ dci(0xce28008e); // bcax v14.16b, v4.16b, v8.16b, v0.16b
+ // vl128 state = 0x8ce0aa1a
+ __ dci(0xce28008a); // bcax v10.16b, v4.16b, v8.16b, v0.16b
+ // vl128 state = 0x1fdf89a5
+ __ dci(0xce280088); // bcax v8.16b, v4.16b, v8.16b, v0.16b
+ // vl128 state = 0xcc51f5e1
+ __ dci(0xce2a1089); // bcax v9.16b, v4.16b, v10.16b, v4.16b
+ // vl128 state = 0xdaf766b0
+ __ dci(0xce0b1081); // eor3 v1.16b, v4.16b, v11.16b, v4.16b
+ // vl128 state = 0x2da7deb5
+ __ dci(0xce0a1011); // eor3 v17.16b, v0.16b, v10.16b, v4.16b
+ // vl128 state = 0xcc86f5d4
+ __ dci(0xce121010); // eor3 v16.16b, v0.16b, v18.16b, v4.16b
+ // vl128 state = 0xfb722105
+ __ dci(0xce921118); // xar v24.2d, v8.2d, v18.2d, #4
+ // vl128 state = 0x9a7752e3
+ __ dci(0xce9a1199); // xar v25.2d, v12.2d, v26.2d, #4
+ // vl128 state = 0x83a251c2
+ __ dci(0xce9e11dd); // xar v29.2d, v14.2d, v30.2d, #4
+ // vl128 state = 0x1e31c9d5
+ __ dci(0xce9e915c); // xar v28.2d, v10.2d, v30.2d, #36
+ // vl128 state = 0x0e421d73
+ __ dci(0xce1e115d); // eor3 v29.16b, v10.16b, v30.16b, v4.16b
+ // vl128 state = 0xb5a8c677
+ __ dci(0xce3e515c); // bcax v28.16b, v10.16b, v30.16b, v20.16b
+ // vl128 state = 0x21587300
+ __ dci(0xce3e5154); // bcax v20.16b, v10.16b, v30.16b, v20.16b
+ // vl128 state = 0x9459c629
+ __ dci(0xce3e1056); // bcax v22.16b, v2.16b, v30.16b, v4.16b
+ // vl128 state = 0xdb02263a
+ __ dci(0xce2a105e); // bcax v30.16b, v2.16b, v10.16b, v4.16b
+ // vl128 state = 0xc9d210aa
+ __ dci(0xce3a5056); // bcax v22.16b, v2.16b, v26.16b, v20.16b
+ // vl128 state = 0x4cc56293
+ }
+
+ uint32_t state;
+ ComputeMachineStateHash(&masm, &state);
+ __ Mov(x0, reinterpret_cast<uint64_t>(&state));
+ __ Ldr(w0, MemOperand(x0));
+
+ END();
+ if (CAN_RUN()) {
+ RUN();
+ uint32_t expected_hashes[] = {
+ 0x4cc56293,
+ 0xee8bac03,
+ 0xc1253ac9,
+ 0x9fe5aa0f,
+ 0x43df27f4,
+ 0x19f03be6,
+ 0xd26c928b,
+ 0x7b9da4c4,
+ 0xe13149a7,
+ 0x9fa11ed9,
+ 0xe02cc4dd,
+ 0x7848dfe7,
+ 0x5ed1726f,
+ 0x983e0123,
+ 0x34166240,
+ 0xc4ee172f,
+ };
+ ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
+ }
+}
+
} // namespace aarch64
} // namespace vixl
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
index f913151..d29b39b 100644
--- a/tools/code_coverage.log
+++ b/tools/code_coverage.log
@@ -23,6 +23,7 @@
1693487542 82.91% 97.57% 94.87%
1694008240 82.72% 97.50% 94.95%
1697036303 82.87% 97.56% 94.76%
+1698330215 82.92% 97.57% 94.88%
1702052331 82.89% 97.59% 94.77%
1706691191 82.87% 97.59% 94.74%
1707395574 82.89% 97.59% 94.77%