// blob: c3d268f397ff081912bd55a112f95f7095800c5d [file] [log] [blame]
// Copyright 2021, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "test-runner.h"
#include "test-utils.h"
#include "aarch64/cpu-aarch64.h"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#include "aarch64/simulator-aarch64.h"
#include "aarch64/test-utils-aarch64.h"
#include "test-assembler-aarch64.h"
// Declares a test called `name` by forwarding to TEST_SVE_INNER with "SIM" as
// its first argument. NOTE(review): "SIM" presumably tags the test as a
// simulator test; confirm against the TEST_SVE_INNER definition.
#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
namespace vixl {
namespace aarch64 {
// Emits a fixed sequence of SVE integer matrix multiply-accumulate encodings
// (smmla, ummla, usmmla), then compares a hash of the resulting machine state
// with the expected value for the vector length in use.
TEST_SVE(sve_matmul) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kSVEI8MM,
CPUFeatures::kNEON,
CPUFeatures::kCRC32);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 50 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length (the last one equals the vl128
// entry of expected_hashes).
ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
__ dci(0x45179979); // smmla z25.s, z11.b, z23.b
// vl128 state = 0xf1ca8a4d
__ dci(0x45179b51); // smmla z17.s, z26.b, z23.b
// vl128 state = 0x4458ad10
__ dci(0x45d79b53); // ummla z19.s, z26.b, z23.b
// vl128 state = 0x43d4d064
__ dci(0x45d69b17); // ummla z23.s, z24.b, z22.b
// vl128 state = 0x601e77c8
__ dci(0x45c69b33); // ummla z19.s, z25.b, z6.b
// vl128 state = 0x561b4e22
__ dci(0x45c49b1b); // ummla z27.s, z24.b, z4.b
// vl128 state = 0x89b65d78
__ dci(0x45dc9b1a); // ummla z26.s, z24.b, z28.b
// vl128 state = 0x85c9e62d
__ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
// vl128 state = 0x3fc74134
__ dci(0x45d99b19); // ummla z25.s, z24.b, z25.b
// vl128 state = 0xa2fa347b
__ dci(0x45d99b1b); // ummla z27.s, z24.b, z25.b
// vl128 state = 0xb9854782
__ dci(0x45899b1a); // usmmla z26.s, z24.b, z9.b
// vl128 state = 0x7fd376d8
__ dci(0x45099b8a); // smmla z10.s, z28.b, z9.b
// vl128 state = 0xb41d8433
__ dci(0x45019bcb); // smmla z11.s, z30.b, z1.b
// vl128 state = 0xc9c0e80d
__ dci(0x45019bdb); // smmla z27.s, z30.b, z1.b
// vl128 state = 0xf1130e02
__ dci(0x45019b6b); // smmla z11.s, z27.b, z1.b
// vl128 state = 0x282d3dc7
__ dci(0x45019b6f); // smmla z15.s, z27.b, z1.b
// vl128 state = 0x34570238
__ dci(0x45859b6b); // usmmla z11.s, z27.b, z5.b
// vl128 state = 0xc451206a
__ dci(0x45919b6a); // usmmla z10.s, z27.b, z17.b
// vl128 state = 0xa58e2ea8
__ dci(0x45909a62); // usmmla z2.s, z19.b, z16.b
// vl128 state = 0x7b5f948d
__ dci(0x45809a52); // usmmla z18.s, z18.b, z0.b
// vl128 state = 0xf746260d
__ dci(0x45889b53); // usmmla z19.s, z26.b, z8.b
// vl128 state = 0xc31cc539
__ dci(0x45809a57); // usmmla z23.s, z18.b, z0.b
// vl128 state = 0x736bb3ee
__ dci(0x45809a96); // usmmla z22.s, z20.b, z0.b
// vl128 state = 0xbb05fef6
__ dci(0x45809a92); // usmmla z18.s, z20.b, z0.b
// vl128 state = 0xbc594372
__ dci(0x45809a82); // usmmla z2.s, z20.b, z0.b
// vl128 state = 0x87c5a584
__ dci(0x45829ad2); // usmmla z18.s, z22.b, z2.b
// vl128 state = 0xa413f733
__ dci(0x45889ad6); // usmmla z22.s, z22.b, z8.b
// vl128 state = 0x87ec445d
__ dci(0x45c898d2); // ummla z18.s, z6.b, z8.b
// vl128 state = 0x3ca8a6e5
__ dci(0x450898d0); // smmla z16.s, z6.b, z8.b
// vl128 state = 0x4300d87b
__ dci(0x45189ad8); // smmla z24.s, z22.b, z24.b
// vl128 state = 0x38be2e8a
__ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
// vl128 state = 0x8a3e6103
__ dci(0x45989bc9); // usmmla z9.s, z30.b, z24.b
// vl128 state = 0xc728e586
__ dci(0x451c9bd9); // smmla z25.s, z30.b, z28.b
// vl128 state = 0x4cb44c0e
__ dci(0x459c99d1); // usmmla z17.s, z14.b, z28.b
// vl128 state = 0x84ebcb36
__ dci(0x459c99d5); // usmmla z21.s, z14.b, z28.b
// vl128 state = 0x8813d2e2
__ dci(0x451c999d); // smmla z29.s, z12.b, z28.b
// vl128 state = 0x8f26ee51
__ dci(0x451c999f); // smmla z31.s, z12.b, z28.b
// vl128 state = 0x5d626fd0
__ dci(0x459e998f); // usmmla z15.s, z12.b, z30.b
// vl128 state = 0x6b64cc8f
__ dci(0x459f991f); // usmmla z31.s, z8.b, z31.b
// vl128 state = 0x41648186
__ dci(0x4587991e); // usmmla z30.s, z8.b, z7.b
// vl128 state = 0x701525ec
__ dci(0x45079816); // smmla z22.s, z0.b, z7.b
// vl128 state = 0x61a2d024
__ dci(0x450f9897); // smmla z23.s, z4.b, z15.b
// vl128 state = 0x82ba6bd5
__ dci(0x450b98d3); // smmla z19.s, z6.b, z11.b
// vl128 state = 0xa842bbde
__ dci(0x450b98db); // smmla z27.s, z6.b, z11.b
// vl128 state = 0x9977677a
__ dci(0x451f98d3); // smmla z19.s, z6.b, z31.b
// vl128 state = 0xe6d6c2ef
__ dci(0x451b9adb); // smmla z27.s, z22.b, z27.b
// vl128 state = 0xa535453f
__ dci(0x450b98d9); // smmla z25.s, z6.b, z11.b
// vl128 state = 0xeda3f381
__ dci(0x458b9adb); // usmmla z27.s, z22.b, z11.b
// vl128 state = 0xd72dbdef
__ dci(0x45cb98da); // ummla z26.s, z6.b, z11.b
// vl128 state = 0xfae4975b
__ dci(0x45c999d2); // ummla z18.s, z14.b, z9.b
// vl128 state = 0x0aa6e1f6
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x0aa6e1f6, // vl128
0xba2d4547, // vl256
0x15b8fc1b, // vl512
0x1041114e, // vl1024
0x035f6eca, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of single-precision SVE floating-point matrix
// multiply-accumulate encodings (fmmla, .s lanes), then compares a hash of the
// resulting machine state with the expected value for the vector length in
// use.
TEST_SVE(sve_fmatmul_s) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kSVEF32MM,
CPUFeatures::kNEON,
CPUFeatures::kCRC32);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 20 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
__ dci(0x64a1e6ee); // fmmla z14.s, z23.s, z1.s
// vl128 state = 0x9db41bef
__ dci(0x64b1e7fe); // fmmla z30.s, z31.s, z17.s
// vl128 state = 0xc1535e55
__ dci(0x64b9e7d6); // fmmla z22.s, z30.s, z25.s
// vl128 state = 0xc65aad35
__ dci(0x64bde6c6); // fmmla z6.s, z22.s, z29.s
// vl128 state = 0x68387c22
__ dci(0x64b9e4c2); // fmmla z2.s, z6.s, z25.s
// vl128 state = 0xcf08b3a4
__ dci(0x64b9e543); // fmmla z3.s, z10.s, z25.s
// vl128 state = 0x969bbe77
__ dci(0x64b9e553); // fmmla z19.s, z10.s, z25.s
// vl128 state = 0xc3f514e1
__ dci(0x64b9e557); // fmmla z23.s, z10.s, z25.s
// vl128 state = 0x4b351c29
__ dci(0x64b9e773); // fmmla z19.s, z27.s, z25.s
// vl128 state = 0x5e026315
__ dci(0x64bbe757); // fmmla z23.s, z26.s, z27.s
// vl128 state = 0x61684fe6
__ dci(0x64bbe755); // fmmla z21.s, z26.s, z27.s
// vl128 state = 0x719b4ce0
__ dci(0x64bfe554); // fmmla z20.s, z10.s, z31.s
// vl128 state = 0xdf3d2a1c
__ dci(0x64bfe550); // fmmla z16.s, z10.s, z31.s
// vl128 state = 0x3279aab8
__ dci(0x64bfe714); // fmmla z20.s, z24.s, z31.s
// vl128 state = 0x0b985869
__ dci(0x64b7e756); // fmmla z22.s, z26.s, z23.s
// vl128 state = 0x14230587
__ dci(0x64b7e737); // fmmla z23.s, z25.s, z23.s
// vl128 state = 0x2cb88e7f
__ dci(0x64bfe767); // fmmla z7.s, z27.s, z31.s
// vl128 state = 0xb5ec0c65
__ dci(0x64bfe777); // fmmla z23.s, z27.s, z31.s
// vl128 state = 0xb5e5eab0
__ dci(0x64bfe715); // fmmla z21.s, z24.s, z31.s
// vl128 state = 0xd0491fb5
__ dci(0x64b7e797); // fmmla z23.s, z28.s, z23.s
// vl128 state = 0x98a55a30
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x98a55a30, // vl128
0x590b7715, // vl256
0x1f8653a6, // vl512
0xa22484c7, // vl1024
0xa764c43f, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Below here, there are tests for Neon instructions. As these forms of test
// check the entire register state, they also need SVE features.
// Emits a fixed sequence of NEON polynomial multiply encodings (pmull and
// pmull2, in both the 8h and 1q forms), then compares a hash of the resulting
// machine state with the expected value for the vector length in use.
TEST_SVE(neon_pmull) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kPmull1Q);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 40 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 40 * kInstructionSize);
__ dci(0x4e20e000); // pmull2 v0.8h, v0.16b, v0.16b
// vl128 state = 0x5eba4d4f
__ dci(0x4e20e228); // pmull2 v8.8h, v17.16b, v0.16b
// vl128 state = 0x86bceb87
__ dci(0x4ee0e22a); // pmull2 v10.1q, v17.2d, v0.2d
// vl128 state = 0x1332fe02
__ dci(0x0ee8e222); // pmull v2.1q, v17.1d, v8.1d
// vl128 state = 0xd357dc7b
__ dci(0x4eece226); // pmull2 v6.1q, v17.2d, v12.2d
// vl128 state = 0xdff409ad
__ dci(0x0eece276); // pmull v22.1q, v19.1d, v12.1d
// vl128 state = 0xd8af1dc6
__ dci(0x0eede232); // pmull v18.1q, v17.1d, v13.1d
// vl128 state = 0x41e6ed0e
__ dci(0x0efde216); // pmull v22.1q, v16.1d, v29.1d
// vl128 state = 0x1f10365f
__ dci(0x0effe23e); // pmull v30.1q, v17.1d, v31.1d
// vl128 state = 0x9779ece5
__ dci(0x0ee7e23f); // pmull v31.1q, v17.1d, v7.1d
// vl128 state = 0x11fc8ce9
__ dci(0x0ee2e23e); // pmull v30.1q, v17.1d, v2.1d
// vl128 state = 0x101d5a6f
__ dci(0x0ee2e23c); // pmull v28.1q, v17.1d, v2.1d
// vl128 state = 0xcc4fe26e
__ dci(0x0eeae27d); // pmull v29.1q, v19.1d, v10.1d
// vl128 state = 0xc84be9f4
__ dci(0x4eeae24d); // pmull2 v13.1q, v18.2d, v10.2d
// vl128 state = 0x2fc540b4
__ dci(0x4eeae25d); // pmull2 v29.1q, v18.2d, v10.2d
// vl128 state = 0x1b2d99cd
__ dci(0x4eeae2ed); // pmull2 v13.1q, v23.2d, v10.2d
// vl128 state = 0x8a278b95
__ dci(0x4eeae2e9); // pmull2 v9.1q, v23.2d, v10.2d
// vl128 state = 0x3359b4c8
__ dci(0x4efee2e8); // pmull2 v8.1q, v23.2d, v30.2d
// vl128 state = 0x5c25ed31
__ dci(0x4effe3e0); // pmull2 v0.1q, v31.2d, v31.2d
// vl128 state = 0x28ff67d1
__ dci(0x4eefe3d0); // pmull2 v16.1q, v30.2d, v15.2d
// vl128 state = 0x1543436d
__ dci(0x4ee7e2d1); // pmull2 v17.1q, v22.2d, v7.2d
// vl128 state = 0x71b8bc90
__ dci(0x4eefe3d5); // pmull2 v21.1q, v30.2d, v15.2d
// vl128 state = 0x3d35ca02
__ dci(0x4eefe314); // pmull2 v20.1q, v24.2d, v15.2d
// vl128 state = 0x40e8fade
__ dci(0x4eefe310); // pmull2 v16.1q, v24.2d, v15.2d
// vl128 state = 0xb8affb87
__ dci(0x4eefe300); // pmull2 v0.1q, v24.2d, v15.2d
// vl128 state = 0x4824ee5c
__ dci(0x4eede350); // pmull2 v16.1q, v26.2d, v13.2d
// vl128 state = 0x39202868
__ dci(0x4ee7e354); // pmull2 v20.1q, v26.2d, v7.2d
// vl128 state = 0xc8fde340
__ dci(0x4e27e356); // pmull2 v22.8h, v26.16b, v7.16b
// vl128 state = 0x0f02316b
__ dci(0x4e37e15e); // pmull2 v30.8h, v10.16b, v23.16b
// vl128 state = 0xced4f8bd
__ dci(0x4e33e05f); // pmull2 v31.8h, v2.16b, v19.16b
// vl128 state = 0x0c76bdb3
__ dci(0x0e23e05e); // pmull v30.8h, v2.8b, v3.8b
// vl128 state = 0x0e36962b
__ dci(0x4e23e25f); // pmull2 v31.8h, v18.16b, v3.16b
// vl128 state = 0x11a8dcc3
__ dci(0x4e23e25b); // pmull2 v27.8h, v18.16b, v3.16b
// vl128 state = 0xf01bfe16
__ dci(0x4e23e259); // pmull2 v25.8h, v18.16b, v3.16b
// vl128 state = 0xea351afe
__ dci(0x4e22e2c9); // pmull2 v9.8h, v22.16b, v2.16b
// vl128 state = 0x16e933ef
__ dci(0x4e3ae2c8); // pmull2 v8.8h, v22.16b, v26.16b
// vl128 state = 0x02528a2a
__ dci(0x4e32e249); // pmull2 v9.8h, v18.16b, v18.16b
// vl128 state = 0xe7e20633
__ dci(0x4e36e20d); // pmull2 v13.8h, v16.16b, v22.16b
// vl128 state = 0x6f231732
__ dci(0x4e36e205); // pmull2 v5.8h, v16.16b, v22.16b
// vl128 state = 0x423eb7ea
__ dci(0x4e22e20d); // pmull2 v13.8h, v16.16b, v2.16b
// vl128 state = 0xfc0d1c14
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0xfc0d1c14, // vl128
0x4cb040a3, // vl256
0xfa35b836, // vl512
0x1936a795, // vl1024
0x255baab5, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of two-register SHA-1 encodings (sha1h and sha1su1),
// then compares a hash of the resulting machine state with the expected value
// for the vector length in use.
TEST_SVE(neon_sha1_2reg) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA1);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 30 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x5e280800); // sha1h s0, s0
// vl128 state = 0xc388d4f8
__ dci(0x5e280a28); // sha1h s8, s17
// vl128 state = 0x5c88b904
__ dci(0x5e280a2a); // sha1h s10, s17
// vl128 state = 0x6f63c596
__ dci(0x5e281aae); // sha1su1 v14.4s, v21.4s
// vl128 state = 0x85e1119d
__ dci(0x5e281abe); // sha1su1 v30.4s, v21.4s
// vl128 state = 0x9b814260
__ dci(0x5e281a0e); // sha1su1 v14.4s, v16.4s
// vl128 state = 0x8ccca0ab
__ dci(0x5e281a0a); // sha1su1 v10.4s, v16.4s
// vl128 state = 0x42262836
__ dci(0x5e281acb); // sha1su1 v11.4s, v22.4s
// vl128 state = 0xabcde33d
__ dci(0x5e281acf); // sha1su1 v15.4s, v22.4s
// vl128 state = 0xdf44e7be
__ dci(0x5e281adf); // sha1su1 v31.4s, v22.4s
// vl128 state = 0x48c332a3
__ dci(0x5e280a9d); // sha1h s29, s20
// vl128 state = 0x56bafe13
__ dci(0x5e28188d); // sha1su1 v13.4s, v4.4s
// vl128 state = 0x218eb351
__ dci(0x5e2808cf); // sha1h s15, s6
// vl128 state = 0xc1720d9f
__ dci(0x5e2808cb); // sha1h s11, s6
// vl128 state = 0x67119e1c
__ dci(0x5e2808c9); // sha1h s9, s6
// vl128 state = 0x31f69637
__ dci(0x5e2808c1); // sha1h s1, s6
// vl128 state = 0x214a25ff
__ dci(0x5e280871); // sha1h s17, s3
// vl128 state = 0xa5e88b55
__ dci(0x5e280815); // sha1h s21, s0
// vl128 state = 0xc8c91e29
__ dci(0x5e28185d); // sha1su1 v29.4s, v2.4s
// vl128 state = 0x5582c6a8
__ dci(0x5e28185f); // sha1su1 v31.4s, v2.4s
// vl128 state = 0xd3288a61
__ dci(0x5e28087e); // sha1h s30, s3
// vl128 state = 0x350b39c2
__ dci(0x5e28093f); // sha1h s31, s9
// vl128 state = 0xbdc1ac98
__ dci(0x5e28093b); // sha1h s27, s9
// vl128 state = 0x62f828bf
__ dci(0x5e28092b); // sha1h s11, s9
// vl128 state = 0xc8f2f671
__ dci(0x5e2819bb); // sha1su1 v27.4s, v13.4s
// vl128 state = 0x24ec8c34
__ dci(0x5e281b93); // sha1su1 v19.4s, v28.4s
// vl128 state = 0x71e188de
__ dci(0x5e281b97); // sha1su1 v23.4s, v28.4s
// vl128 state = 0x22490375
__ dci(0x5e281b95); // sha1su1 v21.4s, v28.4s
// vl128 state = 0x016b70d1
__ dci(0x5e281b51); // sha1su1 v17.4s, v26.4s
// vl128 state = 0xa6252086
__ dci(0x5e2819d3); // sha1su1 v19.4s, v14.4s
// vl128 state = 0x78683885
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x78683885, // vl128
0x59574c2a, // vl256
0x0ddab775, // vl512
0xf17f57c8, // vl1024
0xff8b070a, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of three-register SHA-1 encodings (sha1c, sha1p,
// sha1m and sha1su0), then compares a hash of the resulting machine state with
// the expected value for the vector length in use.
TEST_SVE(neon_sha1_3reg) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA1);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 30 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x5e1f02bd); // sha1c q29, s21, v31.4s
// vl128 state = 0xec2a37ad
__ dci(0x5e0810af); // sha1p q15, s5, v8.4s
// vl128 state = 0x3fe9252a
__ dci(0x5e122227); // sha1m q7, s17, v18.4s
// vl128 state = 0x4465789e
__ dci(0x5e0b039d); // sha1c q29, s28, v11.4s
// vl128 state = 0x2186488a
__ dci(0x5e1a03e9); // sha1c q9, s31, v26.4s
// vl128 state = 0x9eddf8e3
__ dci(0x5e0c138c); // sha1p q12, s28, v12.4s
// vl128 state = 0x0ca7cd3d
__ dci(0x5e1f1316); // sha1p q22, s24, v31.4s
// vl128 state = 0xb80a61c0
__ dci(0x5e052204); // sha1m q4, s16, v5.4s
// vl128 state = 0x941821ca
__ dci(0x5e0a00d6); // sha1c q22, s6, v10.4s
// vl128 state = 0x5e71ccae
__ dci(0x5e0e032e); // sha1c q14, s25, v14.4s
// vl128 state = 0x7ed4486a
__ dci(0x5e1d1098); // sha1p q24, s4, v29.4s
// vl128 state = 0x0978a637
__ dci(0x5e0400d9); // sha1c q25, s6, v4.4s
// vl128 state = 0x34c8609e
__ dci(0x5e1a330e); // sha1su0 v14.4s, v24.4s, v26.4s
// vl128 state = 0xcb078fad
__ dci(0x5e1e30f5); // sha1su0 v21.4s, v7.4s, v30.4s
// vl128 state = 0x885200be
__ dci(0x5e1e32e1); // sha1su0 v1.4s, v23.4s, v30.4s
// vl128 state = 0xabc6a188
__ dci(0x5e0733d3); // sha1su0 v19.4s, v30.4s, v7.4s
// vl128 state = 0x37a4fe6f
__ dci(0x5e0b22e6); // sha1m q6, s23, v11.4s
// vl128 state = 0x68b788d2
__ dci(0x5e011210); // sha1p q16, s16, v1.4s
// vl128 state = 0x6b36b092
__ dci(0x5e1702e1); // sha1c q1, s23, v23.4s
// vl128 state = 0x74ef56f5
__ dci(0x5e1e30f6); // sha1su0 v22.4s, v7.4s, v30.4s
// vl128 state = 0x5a150dfd
__ dci(0x5e1b3348); // sha1su0 v8.4s, v26.4s, v27.4s
// vl128 state = 0xe0a45d9c
__ dci(0x5e0a3041); // sha1su0 v1.4s, v2.4s, v10.4s
// vl128 state = 0x6ba02d02
__ dci(0x5e17119a); // sha1p q26, s12, v23.4s
// vl128 state = 0x3bf511fc
__ dci(0x5e0b32c7); // sha1su0 v7.4s, v22.4s, v11.4s
// vl128 state = 0xf5c513b6
__ dci(0x5e063016); // sha1su0 v22.4s, v0.4s, v6.4s
// vl128 state = 0x3eb44b28
__ dci(0x5e05323c); // sha1su0 v28.4s, v17.4s, v5.4s
// vl128 state = 0x7c2d3adf
__ dci(0x5e1d132a); // sha1p q10, s25, v29.4s
// vl128 state = 0x2b0963c4
__ dci(0x5e13003c); // sha1c q28, s1, v19.4s
// vl128 state = 0x4a582d00
__ dci(0x5e13322c); // sha1su0 v12.4s, v17.4s, v19.4s
// vl128 state = 0x7bb2cc8c
__ dci(0x5e032330); // sha1m q16, s25, v3.4s
// vl128 state = 0x2a8b4c0d
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x2a8b4c0d, // vl128
0x114e25bb, // vl256
0x23db7966, // vl512
0xadf71d73, // vl1024
0xe06b94cd, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of SHA-256 hash-update encodings (sha256h and
// sha256h2), then compares a hash of the resulting machine state with the
// expected value for the vector length in use.
TEST_SVE(neon_sha2h) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA2);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 30 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x5e0152a2); // sha256h2 q2, q21, v1.4s
// vl128 state = 0x6bda8984
__ dci(0x5e1552b2); // sha256h2 q18, q21, v21.4s
// vl128 state = 0xe985c68a
__ dci(0x5e055293); // sha256h2 q19, q20, v5.4s
// vl128 state = 0xab18a98b
__ dci(0x5e055297); // sha256h2 q23, q20, v5.4s
// vl128 state = 0x896bad28
__ dci(0x5e0752a7); // sha256h2 q7, q21, v7.4s
// vl128 state = 0x4e00ba08
__ dci(0x5e175223); // sha256h2 q3, q17, v23.4s
// vl128 state = 0x380f3893
__ dci(0x5e1f5262); // sha256h2 q2, q19, v31.4s
// vl128 state = 0xb431122d
__ dci(0x5e1f5272); // sha256h2 q18, q19, v31.4s
// vl128 state = 0x18140047
__ dci(0x5e1e4262); // sha256h q2, q19, v30.4s
// vl128 state = 0x721779be
__ dci(0x5e164363); // sha256h q3, q27, v22.4s
// vl128 state = 0x383ad878
__ dci(0x5e175361); // sha256h2 q1, q27, v23.4s
// vl128 state = 0xd985bd85
__ dci(0x5e115360); // sha256h2 q0, q27, v17.4s
// vl128 state = 0xfa5e77f3
__ dci(0x5e135270); // sha256h2 q16, q19, v19.4s
// vl128 state = 0x4fc1f5cc
__ dci(0x5e195260); // sha256h2 q0, q19, v25.4s
// vl128 state = 0x89435952
__ dci(0x5e1952c4); // sha256h2 q4, q22, v25.4s
// vl128 state = 0x93c60c86
__ dci(0x5e1a52c6); // sha256h2 q6, q22, v26.4s
// vl128 state = 0xedc42105
__ dci(0x5e1a52c4); // sha256h2 q4, q22, v26.4s
// vl128 state = 0xd5d638a8
__ dci(0x5e1a4285); // sha256h q5, q20, v26.4s
// vl128 state = 0x9f9da446
__ dci(0x5e1a428d); // sha256h q13, q20, v26.4s
// vl128 state = 0x87d49cfb
__ dci(0x5e1b42cf); // sha256h q15, q22, v27.4s
// vl128 state = 0xa6802b10
__ dci(0x5e1b43ed); // sha256h q13, q31, v27.4s
// vl128 state = 0x2e346937
__ dci(0x5e0b436f); // sha256h q15, q27, v11.4s
// vl128 state = 0x1005f372
__ dci(0x5e03433f); // sha256h q31, q25, v3.4s
// vl128 state = 0xd908918c
__ dci(0x5e13532f); // sha256h2 q15, q25, v19.4s
// vl128 state = 0x31c73fe0
__ dci(0x5e01533f); // sha256h2 q31, q25, v1.4s
// vl128 state = 0x84e35a20
__ dci(0x5e03523d); // sha256h2 q29, q17, v3.4s
// vl128 state = 0x40da34aa
__ dci(0x5e0b527c); // sha256h2 q28, q19, v11.4s
// vl128 state = 0x506a21d9
__ dci(0x5e0f5238); // sha256h2 q24, q17, v15.4s
// vl128 state = 0x6a67f033
__ dci(0x5e0d5210); // sha256h2 q16, q16, v13.4s
// vl128 state = 0x317e084c
__ dci(0x5e0d5214); // sha256h2 q20, q16, v13.4s
// vl128 state = 0xdd0eb379
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0xdd0eb379, // vl128
0x15384d69, // vl256
0xc5879e77, // vl512
0xee66e696, // vl1024
0xb516eaae, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of sha256su0 encodings, then compares a hash of the
// resulting machine state with the expected value for the vector length in
// use.
TEST_SVE(neon_sha2su0) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA2);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 30 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x5e2828e3); // sha256su0 v3.4s, v7.4s
// vl128 state = 0xbc7a7764
__ dci(0x5e282be1); // sha256su0 v1.4s, v31.4s
// vl128 state = 0x6138a856
__ dci(0x5e282be9); // sha256su0 v9.4s, v31.4s
// vl128 state = 0x49c6be17
__ dci(0x5e282beb); // sha256su0 v11.4s, v31.4s
// vl128 state = 0xca658743
__ dci(0x5e2829bb); // sha256su0 v27.4s, v13.4s
// vl128 state = 0x1bf1d233
__ dci(0x5e2829ba); // sha256su0 v26.4s, v13.4s
// vl128 state = 0xafb0c6ae
__ dci(0x5e2829aa); // sha256su0 v10.4s, v13.4s
// vl128 state = 0x2182e90d
__ dci(0x5e282b2e); // sha256su0 v14.4s, v25.4s
// vl128 state = 0x401d297d
__ dci(0x5e282aaf); // sha256su0 v15.4s, v21.4s
// vl128 state = 0x6c01fefa
__ dci(0x5e282aad); // sha256su0 v13.4s, v21.4s
// vl128 state = 0x0f4c191d
__ dci(0x5e282a7d); // sha256su0 v29.4s, v19.4s
// vl128 state = 0xcf26aa1b
__ dci(0x5e282ad9); // sha256su0 v25.4s, v22.4s
// vl128 state = 0xae04081e
__ dci(0x5e282ac9); // sha256su0 v9.4s, v22.4s
// vl128 state = 0x08149009
__ dci(0x5e282acb); // sha256su0 v11.4s, v22.4s
// vl128 state = 0xa691e487
__ dci(0x5e282ac3); // sha256su0 v3.4s, v22.4s
// vl128 state = 0xd728e1b5
__ dci(0x5e282ac7); // sha256su0 v7.4s, v22.4s
// vl128 state = 0x120fac30
__ dci(0x5e282ac5); // sha256su0 v5.4s, v22.4s
// vl128 state = 0x88086f82
__ dci(0x5e282ac4); // sha256su0 v4.4s, v22.4s
// vl128 state = 0x625160b7
__ dci(0x5e282a65); // sha256su0 v5.4s, v19.4s
// vl128 state = 0x308feecd
__ dci(0x5e282a6d); // sha256su0 v13.4s, v19.4s
// vl128 state = 0x65f03097
__ dci(0x5e282a65); // sha256su0 v5.4s, v19.4s
// vl128 state = 0x44d9fbb6
__ dci(0x5e282a67); // sha256su0 v7.4s, v19.4s
// vl128 state = 0x694fe04a
__ dci(0x5e282a17); // sha256su0 v23.4s, v16.4s
// vl128 state = 0x3d5c139b
__ dci(0x5e282a13); // sha256su0 v19.4s, v16.4s
// vl128 state = 0x922f40a5
__ dci(0x5e282b3b); // sha256su0 v27.4s, v25.4s
// vl128 state = 0x4f9c34f2
__ dci(0x5e282ab9); // sha256su0 v25.4s, v21.4s
// vl128 state = 0x18a4f581
__ dci(0x5e282ab1); // sha256su0 v17.4s, v21.4s
// vl128 state = 0x69da3844
__ dci(0x5e282ab9); // sha256su0 v25.4s, v21.4s
// vl128 state = 0x57f8ce0b
__ dci(0x5e282a1d); // sha256su0 v29.4s, v16.4s
// vl128 state = 0xafa03001
__ dci(0x5e282ad5); // sha256su0 v21.4s, v22.4s
// vl128 state = 0x029b78a8
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x029b78a8, // vl128
0x479a8911, // vl256
0x5ef3718b, // vl512
0xe63a91b9, // vl1024
0xb93fb875, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of sha256su1 encodings, then compares a hash of the
// resulting machine state with the expected value for the vector length in
// use.
TEST_SVE(neon_sha2su1) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA2);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 30 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 30 * kInstructionSize);
__ dci(0x5e1e6146); // sha256su1 v6.4s, v10.4s, v30.4s
// vl128 state = 0x3bbf7782
__ dci(0x5e0f6144); // sha256su1 v4.4s, v10.4s, v15.4s
// vl128 state = 0xf8c83149
__ dci(0x5e0e6174); // sha256su1 v20.4s, v11.4s, v14.4s
// vl128 state = 0x3b8c353b
__ dci(0x5e0e6170); // sha256su1 v16.4s, v11.4s, v14.4s
// vl128 state = 0x1041e30e
__ dci(0x5e0a6131); // sha256su1 v17.4s, v9.4s, v10.4s
// vl128 state = 0xe4d81cd2
__ dci(0x5e0a6135); // sha256su1 v21.4s, v9.4s, v10.4s
// vl128 state = 0x24869db3
__ dci(0x5e0a6131); // sha256su1 v17.4s, v9.4s, v10.4s
// vl128 state = 0xfb093436
__ dci(0x5e0a6199); // sha256su1 v25.4s, v12.4s, v10.4s
// vl128 state = 0x0c7939ba
__ dci(0x5e0e639b); // sha256su1 v27.4s, v28.4s, v14.4s
// vl128 state = 0xa7e5c40a
__ dci(0x5e0663ab); // sha256su1 v11.4s, v29.4s, v6.4s
// vl128 state = 0xc4ae571c
__ dci(0x5e06619b); // sha256su1 v27.4s, v12.4s, v6.4s
// vl128 state = 0xf84ef221
__ dci(0x5e066199); // sha256su1 v25.4s, v12.4s, v6.4s
// vl128 state = 0x24f98d3c
__ dci(0x5e0e6118); // sha256su1 v24.4s, v8.4s, v14.4s
// vl128 state = 0xcdb43a3b
__ dci(0x5e0f601a); // sha256su1 v26.4s, v0.4s, v15.4s
// vl128 state = 0x85fd37e9
__ dci(0x5e096012); // sha256su1 v18.4s, v0.4s, v9.4s
// vl128 state = 0xabccd3f6
__ dci(0x5e0c601a); // sha256su1 v26.4s, v0.4s, v12.4s
// vl128 state = 0x8c0232e5
__ dci(0x5e1c602a); // sha256su1 v10.4s, v1.4s, v28.4s
// vl128 state = 0xcdcf37ba
__ dci(0x5e1e622e); // sha256su1 v14.4s, v17.4s, v30.4s
// vl128 state = 0x25129c9a
__ dci(0x5e1e623e); // sha256su1 v30.4s, v17.4s, v30.4s
// vl128 state = 0xd0a281b7
__ dci(0x5e1e630e); // sha256su1 v14.4s, v24.4s, v30.4s
// vl128 state = 0x3ed92f18
__ dci(0x5e1f639e); // sha256su1 v30.4s, v28.4s, v31.4s
// vl128 state = 0xda1056b9
__ dci(0x5e0f629f); // sha256su1 v31.4s, v20.4s, v15.4s
// vl128 state = 0x367274fa
__ dci(0x5e0f63bd); // sha256su1 v29.4s, v29.4s, v15.4s
// vl128 state = 0x46a79748
__ dci(0x5e0f63b5); // sha256su1 v21.4s, v29.4s, v15.4s
// vl128 state = 0xdc427315
__ dci(0x5e0b63f7); // sha256su1 v23.4s, v31.4s, v11.4s
// vl128 state = 0x91547f41
__ dci(0x5e0263e7); // sha256su1 v7.4s, v31.4s, v2.4s
// vl128 state = 0x1c233ffa
__ dci(0x5e0062f7); // sha256su1 v23.4s, v23.4s, v0.4s
// vl128 state = 0x8c2948a1
__ dci(0x5e1062c7); // sha256su1 v7.4s, v22.4s, v16.4s
// vl128 state = 0x8b72f498
__ dci(0x5e1062c6); // sha256su1 v6.4s, v22.4s, v16.4s
// vl128 state = 0x43d27746
__ dci(0x5e1063ee); // sha256su1 v14.4s, v31.4s, v16.4s
// vl128 state = 0xa864e589
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0xa864e589, // vl128
0xc588dfe0, // vl256
0x884ca9db, // vl512
0x01dc8899, // vl1024
0x51785434, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of SHA-3 related NEON encodings (rax1, bcax, eor3 and
// xar), then compares a hash of the resulting machine state with the expected
// value for the vector length in use.
TEST_SVE(neon_sha3) {
SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
CPUFeatures::kNEON,
CPUFeatures::kCRC32,
CPUFeatures::kSHA3);
START();
SetInitialMachineState(&masm);
// state = 0xe2bd2480
{
// The scope is sized for exactly the 60 instructions emitted below. Each
// dci() is given a raw encoding; its trailing comment is the disassembly,
// and the "vl128 state" comment that follows appears to record the running
// state hash for a 128-bit vector length.
ExactAssemblyScope scope(&masm, 60 * kInstructionSize);
__ dci(0xce608c00); // rax1 v0.2d, v0.2d, v0.2d
// vl128 state = 0x960c2b9f
__ dci(0xce608e28); // rax1 v8.2d, v17.2d, v0.2d
// vl128 state = 0x89ea3f7b
__ dci(0xce618e6c); // rax1 v12.2d, v19.2d, v1.2d
// vl128 state = 0xa7801384
__ dci(0xce718e48); // rax1 v8.2d, v18.2d, v17.2d
// vl128 state = 0x4477d70d
__ dci(0xce738e60); // rax1 v0.2d, v19.2d, v19.2d
// vl128 state = 0xdee66854
__ dci(0xce6b8e61); // rax1 v1.2d, v19.2d, v11.2d
// vl128 state = 0x2e383dc2
__ dci(0xce6e8e60); // rax1 v0.2d, v19.2d, v14.2d
// vl128 state = 0xa022bb6d
__ dci(0xce6e8e62); // rax1 v2.2d, v19.2d, v14.2d
// vl128 state = 0x923f5d32
__ dci(0xce668e23); // rax1 v3.2d, v17.2d, v6.2d
// vl128 state = 0xc2c6ca00
__ dci(0xce260e33); // bcax v19.16b, v17.16b, v6.16b, v3.16b
// vl128 state = 0x517e85e9
__ dci(0xce260e23); // bcax v3.16b, v17.16b, v6.16b, v3.16b
// vl128 state = 0xbcf4c332
__ dci(0xce260e93); // bcax v19.16b, v20.16b, v6.16b, v3.16b
// vl128 state = 0x5d9d51ef
__ dci(0xce260a11); // bcax v17.16b, v16.16b, v6.16b, v2.16b
// vl128 state = 0x69ce0099
__ dci(0xce260a15); // bcax v21.16b, v16.16b, v6.16b, v2.16b
// vl128 state = 0x9a2cdc9f
__ dci(0xce244a11); // bcax v17.16b, v16.16b, v4.16b, v18.16b
// vl128 state = 0x27eeff29
__ dci(0xce304a10); // bcax v16.16b, v16.16b, v16.16b, v18.16b
// vl128 state = 0x6d586875
__ dci(0xce314b18); // bcax v24.16b, v24.16b, v17.16b, v18.16b
// vl128 state = 0xe38b6054
__ dci(0xce214b28); // bcax v8.16b, v25.16b, v1.16b, v18.16b
// vl128 state = 0x27a3f5f6
__ dci(0xce294f38); // bcax v24.16b, v25.16b, v9.16b, v19.16b
// vl128 state = 0x7d7ffa9b
__ dci(0xce214e39); // bcax v25.16b, v17.16b, v1.16b, v19.16b
// vl128 state = 0x936374f0
__ dci(0xce216a3d); // bcax v29.16b, v17.16b, v1.16b, v26.16b
// vl128 state = 0x1c5136d5
__ dci(0xce296b39); // bcax v25.16b, v25.16b, v9.16b, v26.16b
// vl128 state = 0x75cd7131
__ dci(0xce216338); // bcax v24.16b, v25.16b, v1.16b, v24.16b
// vl128 state = 0xcc747626
__ dci(0xce2163f9); // bcax v25.16b, v31.16b, v1.16b, v24.16b
// vl128 state = 0x9409c8bc
__ dci(0xce2043f1); // bcax v17.16b, v31.16b, v0.16b, v16.16b
// vl128 state = 0x8db3a0c8
__ dci(0xce2043f5); // bcax v21.16b, v31.16b, v0.16b, v16.16b
// vl128 state = 0xa55f8d7d
__ dci(0xce2043e5); // bcax v5.16b, v31.16b, v0.16b, v16.16b
// vl128 state = 0xe1960c7a
__ dci(0xce224be7); // bcax v7.16b, v31.16b, v2.16b, v18.16b
// vl128 state = 0xc9599bde
__ dci(0xce204bb7); // bcax v23.16b, v29.16b, v0.16b, v18.16b
// vl128 state = 0x7176d08d
__ dci(0xce004b9f); // eor3 v31.16b, v28.16b, v0.16b, v18.16b
// vl128 state = 0x10620821
__ dci(0xce000baf); // eor3 v15.16b, v29.16b, v0.16b, v2.16b
// vl128 state = 0x0aba0288
__ dci(0xce0a0bab); // eor3 v11.16b, v29.16b, v10.16b, v2.16b
// vl128 state = 0xe6517156
__ dci(0xce0e1baf); // eor3 v15.16b, v29.16b, v14.16b, v6.16b
// vl128 state = 0x6b7021fb
__ dci(0xce0e3fa7); // eor3 v7.16b, v29.16b, v14.16b, v15.16b
// vl128 state = 0x05761b1f
__ dci(0xce0e2fe5); // eor3 v5.16b, v31.16b, v14.16b, v11.16b
// vl128 state = 0xe01822c6
__ dci(0xce2e2fc7); // bcax v7.16b, v30.16b, v14.16b, v11.16b
// vl128 state = 0xdc6444d7
__ dci(0xce3e2dcf); // bcax v15.16b, v14.16b, v30.16b, v11.16b
// vl128 state = 0xa5ecad2e
__ dci(0xce3e3fdf); // bcax v31.16b, v30.16b, v30.16b, v15.16b
// vl128 state = 0x2124dc42
__ dci(0xce3a3ede); // bcax v30.16b, v22.16b, v26.16b, v15.16b
// vl128 state = 0x57f77204
__ dci(0xce3a2e9c); // bcax v28.16b, v20.16b, v26.16b, v11.16b
// vl128 state = 0x6e8d303d
__ dci(0xce3a2294); // bcax v20.16b, v20.16b, v26.16b, v8.16b
// vl128 state = 0xdb53d42c
__ dci(0xce38029c); // bcax v28.16b, v20.16b, v24.16b, v0.16b
// vl128 state = 0x258d49b8
__ dci(0xce38088c); // bcax v12.16b, v4.16b, v24.16b, v2.16b
// vl128 state = 0xe751a348
__ dci(0xce28008e); // bcax v14.16b, v4.16b, v8.16b, v0.16b
// vl128 state = 0x8ce0aa1a
__ dci(0xce28008a); // bcax v10.16b, v4.16b, v8.16b, v0.16b
// vl128 state = 0x1fdf89a5
__ dci(0xce280088); // bcax v8.16b, v4.16b, v8.16b, v0.16b
// vl128 state = 0xcc51f5e1
__ dci(0xce2a1089); // bcax v9.16b, v4.16b, v10.16b, v4.16b
// vl128 state = 0xdaf766b0
__ dci(0xce0b1081); // eor3 v1.16b, v4.16b, v11.16b, v4.16b
// vl128 state = 0x2da7deb5
__ dci(0xce0a1011); // eor3 v17.16b, v0.16b, v10.16b, v4.16b
// vl128 state = 0xcc86f5d4
__ dci(0xce121010); // eor3 v16.16b, v0.16b, v18.16b, v4.16b
// vl128 state = 0xfb722105
__ dci(0xce921118); // xar v24.2d, v8.2d, v18.2d, #4
// vl128 state = 0x9a7752e3
__ dci(0xce9a1199); // xar v25.2d, v12.2d, v26.2d, #4
// vl128 state = 0x83a251c2
__ dci(0xce9e11dd); // xar v29.2d, v14.2d, v30.2d, #4
// vl128 state = 0x1e31c9d5
__ dci(0xce9e915c); // xar v28.2d, v10.2d, v30.2d, #36
// vl128 state = 0x0e421d73
__ dci(0xce1e115d); // eor3 v29.16b, v10.16b, v30.16b, v4.16b
// vl128 state = 0xb5a8c677
__ dci(0xce3e515c); // bcax v28.16b, v10.16b, v30.16b, v20.16b
// vl128 state = 0x21587300
__ dci(0xce3e5154); // bcax v20.16b, v10.16b, v30.16b, v20.16b
// vl128 state = 0x9459c629
__ dci(0xce3e1056); // bcax v22.16b, v2.16b, v30.16b, v4.16b
// vl128 state = 0xdb02263a
__ dci(0xce2a105e); // bcax v30.16b, v2.16b, v10.16b, v4.16b
// vl128 state = 0xc9d210aa
__ dci(0xce3a5056); // bcax v22.16b, v2.16b, v26.16b, v20.16b
// vl128 state = 0x4cc56293
}
// ComputeMachineStateHash is handed `state` as its output location
// (presumably it emits code that stores the final hash there); the value is
// then loaded into w0 so that x0 can be checked after the run.
uint32_t state;
ComputeMachineStateHash(&masm, &state);
__ Mov(x0, reinterpret_cast<uint64_t>(&state));
__ Ldr(w0, MemOperand(x0));
END();
if (CAN_RUN()) {
RUN();
// One expected hash per vector length; the entry is selected by
// WhichPowerOf2 of the lane count reported for Q-sized lanes.
uint32_t expected_hashes[] = {
0x4cc56293, // vl128
0xee8bac03, // vl256
0x9fe5aa0f, // vl512
0x7b9da4c4, // vl1024
0xc4ee172f, // vl2048
};
ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
core.GetSVELaneCount(kQRegSize))],
x0);
}
}
// Emits a fixed sequence of sha512h / sha512h2 / sha512su1 encodings, then
// compares the resulting machine-state hash with a per-vector-length
// reference value.
TEST_SVE(neon_sha512) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSHA512);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce6583cc,  // sha512h q12, q30, v5.2d => 0xecc5733a
      0xce6586c8,  // sha512h2 q8, q22, v5.2d => 0xe05f2087
      0xce7586e0,  // sha512h2 q0, q23, v21.2d => 0x1925555b
      0xce7187e8,  // sha512h2 q8, q31, v17.2d => 0x891dba65
      0xce7586ec,  // sha512h2 q12, q23, v21.2d => 0xdfbe3239
      0xce7580fc,  // sha512h q28, q7, v21.2d => 0xba49dbc1
      0xce7580f4,  // sha512h q20, q7, v21.2d => 0x3ad11a23
      0xce6780f6,  // sha512h q22, q7, v7.2d => 0xcf9e1803
      0xce6780f7,  // sha512h q23, q7, v7.2d => 0xe2baee15
      0xce6785e7,  // sha512h2 q7, q15, v7.2d => 0x900a337c
      0xce6f8565,  // sha512h2 q5, q11, v15.2d => 0xc6e5d7eb
      0xce6f8424,  // sha512h2 q4, q1, v15.2d => 0xcbcb6ac1
      0xce6b84a6,  // sha512h2 q6, q5, v11.2d => 0xa3c1a679
      0xce7b848e,  // sha512h2 q14, q4, v27.2d => 0x47c4e54d
      0xce7d849e,  // sha512h2 q30, q4, v29.2d => 0x9f519a29
      0xce7f859c,  // sha512h2 q28, q12, v31.2d => 0xa4433415
      0xce778494,  // sha512h2 q20, q4, v23.2d => 0xf03a69ec
      0xce778484,  // sha512h2 q4, q4, v23.2d => 0x2c728333
      0xce77850c,  // sha512h2 q12, q8, v23.2d => 0xaedc423e
      0xce77815c,  // sha512h q28, q10, v23.2d => 0xea9346ea
      0xce7381cc,  // sha512h q12, q14, v19.2d => 0x05ad87d1
      0xce7a81dc,  // sha512h q28, q14, v26.2d => 0x9b1cd7b3
      0xce7285d4,  // sha512h2 q20, q14, v18.2d => 0x154201ac
      0xce7280d6,  // sha512h q22, q6, v18.2d => 0xd8640492
      0xce7a81d4,  // sha512h q20, q14, v26.2d => 0x908eb258
      0xce7281f0,  // sha512h q16, q15, v18.2d => 0x0067f162
      0xce728572,  // sha512h2 q18, q11, v18.2d => 0xca9bc751
      0xce728422,  // sha512h2 q2, q1, v18.2d => 0x06b7318d
      0xce738412,  // sha512h2 q18, q0, v19.2d => 0xad019588
      0xce718016,  // sha512h q22, q0, v17.2d => 0x55a29e9b
      0xce718834,  // sha512su1 v20.2d, v1.2d, v17.2d => 0x953a9c7a
      0xce738876,  // sha512su1 v22.2d, v3.2d, v19.2d => 0x4f194c71
      0xce638826,  // sha512su1 v6.2d, v1.2d, v3.2d => 0x08e50d47
      0xce6b886e,  // sha512su1 v14.2d, v3.2d, v11.2d => 0x4bdfb870
      0xce6b88de,  // sha512su1 v30.2d, v6.2d, v11.2d => 0xbcf4b6c5
      0xce7f88df,  // sha512su1 v31.2d, v6.2d, v31.2d => 0x916dede1
      0xce6f8acf,  // sha512su1 v15.2d, v22.2d, v15.2d => 0x3b776003
      0xce6d8bcb,  // sha512su1 v11.2d, v30.2d, v13.2d => 0x5d5cb7d9
      0xce6d83ea,  // sha512h q10, q31, v13.2d => 0x18df9e46
      0xce6d8328,  // sha512h q8, q25, v13.2d => 0xde5807d0
      0xce6583b8,  // sha512h q24, q29, v5.2d => 0x861020e7
      0xce6d83f9,  // sha512h q25, q31, v13.2d => 0x39d960f4
      0xce6d8b78,  // sha512su1 v24.2d, v27.2d, v13.2d => 0x3afc2b5c
      0xce6c8968,  // sha512su1 v8.2d, v11.2d, v12.2d => 0x74d44114
      0xce6c8b49,  // sha512su1 v9.2d, v26.2d, v12.2d => 0x72e6b5cd
      0xce6c8b39,  // sha512su1 v25.2d, v25.2d, v12.2d => 0x6aaa4658
      0xce6c8b9d,  // sha512su1 v29.2d, v28.2d, v12.2d => 0x7c076c9b
      0xce648b0d,  // sha512su1 v13.2d, v24.2d, v4.2d => 0x1082519d
      0xce648385,  // sha512h q5, q28, v4.2d => 0x9ed9d190
      0xce648715,  // sha512h2 q21, q24, v4.2d => 0xaace5a02
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0xaace5a02,  // vl128
        0x912905de,  // vl256
        0xac6646d5,  // vl512
        0x49f8d50c,  // vl1024
        0x6aead9d8,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sha512su0 encodings, then compares the resulting
// machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sha512su0) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSHA512);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xcec083f6,  // sha512su0 v22.2d, v31.2d => 0xf7a54f2b
      0xcec083e6,  // sha512su0 v6.2d, v31.2d => 0x919c170d
      0xcec08347,  // sha512su0 v7.2d, v26.2d => 0x8a1800d6
      0xcec082c6,  // sha512su0 v6.2d, v22.2d => 0x353aa8bf
      0xcec082c4,  // sha512su0 v4.2d, v22.2d => 0x87d75b6c
      0xcec082c0,  // sha512su0 v0.2d, v22.2d => 0xf2ee6974
      0xcec082c1,  // sha512su0 v1.2d, v22.2d => 0xf2ec1e17
      0xcec082c0,  // sha512su0 v0.2d, v22.2d => 0x1bcca060
      0xcec082c4,  // sha512su0 v4.2d, v22.2d => 0x67773394
      0xcec082c5,  // sha512su0 v5.2d, v22.2d => 0xbb344c8d
      0xcec083e1,  // sha512su0 v1.2d, v31.2d => 0x595e2eb0
      0xcec081a5,  // sha512su0 v5.2d, v13.2d => 0x7d7f4e15
      0xcec081a7,  // sha512su0 v7.2d, v13.2d => 0xba4b1bc6
      0xcec081a3,  // sha512su0 v3.2d, v13.2d => 0x2c56ee6e
      0xcec083f3,  // sha512su0 v19.2d, v31.2d => 0xefe9b855
      0xcec08397,  // sha512su0 v23.2d, v28.2d => 0x6f0d20ba
      0xcec08396,  // sha512su0 v22.2d, v28.2d => 0x9be77fdb
      0xcec081b7,  // sha512su0 v23.2d, v13.2d => 0x5d981c55
      0xcec080ff,  // sha512su0 v31.2d, v7.2d => 0x9126079f
      0xcec080fd,  // sha512su0 v29.2d, v7.2d => 0x3199dc9e
      0xcec081dc,  // sha512su0 v28.2d, v14.2d => 0x20fb48d7
      0xcec081cc,  // sha512su0 v12.2d, v14.2d => 0x4ae6221a
      0xcec08088,  // sha512su0 v8.2d, v4.2d => 0x17e8b62d
      0xcec0808a,  // sha512su0 v10.2d, v4.2d => 0x90d73468
      0xcec0809a,  // sha512su0 v26.2d, v4.2d => 0x1f02f97f
      0xcec081de,  // sha512su0 v30.2d, v14.2d => 0xe5ef3e67
      0xcec081bf,  // sha512su0 v31.2d, v13.2d => 0xd1bcc363
      0xcec081bb,  // sha512su0 v27.2d, v13.2d => 0x8bcfab58
      0xcec08033,  // sha512su0 v19.2d, v1.2d => 0x93fb8bad
      0xcec080fb,  // sha512su0 v27.2d, v7.2d => 0x3598e921
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x3598e921,  // vl128
        0x7e3ee16c,  // vl256
        0x193bda79,  // vl512
        0x9653d8b3,  // vl1024
        0x4024f5c7,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of aese / aesd encodings, then compares the
// resulting machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_aes) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kAES);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0x4e285a86,  // aesd v6.16b, v20.16b => 0x801bfc08
      0x4e2858ae,  // aesd v14.16b, v5.16b => 0xbd83a757
      0x4e2858ac,  // aesd v12.16b, v5.16b => 0x9fb1dc6b
      0x4e2858ae,  // aesd v14.16b, v5.16b => 0xfa1fa7e4
      0x4e28482a,  // aese v10.16b, v1.16b => 0xecfcfe2d
      0x4e28483a,  // aese v26.16b, v1.16b => 0x05e22f07
      0x4e28488a,  // aese v10.16b, v4.16b => 0xdd53df5f
      0x4e28488e,  // aese v14.16b, v4.16b => 0x9d2ac50f
      0x4e28484f,  // aese v15.16b, v2.16b => 0xf45146ab
      0x4e28484b,  // aese v11.16b, v2.16b => 0xf1260a7c
      0x4e28485b,  // aese v27.16b, v2.16b => 0x3a0844da
      0x4e285819,  // aesd v25.16b, v0.16b => 0xaca89993
      0x4e284a09,  // aese v9.16b, v16.16b => 0xef4e9a5f
      0x4e285a4b,  // aesd v11.16b, v18.16b => 0x209a44bc
      0x4e285a4f,  // aesd v15.16b, v18.16b => 0xc6d2d718
      0x4e285a4d,  // aesd v13.16b, v18.16b => 0x1aceef8f
      0x4e285a45,  // aesd v5.16b, v18.16b => 0x7ed056c6
      0x4e285af5,  // aesd v21.16b, v23.16b => 0x429ed71e
      0x4e285a91,  // aesd v17.16b, v20.16b => 0xd7a1f687
      0x4e284ad9,  // aese v25.16b, v22.16b => 0x8fa44574
      0x4e284adb,  // aese v27.16b, v22.16b => 0xd2792169
      0x4e285afa,  // aesd v26.16b, v23.16b => 0xe502f095
      0x4e285bbb,  // aesd v27.16b, v29.16b => 0x0e3d3238
      0x4e285bbf,  // aesd v31.16b, v29.16b => 0x0ad06592
      0x4e285baf,  // aesd v15.16b, v29.16b => 0xb94f3c19
      0x4e284b3f,  // aese v31.16b, v25.16b => 0xf31a0da1
      0x4e284917,  // aese v23.16b, v8.16b => 0x7d2d7811
      0x4e284913,  // aese v19.16b, v8.16b => 0x41b7b854
      0x4e284911,  // aese v17.16b, v8.16b => 0x60600536
      0x4e2849d5,  // aese v21.16b, v14.16b => 0x3e0cc74f
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x3e0cc74f,  // vl128
        0x7f17ba2e,  // vl256
        0x9f15a51b,  // vl512
        0x6974fa54,  // vl1024
        0x6e7a0f58,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of aesmc / aesimc encodings, then compares the
// resulting machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_aesmc) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kAES);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0x4e287800,  // aesimc v0.16b, v0.16b => 0x03554749
      0x4e287a28,  // aesimc v8.16b, v17.16b => 0x59d5fedd
      0x4e287a2a,  // aesimc v10.16b, v17.16b => 0xcda29514
      0x4e286aae,  // aesmc v14.16b, v21.16b => 0xae8f019a
      0x4e286abe,  // aesmc v30.16b, v21.16b => 0x7b04c6c0
      0x4e286a0e,  // aesmc v14.16b, v16.16b => 0xaf6c5ce6
      0x4e286a0a,  // aesmc v10.16b, v16.16b => 0xf1d7fd2b
      0x4e286acb,  // aesmc v11.16b, v22.16b => 0x5d693c63
      0x4e286acf,  // aesmc v15.16b, v22.16b => 0xec8971ad
      0x4e286adf,  // aesmc v31.16b, v22.16b => 0x6389b200
      0x4e287a9d,  // aesimc v29.16b, v20.16b => 0xd69341fb
      0x4e28688d,  // aesmc v13.16b, v4.16b => 0x6344af95
      0x4e2878cf,  // aesimc v15.16b, v6.16b => 0x5c58dfac
      0x4e2878cb,  // aesimc v11.16b, v6.16b => 0x7dc9cf34
      0x4e2878c9,  // aesimc v9.16b, v6.16b => 0xff4b3544
      0x4e2878c1,  // aesimc v1.16b, v6.16b => 0xd1937de2
      0x4e287871,  // aesimc v17.16b, v3.16b => 0x7cabd208
      0x4e287815,  // aesimc v21.16b, v0.16b => 0xbc06df94
      0x4e28685d,  // aesmc v29.16b, v2.16b => 0xfc4478bb
      0x4e28685f,  // aesmc v31.16b, v2.16b => 0x0c72c200
      0x4e28787e,  // aesimc v30.16b, v3.16b => 0xdd822b9d
      0x4e28793f,  // aesimc v31.16b, v9.16b => 0x1397dcc6
      0x4e28793b,  // aesimc v27.16b, v9.16b => 0x43f3abd6
      0x4e28792b,  // aesimc v11.16b, v9.16b => 0xeb8ca365
      0x4e2869bb,  // aesmc v27.16b, v13.16b => 0x0a957f4f
      0x4e286b93,  // aesmc v19.16b, v28.16b => 0xbc5da8bd
      0x4e286b97,  // aesmc v23.16b, v28.16b => 0xc49343cc
      0x4e286b95,  // aesmc v21.16b, v28.16b => 0x8c80c144
      0x4e286b51,  // aesmc v17.16b, v26.16b => 0xeda3255d
      0x4e2869d3,  // aesmc v19.16b, v14.16b => 0x8db8a9d0
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x8db8a9d0,  // vl128
        0xb13d8e1e,  // vl256
        0x38f7ef7a,  // vl512
        0xb3f61256,  // vl1024
        0xe3817f4e,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm3ss1 encodings, then compares the resulting
// machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm3) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM3);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce591017,  // sm3ss1 v23.4s, v0.4s, v25.4s, v4.4s => 0xad4bba0a
      0xce49121f,  // sm3ss1 v31.4s, v16.4s, v9.4s, v4.4s => 0x84adef21
      0xce49121e,  // sm3ss1 v30.4s, v16.4s, v9.4s, v4.4s => 0xccfd7e5a
      0xce49301a,  // sm3ss1 v26.4s, v0.4s, v9.4s, v12.4s => 0x60833cc7
      0xce49720a,  // sm3ss1 v10.4s, v16.4s, v9.4s, v28.4s => 0x03f03263
      0xce58721a,  // sm3ss1 v26.4s, v16.4s, v24.4s, v28.4s => 0x31845f40
      0xce58702a,  // sm3ss1 v10.4s, v1.4s, v24.4s, v28.4s => 0x54c64f70
      0xce58753a,  // sm3ss1 v26.4s, v9.4s, v24.4s, v29.4s => 0x3d5cb04f
      0xce507518,  // sm3ss1 v24.4s, v8.4s, v16.4s, v29.4s => 0xe02de221
      0xce406519,  // sm3ss1 v25.4s, v8.4s, v0.4s, v25.4s => 0x73d36ae8
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x73d36ae8,  // vl128
        0xcbcda2db,  // vl256
        0xa6857a16,  // vl512
        0xfb0d092e,  // vl1024
        0x0a836861,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm3partw1 / sm3partw2 encodings, then compares
// the resulting machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm3partw12) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM3);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce70c201,  // sm3partw1 v1.4s, v16.4s, v16.4s => 0x6f2069a6
      0xce72c303,  // sm3partw1 v3.4s, v24.4s, v18.4s => 0x986fa56c
      0xce76c381,  // sm3partw1 v1.4s, v28.4s, v22.4s => 0x5dbd953c
      0xce7ec3b1,  // sm3partw1 v17.4s, v29.4s, v30.4s => 0xc72ccca5
      0xce7ac1b5,  // sm3partw1 v21.4s, v13.4s, v26.4s => 0x33cdfd6a
      0xce7ac1b7,  // sm3partw1 v23.4s, v13.4s, v26.4s => 0x4303e945
      0xce7ac1bf,  // sm3partw1 v31.4s, v13.4s, v26.4s => 0x56acac84
      0xce78c1fd,  // sm3partw1 v29.4s, v15.4s, v24.4s => 0x5e2a2793
      0xce78c5df,  // sm3partw2 v31.4s, v14.4s, v24.4s => 0xf7c457f3
      0xce70c55d,  // sm3partw2 v29.4s, v10.4s, v16.4s => 0xfa3557ac
      0xce60c159,  // sm3partw1 v25.4s, v10.4s, v0.4s => 0xb3ae6830
      0xce62c55b,  // sm3partw2 v27.4s, v10.4s, v2.4s => 0xa7747c70
      0xce66c753,  // sm3partw2 v19.4s, v26.4s, v6.4s => 0xb55f5895
      0xce67c551,  // sm3partw2 v17.4s, v10.4s, v7.4s => 0x519b1342
      0xce65c750,  // sm3partw2 v16.4s, v26.4s, v5.4s => 0xc4e6e4b9
      0xce61c718,  // sm3partw2 v24.4s, v24.4s, v1.4s => 0x127c483c
      0xce61c71c,  // sm3partw2 v28.4s, v24.4s, v1.4s => 0x92783ecc
      0xce6dc714,  // sm3partw2 v20.4s, v24.4s, v13.4s => 0xe11e87d3
      0xce65c756,  // sm3partw2 v22.4s, v26.4s, v5.4s => 0x8b6878d0
      0xce65c5d2,  // sm3partw2 v18.4s, v14.4s, v5.4s => 0xf2fb1e86
      0xce64c550,  // sm3partw2 v16.4s, v10.4s, v4.4s => 0x73ad3b0f
      0xce66c578,  // sm3partw2 v24.4s, v11.4s, v6.4s => 0x7e03900d
      0xce76c55c,  // sm3partw2 v28.4s, v10.4s, v22.4s => 0x1d0b5df6
      0xce76c54c,  // sm3partw2 v12.4s, v10.4s, v22.4s => 0x1a3d7a77
      0xce7ec448,  // sm3partw2 v8.4s, v2.4s, v30.4s => 0x3ed2e4bd
      0xce6ec409,  // sm3partw2 v9.4s, v0.4s, v14.4s => 0x826dd348
      0xce6ec52b,  // sm3partw2 v11.4s, v9.4s, v14.4s => 0x3ff5e482
      0xce66c72f,  // sm3partw2 v15.4s, v25.4s, v6.4s => 0x6fd24cd4
      0xce65c73f,  // sm3partw2 v31.4s, v25.4s, v5.4s => 0xd51ac474
      0xce67c77b,  // sm3partw2 v27.4s, v27.4s, v7.4s => 0x720d7419
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x720d7419,  // vl128
        0x31445e06,  // vl256
        0x45a27e4b,  // vl512
        0x377e1f38,  // vl1024
        0xf8dfcc40,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm3tt1a / sm3tt1b encodings, then compares the
// resulting machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm3tt1) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM3);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce53a363,  // sm3tt1a v3.4s, v27.4s, v19.s[2] => 0xaaa8c715
      0xce58a7a7,  // sm3tt1b v7.4s, v29.4s, v24.s[2] => 0xb99a301d
      0xce5eb2b7,  // sm3tt1a v23.4s, v21.4s, v30.s[3] => 0xe8dabe99
      0xce43b6ce,  // sm3tt1b v14.4s, v22.4s, v3.s[3] => 0xaa498ae5
      0xce448027,  // sm3tt1a v7.4s, v1.4s, v4.s[0] => 0x32093547
      0xce4286d8,  // sm3tt1b v24.4s, v22.4s, v2.s[0] => 0xe03e3a81
      0xce44a0f3,  // sm3tt1a v19.4s, v7.4s, v4.s[2] => 0xcb555b4a
      0xce418233,  // sm3tt1a v19.4s, v17.4s, v1.s[0] => 0x751e4f7d
      0xce58a49f,  // sm3tt1b v31.4s, v4.4s, v24.s[2] => 0xcaff7580
      0xce548326,  // sm3tt1a v6.4s, v25.4s, v20.s[0] => 0xc4308a78
      0xce548124,  // sm3tt1a v4.4s, v9.4s, v20.s[0] => 0x1f1bfdfb
      0xce5fb282,  // sm3tt1a v2.4s, v20.4s, v31.s[3] => 0xa632c0b2
      0xce549573,  // sm3tt1b v19.4s, v11.4s, v20.s[1] => 0x7fb7c2d3
      0xce4387ae,  // sm3tt1b v14.4s, v29.4s, v3.s[0] => 0xe8d4c534
      0xce5094eb,  // sm3tt1b v11.4s, v7.4s, v16.s[1] => 0xf34a4fbc
      0xce51b59f,  // sm3tt1b v31.4s, v12.4s, v17.s[3] => 0x98e388e9
      0xce50a7bf,  // sm3tt1b v31.4s, v29.4s, v16.s[2] => 0x7cd7a6ac
      0xce5ca52e,  // sm3tt1b v14.4s, v9.4s, v28.s[2] => 0xce9410c5
      0xce5aa741,  // sm3tt1b v1.4s, v26.4s, v26.s[2] => 0xd83fbd58
      0xce5e94da,  // sm3tt1b v26.4s, v6.4s, v30.s[1] => 0xc6055fe3
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0xc6055fe3,  // vl128
        0xa2c33f98,  // vl256
        0xf29eb254,  // vl512
        0x96e46836,  // vl1024
        0x68cc20ef,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm3tt2a / sm3tt2b encodings, then compares the
// resulting machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm3tt2) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM3);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce439d42,  // sm3tt2b v2.4s, v10.4s, v3.s[1] => 0x388642cc
      0xce42b89d,  // sm3tt2a v29.4s, v4.4s, v2.s[3] => 0x66f4e60a
      0xce4da95d,  // sm3tt2a v29.4s, v10.4s, v13.s[2] => 0x95d4651d
      0xce49b926,  // sm3tt2a v6.4s, v9.4s, v9.s[3] => 0x826919fe
      0xce5cae33,  // sm3tt2b v19.4s, v17.4s, v28.s[2] => 0xb5cfefb0
      0xce478959,  // sm3tt2a v25.4s, v10.4s, v7.s[0] => 0xfe17b730
      0xce549cc2,  // sm3tt2b v2.4s, v6.4s, v20.s[1] => 0x769a0d76
      0xce4c9f90,  // sm3tt2b v16.4s, v28.4s, v12.s[1] => 0x8f633b95
      0xce508d49,  // sm3tt2b v9.4s, v10.4s, v16.s[0] => 0x5eab6daa
      0xce59ad79,  // sm3tt2b v25.4s, v11.4s, v25.s[2] => 0xfb197616
      0xce458fd6,  // sm3tt2b v22.4s, v30.4s, v5.s[0] => 0x875ff29d
      0xce4ab92c,  // sm3tt2a v12.4s, v9.4s, v10.s[3] => 0xad159c01
      0xce598a1c,  // sm3tt2a v28.4s, v16.4s, v25.s[0] => 0x3da313e4
      0xce43989f,  // sm3tt2a v31.4s, v4.4s, v3.s[1] => 0xc0a54179
      0xce459c8a,  // sm3tt2b v10.4s, v4.4s, v5.s[1] => 0x4739cdbf
      0xce539959,  // sm3tt2a v25.4s, v10.4s, v19.s[1] => 0xd85f84ab
      0xce429be1,  // sm3tt2a v1.4s, v31.4s, v2.s[1] => 0x85b5871c
      0xce5d9fe3,  // sm3tt2b v3.4s, v31.4s, v29.s[1] => 0x2be5bd95
      0xce4ebe16,  // sm3tt2b v22.4s, v16.4s, v14.s[3] => 0x2f8146e9
      0xce599a63,  // sm3tt2a v3.4s, v19.4s, v25.s[1] => 0xa6e513e2
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0xa6e513e2,  // vl128
        0x6bf4ae47,  // vl256
        0xae1a57e0,  // vl512
        0x07ed81aa,  // vl1024
        0xe44416d3,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm4e encodings, then compares the resulting
// machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm4e) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM4);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xcec08400,  // sm4e v0.4s, v0.4s => 0xa687bacc
      0xcec08628,  // sm4e v8.4s, v17.4s => 0xf174e346
      0xcec0862a,  // sm4e v10.4s, v17.4s => 0xab88f8ca
      0xcec08628,  // sm4e v8.4s, v17.4s => 0x000d3840
      0xcec08638,  // sm4e v24.4s, v17.4s => 0xd980ddc2
      0xcec08688,  // sm4e v8.4s, v20.4s => 0xd501f2c2
      0xcec0868c,  // sm4e v12.4s, v20.4s => 0x699d6b6f
      0xcec0864d,  // sm4e v13.4s, v18.4s => 0x67baf406
      0xcec08649,  // sm4e v9.4s, v18.4s => 0x178b048e
      0xcec08659,  // sm4e v25.4s, v18.4s => 0x552a70d9
      0xcec0865d,  // sm4e v29.4s, v18.4s => 0x3be534d1
      0xcec0865f,  // sm4e v31.4s, v18.4s => 0x396fdf70
      0xcec08657,  // sm4e v23.4s, v18.4s => 0x836c474b
      0xcec086e7,  // sm4e v7.4s, v23.4s => 0x71aebad7
      0xcec08683,  // sm4e v3.4s, v20.4s => 0xadfd515c
      0xcec08681,  // sm4e v1.4s, v20.4s => 0xf1465ab4
      0xcec087c0,  // sm4e v0.4s, v30.4s => 0x8555b40f
      0xcec087c4,  // sm4e v4.4s, v30.4s => 0x2cb3f99f
      0xcec087d4,  // sm4e v20.4s, v30.4s => 0x733336fd
      0xcec085fc,  // sm4e v28.4s, v15.4s => 0x11b138f9
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x11b138f9,  // vl128
        0x5993c196,  // vl256
        0xf96d88cf,  // vl512
        0x77933d5b,  // vl1024
        0x4dbf0c92,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
// Emits a fixed sequence of sm4ekey encodings, then compares the resulting
// machine-state hash with a per-vector-length reference value.
TEST_SVE(neon_sm4ekey) {
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32,
                          CPUFeatures::kSM4);

  // One entry per emitted instruction, in emission order. The trailing comment
  // gives the disassembly, then "=> <hash>": the vl128 state hash recorded
  // after that instruction. The last hash equals the vl128 reference below.
  static const uint32_t kEncodings[] = {
      0xce6fc9d4,  // sm4ekey v20.4s, v14.4s, v15.4s => 0x4bb7b396
      0xce6bc8d5,  // sm4ekey v21.4s, v6.4s, v11.4s => 0xf4354b26
      0xce6bc8c5,  // sm4ekey v5.4s, v6.4s, v11.4s => 0x0a331378
      0xce6bc8cd,  // sm4ekey v13.4s, v6.4s, v11.4s => 0x7ed4c2a7
      0xce6fc8e5,  // sm4ekey v5.4s, v7.4s, v15.4s => 0x38a433fd
      0xce6fc8e4,  // sm4ekey v4.4s, v7.4s, v15.4s => 0xc1ad0d76
      0xce6bcaec,  // sm4ekey v12.4s, v23.4s, v11.4s => 0x81660ce3
      0xce6bcae8,  // sm4ekey v8.4s, v23.4s, v11.4s => 0x79f3e5c1
      0xce7bcaaa,  // sm4ekey v10.4s, v21.4s, v27.4s => 0x231e0a79
      0xce72caa8,  // sm4ekey v8.4s, v21.4s, v18.4s => 0xd931c858
      0xce7ac8aa,  // sm4ekey v10.4s, v5.4s, v26.4s => 0x2476ef6a
      0xce7bc888,  // sm4ekey v8.4s, v4.4s, v27.4s => 0xd4a9ac83
      0xce7bc889,  // sm4ekey v9.4s, v4.4s, v27.4s => 0x149fd9b3
      0xce7bc9cd,  // sm4ekey v13.4s, v14.4s, v27.4s => 0xece67fce
      0xce79cbc5,  // sm4ekey v5.4s, v30.4s, v25.4s => 0xccb45863
      0xce71cac4,  // sm4ekey v4.4s, v22.4s, v17.4s => 0xafb23c9d
      0xce71c8e0,  // sm4ekey v0.4s, v7.4s, v17.4s => 0x5c808694
      0xce71c882,  // sm4ekey v2.4s, v4.4s, v17.4s => 0x6cea5132
      0xce73c803,  // sm4ekey v3.4s, v0.4s, v19.4s => 0x67e316db
      0xce71c847,  // sm4ekey v7.4s, v2.4s, v17.4s => 0x317aafac
  };
  const unsigned kEncodingCount = sizeof(kEncodings) / sizeof(kEncodings[0]);

  START();
  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope is sized for exactly the instructions emitted by the loop.
    ExactAssemblyScope scope(&masm, kEncodingCount * kInstructionSize);
    for (unsigned i = 0; i < kEncodingCount; i++) {
      __ dci(kEncodings[i]);
    }
  }

  // ComputeMachineStateHash is handed the address of `state_hash`; the code
  // emitted next loads the 32-bit value stored there into w0.
  uint32_t state_hash;
  ComputeMachineStateHash(&masm, &state_hash);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state_hash));
  __ Ldr(w0, MemOperand(x0));
  END();

  if (CAN_RUN()) {
    RUN();
    // One reference hash per vector length; the index is WhichPowerOf2 of the
    // kQRegSize lane count reported by `core`.
    uint32_t expected_hashes[] = {
        0x317aafac,  // vl128
        0xbacd34de,  // vl256
        0x3043dbe3,  // vl512
        0x3bee1f56,  // vl1024
        0xd50621d1,  // vl2048
    };
    ASSERT_EQUAL_64(expected_hashes[WhichPowerOf2(
                        core.GetSVELaneCount(kQRegSize))],
                    x0);
  }
}
} // namespace aarch64
} // namespace vixl