VIXL Release 1.8
Refer to the README.md and LICENCE files for details.
diff --git a/test/test-assembler-a64.cc b/test/test-assembler-a64.cc
index 156e1cb..ed55ae9 100644
--- a/test/test-assembler-a64.cc
+++ b/test/test-assembler-a64.cc
@@ -1,4 +1,4 @@
-// Copyright 2013, ARM Limited
+// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -225,8 +225,11 @@
#define ASSERT_EQUAL_FP64(expected, result) \
assert(EqualFP64(expected, &core, result))
+#define ASSERT_EQUAL_128(expected_h, expected_l, result) \
+ assert(Equal128(expected_h, expected_l, &core, result))
+
#define ASSERT_LITERAL_POOL_SIZE(expected) \
- assert((expected) == (__ LiteralPoolSize()))
+ assert((expected + kInstructionSize) == (masm.LiteralPoolSize()))
TEST(stack_ops) {
@@ -1330,7 +1333,7 @@
}
-TEST(smaddl_umaddl) {
+TEST(smaddl_umaddl_umull) {
SETUP();
START();
@@ -1348,6 +1351,8 @@
__ Umaddl(x14, w18, w18, x20);
__ Umaddl(x15, w19, w19, x20);
__ Umaddl(x22, w19, w19, x21);
+ __ Umull(x24, w19, w19);
+ __ Umull(x25, w17, w18);
END();
RUN();
@@ -1360,6 +1365,8 @@
ASSERT_EQUAL_64(0xfffffffe00000005, x14);
ASSERT_EQUAL_64(0xfffffffe00000005, x15);
ASSERT_EQUAL_64(1, x22);
+ ASSERT_EQUAL_64(0xfffffffe00000001, x24);
+ ASSERT_EQUAL_64(0x00000000ffffffff, x25);
TEARDOWN();
}
@@ -1460,11 +1467,11 @@
ASSERT_EQUAL_64(0, x8);
ASSERT_EQUAL_64(0xffffffff00000001, x9);
ASSERT_EQUAL_64(0x40000000, x10);
- ASSERT_EQUAL_64(0xC0000000, x11);
+ ASSERT_EQUAL_64(0xc0000000, x11);
ASSERT_EQUAL_64(0x0000000040000000, x12);
ASSERT_EQUAL_64(0x0000000040000000, x13);
ASSERT_EQUAL_64(0x4000000000000000, x14);
- ASSERT_EQUAL_64(0xC000000000000000, x15);
+ ASSERT_EQUAL_64(0xc000000000000000, x15);
ASSERT_EQUAL_64(0, x22);
ASSERT_EQUAL_64(0x80000000, x23);
ASSERT_EQUAL_64(0, x24);
@@ -1892,7 +1899,7 @@
SETUP();
ALLOW_ASM();
- Label wrong;
+ Label done, wrong;
START();
__ Mov(x0, 0x1);
@@ -1966,10 +1973,12 @@
__ Mov(x0, 0x0);
__ Bind(&ok_6);
- END();
+ __ B(&done);
__ Bind(&wrong);
__ Mov(x0, 0x0);
+
+ __ Bind(&done);
END();
RUN();
@@ -2624,6 +2633,3283 @@
}
+TEST(load_store_b) {
+ SETUP();
+
+ uint8_t src[3] = {0x12, 0x23, 0x34};
+ uint8_t dst[3] = {0, 0, 0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, dst_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, dst_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, dst_base);
+ __ Ldr(b0, MemOperand(x17, sizeof(src[0])));
+ __ Str(b0, MemOperand(x18, sizeof(dst[0]), PostIndex));
+ __ Ldr(b1, MemOperand(x19, sizeof(src[0]), PostIndex));
+ __ Str(b1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
+ __ Ldr(b2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
+ __ Str(b2, MemOperand(x22, sizeof(dst[0])));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x23, q0);
+ ASSERT_EQUAL_64(0x23, dst[0]);
+ ASSERT_EQUAL_128(0, 0x12, q1);
+ ASSERT_EQUAL_64(0x12, dst[2]);
+ ASSERT_EQUAL_128(0, 0x34, q2);
+ ASSERT_EQUAL_64(0x34, dst[1]);
+ ASSERT_EQUAL_64(src_base, x17);
+ ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
+ ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
+ ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
+ ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
+ ASSERT_EQUAL_64(dst_base, x22);
+
+ TEARDOWN();
+}
+
+
+TEST(load_store_h) {
+ SETUP();
+
+ uint16_t src[3] = {0x1234, 0x2345, 0x3456};
+ uint16_t dst[3] = {0, 0, 0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, dst_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, dst_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, dst_base);
+ __ Ldr(h0, MemOperand(x17, sizeof(src[0])));
+ __ Str(h0, MemOperand(x18, sizeof(dst[0]), PostIndex));
+ __ Ldr(h1, MemOperand(x19, sizeof(src[0]), PostIndex));
+ __ Str(h1, MemOperand(x20, 2 * sizeof(dst[0]), PreIndex));
+ __ Ldr(h2, MemOperand(x21, 2 * sizeof(src[0]), PreIndex));
+ __ Str(h2, MemOperand(x22, sizeof(dst[0])));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x2345, q0);
+ ASSERT_EQUAL_64(0x2345, dst[0]);
+ ASSERT_EQUAL_128(0, 0x1234, q1);
+ ASSERT_EQUAL_64(0x1234, dst[2]);
+ ASSERT_EQUAL_128(0, 0x3456, q2);
+ ASSERT_EQUAL_64(0x3456, dst[1]);
+ ASSERT_EQUAL_64(src_base, x17);
+ ASSERT_EQUAL_64(dst_base + sizeof(dst[0]), x18);
+ ASSERT_EQUAL_64(src_base + sizeof(src[0]), x19);
+ ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[0]), x20);
+ ASSERT_EQUAL_64(src_base + 2 * sizeof(src[0]), x21);
+ ASSERT_EQUAL_64(dst_base, x22);
+
+ TEARDOWN();
+}
+
+
+TEST(load_store_q) {
+ SETUP();
+
+ uint8_t src[48] = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
+ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
+ 0x21, 0x43, 0x65, 0x87, 0xa9, 0xcb, 0xed, 0x0f,
+ 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
+ 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02,
+ 0x42, 0x64, 0x86, 0xa8, 0xca, 0xec, 0x0e, 0x20};
+
+ uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, dst_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, dst_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, dst_base);
+ __ Ldr(q0, MemOperand(x17, 16));
+ __ Str(q0, MemOperand(x18, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Str(q1, MemOperand(x20, 32, PreIndex));
+ __ Ldr(q2, MemOperand(x21, 32, PreIndex));
+ __ Str(q2, MemOperand(x22, 16));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xf0debc9a78563412, 0x0fedcba987654321, q0);
+ ASSERT_EQUAL_64(0x0fedcba987654321, dst[0]);
+ ASSERT_EQUAL_64(0xf0debc9a78563412, dst[1]);
+ ASSERT_EQUAL_128(0xefcdab8967452301, 0xfedcba9876543210, q1);
+ ASSERT_EQUAL_64(0xfedcba9876543210, dst[4]);
+ ASSERT_EQUAL_64(0xefcdab8967452301, dst[5]);
+ ASSERT_EQUAL_128(0x200eeccaa8866442, 0x02e0ceac8a684624, q2);
+ ASSERT_EQUAL_64(0x02e0ceac8a684624, dst[2]);
+ ASSERT_EQUAL_64(0x200eeccaa8866442, dst[3]);
+ ASSERT_EQUAL_64(src_base, x17);
+ ASSERT_EQUAL_64(dst_base + 16, x18);
+ ASSERT_EQUAL_64(src_base + 16, x19);
+ ASSERT_EQUAL_64(dst_base + 32, x20);
+ ASSERT_EQUAL_64(src_base + 32, x21);
+ ASSERT_EQUAL_64(dst_base, x22);
+
+ TEARDOWN();
+}
+
+
+TEST(load_store_v_regoffset) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uint8_t dst[64];
+ memset(dst, 0, sizeof(dst));
+
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, src_base + 16);
+ __ Mov(x18, 1);
+ __ Mov(w19, -1);
+ __ Mov(x20, dst_base - 1);
+
+ __ Ldr(b0, MemOperand(x17, x18));
+ __ Ldr(b1, MemOperand(x17, x19, SXTW));
+
+ __ Ldr(h2, MemOperand(x17, x18));
+ __ Ldr(h3, MemOperand(x17, x18, UXTW, 1));
+ __ Ldr(h4, MemOperand(x17, x19, SXTW, 1));
+ __ Ldr(h5, MemOperand(x17, x18, LSL, 1));
+
+ __ Ldr(s16, MemOperand(x17, x18));
+ __ Ldr(s17, MemOperand(x17, x18, UXTW, 2));
+ __ Ldr(s18, MemOperand(x17, x19, SXTW, 2));
+ __ Ldr(s19, MemOperand(x17, x18, LSL, 2));
+
+ __ Ldr(d20, MemOperand(x17, x18));
+ __ Ldr(d21, MemOperand(x17, x18, UXTW, 3));
+ __ Ldr(d22, MemOperand(x17, x19, SXTW, 3));
+ __ Ldr(d23, MemOperand(x17, x18, LSL, 3));
+
+ __ Ldr(q24, MemOperand(x17, x18));
+ __ Ldr(q25, MemOperand(x17, x18, UXTW, 4));
+ __ Ldr(q26, MemOperand(x17, x19, SXTW, 4));
+ __ Ldr(q27, MemOperand(x17, x18, LSL, 4));
+
+ // Store [bhsdq]27 to adjacent memory locations, then load again to check.
+ __ Str(b27, MemOperand(x20, x18));
+ __ Str(h27, MemOperand(x20, x18, UXTW, 1));
+ __ Add(x20, x20, 8);
+ __ Str(s27, MemOperand(x20, x19, SXTW, 2));
+ __ Sub(x20, x20, 8);
+ __ Str(d27, MemOperand(x20, x18, LSL, 3));
+ __ Add(x20, x20, 32);
+ __ Str(q27, MemOperand(x20, x19, SXTW, 4));
+
+ __ Sub(x20, x20, 32);
+ __ Ldr(q6, MemOperand(x20, x18));
+ __ Ldr(q7, MemOperand(x20, x18, LSL, 4));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x11, q0);
+ ASSERT_EQUAL_128(0, 0x0f, q1);
+ ASSERT_EQUAL_128(0, 0x1211, q2);
+ ASSERT_EQUAL_128(0, 0x1312, q3);
+ ASSERT_EQUAL_128(0, 0x0f0e, q4);
+ ASSERT_EQUAL_128(0, 0x1312, q5);
+ ASSERT_EQUAL_128(0, 0x14131211, q16);
+ ASSERT_EQUAL_128(0, 0x17161514, q17);
+ ASSERT_EQUAL_128(0, 0x0f0e0d0c, q18);
+ ASSERT_EQUAL_128(0, 0x17161514, q19);
+ ASSERT_EQUAL_128(0, 0x1817161514131211, q20);
+ ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q21);
+ ASSERT_EQUAL_128(0, 0x0f0e0d0c0b0a0908, q22);
+ ASSERT_EQUAL_128(0, 0x1f1e1d1c1b1a1918, q23);
+ ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q24);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q25);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q26);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q27);
+ ASSERT_EQUAL_128(0x2027262524232221, 0x2023222120212020, q6);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q7);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_d) {
+ SETUP();
+
+ uint8_t src[32 + 5];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register.
+ __ Ld1(v2.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
+ ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
+ ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
+ ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
+ ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
+ ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
+ ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
+ ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
+ ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
+ ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
+ ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
+ ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
+ ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
+ ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
+ ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
+ ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
+ ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
+ ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_d_postindex) {
+ SETUP();
+
+ uint8_t src[32 + 5];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, src_base + 5);
+ __ Mov(x23, 1);
+ __ Ldr(q2, MemOperand(x17)); // Initialise top 64-bits of Q register.
+ __ Ld1(v2.V8B(), MemOperand(x17, x23, PostIndex));
+ __ Ld1(v3.V8B(), v4.V8B(), MemOperand(x18, 16, PostIndex));
+ __ Ld1(v5.V4H(), v6.V4H(), v7.V4H(), MemOperand(x19, 24, PostIndex));
+ __ Ld1(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
+ MemOperand(x20, 32, PostIndex));
+ __ Ld1(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
+ MemOperand(x21, 32, PostIndex));
+ __ Ld1(v20.V1D(), v21.V1D(), v22.V1D(), v23.V1D(),
+ MemOperand(x22, 32, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q2);
+ ASSERT_EQUAL_128(0, 0x0807060504030201, q3);
+ ASSERT_EQUAL_128(0, 0x100f0e0d0c0b0a09, q4);
+ ASSERT_EQUAL_128(0, 0x0908070605040302, q5);
+ ASSERT_EQUAL_128(0, 0x11100f0e0d0c0b0a, q6);
+ ASSERT_EQUAL_128(0, 0x1918171615141312, q7);
+ ASSERT_EQUAL_128(0, 0x0a09080706050403, q16);
+ ASSERT_EQUAL_128(0, 0x1211100f0e0d0c0b, q17);
+ ASSERT_EQUAL_128(0, 0x1a19181716151413, q18);
+ ASSERT_EQUAL_128(0, 0x2221201f1e1d1c1b, q19);
+ ASSERT_EQUAL_128(0, 0x0b0a090807060504, q30);
+ ASSERT_EQUAL_128(0, 0x131211100f0e0d0c, q31);
+ ASSERT_EQUAL_128(0, 0x1b1a191817161514, q0);
+ ASSERT_EQUAL_128(0, 0x232221201f1e1d1c, q1);
+ ASSERT_EQUAL_128(0, 0x0c0b0a0908070605, q20);
+ ASSERT_EQUAL_128(0, 0x14131211100f0e0d, q21);
+ ASSERT_EQUAL_128(0, 0x1c1b1a1918171615, q22);
+ ASSERT_EQUAL_128(0, 0x24232221201f1e1d, q23);
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 16, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 24, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 32, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 32, x21);
+ ASSERT_EQUAL_64(src_base + 5 + 32, x22);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_q) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld1(v2.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
+ ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
+ ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
+ ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
+ ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
+ ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
+ ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
+ ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
+ ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
+ ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
+ ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
+ ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_q_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+ __ Ld1(v2.V16B(), MemOperand(x17, x22, PostIndex));
+ __ Ld1(v3.V16B(), v4.V16B(), MemOperand(x18, 32, PostIndex));
+ __ Ld1(v5.V8H(), v6.V8H(), v7.V8H(), MemOperand(x19, 48, PostIndex));
+ __ Ld1(v16.V4S(), v17.V4S(), v18.V4S(), v19.V4S(),
+ MemOperand(x20, 64, PostIndex));
+ __ Ld1(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
+ MemOperand(x21, 64, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q2);
+ ASSERT_EQUAL_128(0x100f0e0d0c0b0a09, 0x0807060504030201, q3);
+ ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x1817161514131211, q4);
+ ASSERT_EQUAL_128(0x11100f0e0d0c0b0a, 0x0908070605040302, q5);
+ ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x1918171615141312, q6);
+ ASSERT_EQUAL_128(0x31302f2e2d2c2b2a, 0x2928272625242322, q7);
+ ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x0a09080706050403, q16);
+ ASSERT_EQUAL_128(0x2221201f1e1d1c1b, 0x1a19181716151413, q17);
+ ASSERT_EQUAL_128(0x3231302f2e2d2c2b, 0x2a29282726252423, q18);
+ ASSERT_EQUAL_128(0x4241403f3e3d3c3b, 0x3a39383736353433, q19);
+ ASSERT_EQUAL_128(0x131211100f0e0d0c, 0x0b0a090807060504, q30);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x1b1a191817161514, q31);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x2b2a292827262524, q0);
+ ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x3b3a393837363534, q1);
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 32, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 48, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 64, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 64, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_lane) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld1(v0.B(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 7; i >= 0; i--) {
+ __ Ld1(v1.H(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 3; i >= 0; i--) {
+ __ Ld1(v2.S(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 1; i >= 0; i--) {
+ __ Ld1(v3.D(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x17, src_base);
+ __ Ldr(q4, MemOperand(x17));
+ __ Ld1(v4.B(), 4, MemOperand(x17));
+ __ Ldr(q5, MemOperand(x17));
+ __ Ld1(v5.H(), 3, MemOperand(x17));
+ __ Ldr(q6, MemOperand(x17));
+ __ Ld1(v6.S(), 2, MemOperand(x17));
+ __ Ldr(q7, MemOperand(x17));
+ __ Ld1(v7.D(), 1, MemOperand(x17));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
+ ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q1);
+ ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q2);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q3);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
+
+ TEARDOWN();
+}
+
+TEST(neon_ld2_d) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v6.V4H(), v7.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
+ ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
+ ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
+ ASSERT_EQUAL_128(0, 0x100e0c0a08060402, q5);
+ ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q6);
+ ASSERT_EQUAL_128(0, 0x11100d0c09080504, q7);
+ ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q31);
+ ASSERT_EQUAL_128(0, 0x1211100f0a090807, q0);
+
+ TEARDOWN();
+}
+
+TEST(neon_ld2_d_postindex) {
+ SETUP();
+
+ uint8_t src[32 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+ __ Ld2(v2.V8B(), v3.V8B(), MemOperand(x17, x22, PostIndex));
+ __ Ld2(v4.V8B(), v5.V8B(), MemOperand(x18, 16, PostIndex));
+ __ Ld2(v5.V4H(), v6.V4H(), MemOperand(x19, 16, PostIndex));
+ __ Ld2(v16.V2S(), v17.V2S(), MemOperand(x20, 16, PostIndex));
+ __ Ld2(v31.V2S(), v0.V2S(), MemOperand(x21, 16, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0e0c0a0806040200, q2);
+ ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q3);
+ ASSERT_EQUAL_128(0, 0x0f0d0b0907050301, q4);
+ ASSERT_EQUAL_128(0, 0x0f0e0b0a07060302, q5);
+ ASSERT_EQUAL_128(0, 0x11100d0c09080504, q6);
+ ASSERT_EQUAL_128(0, 0x0e0d0c0b06050403, q16);
+ ASSERT_EQUAL_128(0, 0x1211100f0a090807, q17);
+ ASSERT_EQUAL_128(0, 0x0f0e0d0c07060504, q31);
+ ASSERT_EQUAL_128(0, 0x131211100b0a0908, q0);
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 16, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 16, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 16, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 16, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_q) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
+ ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
+ ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
+ ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
+ ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
+ ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
+ ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
+ ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
+ ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_q_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+ __ Ld2(v2.V16B(), v3.V16B(), MemOperand(x17, x22, PostIndex));
+ __ Ld2(v4.V16B(), v5.V16B(), MemOperand(x18, 32, PostIndex));
+ __ Ld2(v6.V8H(), v7.V8H(), MemOperand(x19, 32, PostIndex));
+ __ Ld2(v16.V4S(), v17.V4S(), MemOperand(x20, 32, PostIndex));
+ __ Ld2(v31.V2D(), v0.V2D(), MemOperand(x21, 32, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1e1c1a1816141210, 0x0e0c0a0806040200, q2);
+ ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q3);
+ ASSERT_EQUAL_128(0x1f1d1b1917151311, 0x0f0d0b0907050301, q4);
+ ASSERT_EQUAL_128(0x201e1c1a18161412, 0x100e0c0a08060402, q5);
+ ASSERT_EQUAL_128(0x1f1e1b1a17161312, 0x0f0e0b0a07060302, q6);
+ ASSERT_EQUAL_128(0x21201d1c19181514, 0x11100d0c09080504, q7);
+ ASSERT_EQUAL_128(0x1e1d1c1b16151413, 0x0e0d0c0b06050403, q16);
+ ASSERT_EQUAL_128(0x2221201f1a191817, 0x1211100f0a090807, q17);
+ ASSERT_EQUAL_128(0x1b1a191817161514, 0x0b0a090807060504, q31);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x131211100f0e0d0c, q0);
+
+
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 32, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 32, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 32, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 32, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_lane) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld2(v0.B(), v1.B(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 7; i >= 0; i--) {
+ __ Ld2(v2.H(), v3.H(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 3; i >= 0; i--) {
+ __ Ld2(v4.S(), v5.S(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 1; i >= 0; i--) {
+ __ Ld2(v6.D(), v7.D(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x17, src_base);
+ __ Mov(x4, x17);
+ __ Ldr(q8, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q9, MemOperand(x4));
+ __ Ld2(v8.B(), v9.B(), 4, MemOperand(x17));
+ __ Mov(x5, x17);
+ __ Ldr(q10, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q11, MemOperand(x5));
+ __ Ld2(v10.H(), v11.H(), 3, MemOperand(x17));
+ __ Mov(x6, x17);
+ __ Ldr(q12, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q13, MemOperand(x6));
+ __ Ld2(v12.S(), v13.S(), 2, MemOperand(x17));
+ __ Mov(x7, x17);
+ __ Ldr(q14, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q15, MemOperand(x7));
+ __ Ld2(v14.D(), v15.D(), 1, MemOperand(x17));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
+ ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
+ ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q2);
+ ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q3);
+ ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q4);
+ ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q5);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q6);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q7);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
+ ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_lane_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, src_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, src_base);
+ __ Mov(x23, src_base);
+ __ Mov(x24, src_base);
+
+ // Test loading whole register by element.
+ for (int i = 15; i >= 0; i--) {
+ __ Ld2(v0.B(), v1.B(), i, MemOperand(x17, 2, PostIndex));
+ }
+
+ for (int i = 7; i >= 0; i--) {
+ __ Ld2(v2.H(), v3.H(), i, MemOperand(x18, 4, PostIndex));
+ }
+
+ for (int i = 3; i >= 0; i--) {
+ __ Ld2(v4.S(), v5.S(), i, MemOperand(x19, 8, PostIndex));
+ }
+
+ for (int i = 1; i >= 0; i--) {
+ __ Ld2(v6.D(), v7.D(), i, MemOperand(x20, 16, PostIndex));
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x25, 1);
+ __ Mov(x4, x21);
+ __ Ldr(q8, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q9, MemOperand(x4));
+ __ Ld2(v8.B(), v9.B(), 4, MemOperand(x21, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x5, x22);
+ __ Ldr(q10, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q11, MemOperand(x5));
+ __ Ld2(v10.H(), v11.H(), 3, MemOperand(x22, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x6, x23);
+ __ Ldr(q12, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q13, MemOperand(x6));
+ __ Ld2(v12.S(), v13.S(), 2, MemOperand(x23, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x7, x24);
+ __ Ldr(q14, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q15, MemOperand(x7));
+ __ Ld2(v14.D(), v15.D(), 1, MemOperand(x24, x25, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x00020406080a0c0e, 0x10121416181a1c1e, q0);
+ ASSERT_EQUAL_128(0x01030507090b0d0f, 0x11131517191b1d1f, q1);
+ ASSERT_EQUAL_128(0x0100050409080d0c, 0x1110151419181d1c, q2);
+ ASSERT_EQUAL_128(0x030207060b0a0f0e, 0x131217161b1a1f1e, q3);
+ ASSERT_EQUAL_128(0x030201000b0a0908, 0x131211101b1a1918, q4);
+ ASSERT_EQUAL_128(0x070605040f0e0d0c, 0x171615141f1e1d1c, q5);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x1716151413121110, q6);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1f1e1d1c1b1a1918, q7);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q8);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q9);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q10);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q11);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q12);
+ ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q13);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q14);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q15);
+
+
+
+
+ ASSERT_EQUAL_64(src_base + 32, x17);
+ ASSERT_EQUAL_64(src_base + 32, x18);
+ ASSERT_EQUAL_64(src_base + 32, x19);
+ ASSERT_EQUAL_64(src_base + 32, x20);
+ ASSERT_EQUAL_64(src_base + 1, x21);
+ ASSERT_EQUAL_64(src_base + 2, x22);
+ ASSERT_EQUAL_64(src_base + 3, x23);
+ ASSERT_EQUAL_64(src_base + 4, x24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_alllanes) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 2);
+ __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 4);
+ __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 8);
+ __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
+ ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
+ ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
+ ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
+ ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
+ ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
+ ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld2_alllanes_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld2r(v0.V8B(), v1.V8B(), MemOperand(x17, 2, PostIndex));
+ __ Ld2r(v2.V16B(), v3.V16B(), MemOperand(x17, x18, PostIndex));
+ __ Ld2r(v4.V4H(), v5.V4H(), MemOperand(x17, x18, PostIndex));
+ __ Ld2r(v6.V8H(), v7.V8H(), MemOperand(x17, 4, PostIndex));
+ __ Ld2r(v8.V2S(), v9.V2S(), MemOperand(x17, x18, PostIndex));
+ __ Ld2r(v10.V4S(), v11.V4S(), MemOperand(x17, 8, PostIndex));
+ __ Ld2r(v12.V2D(), v13.V2D(), MemOperand(x17, 16, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0303030303030303, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0504050405040504, q4);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q5);
+ ASSERT_EQUAL_128(0x0605060506050605, 0x0605060506050605, q6);
+ ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0c0b0a090c0b0a09, q8);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x100f0e0d100f0e0d, q9);
+ ASSERT_EQUAL_128(0x0d0c0b0a0d0c0b0a, 0x0d0c0b0a0d0c0b0a, q10);
+ ASSERT_EQUAL_128(0x11100f0e11100f0e, 0x11100f0e11100f0e, q11);
+ ASSERT_EQUAL_128(0x1918171615141312, 0x1918171615141312, q12);
+ ASSERT_EQUAL_128(0x21201f1e1d1c1b1a, 0x21201f1e1d1c1b1a, q13);
+ ASSERT_EQUAL_64(src_base + 34, x17);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_d) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
+ ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
+ ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
+ ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
+ ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
+ ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
+ ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
+ ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
+ ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
+ ASSERT_EQUAL_128(0, 0x1211100f06050403, q31);
+ ASSERT_EQUAL_128(0, 0x161514130a090807, q0);
+ ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q1);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_d_postindex) {
+ SETUP();
+
+ uint8_t src[32 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+ __ Ld3(v2.V8B(), v3.V8B(), v4.V8B(), MemOperand(x17, x22, PostIndex));
+ __ Ld3(v5.V8B(), v6.V8B(), v7.V8B(), MemOperand(x18, 24, PostIndex));
+ __ Ld3(v8.V4H(), v9.V4H(), v10.V4H(), MemOperand(x19, 24, PostIndex));
+ __ Ld3(v11.V2S(), v12.V2S(), v13.V2S(), MemOperand(x20, 24, PostIndex));
+ __ Ld3(v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x21, 24, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x15120f0c09060300, q2);
+ ASSERT_EQUAL_128(0, 0x1613100d0a070401, q3);
+ ASSERT_EQUAL_128(0, 0x1714110e0b080502, q4);
+ ASSERT_EQUAL_128(0, 0x1613100d0a070401, q5);
+ ASSERT_EQUAL_128(0, 0x1714110e0b080502, q6);
+ ASSERT_EQUAL_128(0, 0x1815120f0c090603, q7);
+ ASSERT_EQUAL_128(0, 0x15140f0e09080302, q8);
+ ASSERT_EQUAL_128(0, 0x171611100b0a0504, q9);
+ ASSERT_EQUAL_128(0, 0x191813120d0c0706, q10);
+ ASSERT_EQUAL_128(0, 0x1211100f06050403, q11);
+ ASSERT_EQUAL_128(0, 0x161514130a090807, q12);
+ ASSERT_EQUAL_128(0, 0x1a1918170e0d0c0b, q13);
+ ASSERT_EQUAL_128(0, 0x1312111007060504, q31);
+ ASSERT_EQUAL_128(0, 0x171615140b0a0908, q0);
+ ASSERT_EQUAL_128(0, 0x1b1a19180f0e0d0c, q1);
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 24, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 24, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 24, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 24, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_q) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
+ ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
+ ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
+ ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
+ ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
+ ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
+ ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
+ ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
+ ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
+ ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
+ ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
+ ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
+ ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_q_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+
+ __ Ld3(v2.V16B(), v3.V16B(), v4.V16B(), MemOperand(x17, x22, PostIndex));
+ __ Ld3(v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x18, 48, PostIndex));
+ __ Ld3(v8.V8H(), v9.V8H(), v10.V8H(), MemOperand(x19, 48, PostIndex));
+ __ Ld3(v11.V4S(), v12.V4S(), v13.V4S(), MemOperand(x20, 48, PostIndex));
+ __ Ld3(v31.V2D(), v0.V2D(), v1.V2D(), MemOperand(x21, 48, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2d2a2724211e1b18, 0x15120f0c09060300, q2);
+ ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q3);
+ ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q4);
+ ASSERT_EQUAL_128(0x2e2b2825221f1c19, 0x1613100d0a070401, q5);
+ ASSERT_EQUAL_128(0x2f2c292623201d1a, 0x1714110e0b080502, q6);
+ ASSERT_EQUAL_128(0x302d2a2724211e1b, 0x1815120f0c090603, q7);
+ ASSERT_EQUAL_128(0x2d2c272621201b1a, 0x15140f0e09080302, q8);
+ ASSERT_EQUAL_128(0x2f2e292823221d1c, 0x171611100b0a0504, q9);
+ ASSERT_EQUAL_128(0x31302b2a25241f1e, 0x191813120d0c0706, q10);
+ ASSERT_EQUAL_128(0x2a2928271e1d1c1b, 0x1211100f06050403, q11);
+ ASSERT_EQUAL_128(0x2e2d2c2b2221201f, 0x161514130a090807, q12);
+ ASSERT_EQUAL_128(0x3231302f26252423, 0x1a1918170e0d0c0b, q13);
+ ASSERT_EQUAL_128(0x232221201f1e1d1c, 0x0b0a090807060504, q31);
+ ASSERT_EQUAL_128(0x2b2a292827262524, 0x131211100f0e0d0c, q0);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x1b1a191817161514, q1);
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 48, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 48, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 48, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 48, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_lane) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 7; i >= 0; i--) {
+ __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 3; i >= 0; i--) {
+ __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 1; i >= 0; i--) {
+ __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x17, src_base);
+ __ Mov(x4, x17);
+ __ Ldr(q12, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q13, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q14, MemOperand(x4));
+ __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x17));
+ __ Mov(x5, x17);
+ __ Ldr(q15, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q16, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q17, MemOperand(x5));
+ __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x17));
+ __ Mov(x6, x17);
+ __ Ldr(q18, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q19, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q20, MemOperand(x6));
+ __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x17));
+ __ Mov(x7, x17);
+ __ Ldr(q21, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q22, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q23, MemOperand(x7));
+ __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x17));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
+ ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
+ ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
+ ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q3);
+ ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q4);
+ ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q5);
+ ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q6);
+ ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q7);
+ ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q8);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q9);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q10);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q11);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_lane_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, src_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, src_base);
+ __ Mov(x23, src_base);
+ __ Mov(x24, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld3(v0.B(), v1.B(), v2.B(), i, MemOperand(x17, 3, PostIndex));
+ }
+
+ for (int i = 7; i >= 0; i--) {
+ __ Ld3(v3.H(), v4.H(), v5.H(), i, MemOperand(x18, 6, PostIndex));
+ }
+
+ for (int i = 3; i >= 0; i--) {
+ __ Ld3(v6.S(), v7.S(), v8.S(), i, MemOperand(x19, 12, PostIndex));
+ }
+
+ for (int i = 1; i >= 0; i--) {
+ __ Ld3(v9.D(), v10.D(), v11.D(), i, MemOperand(x20, 24, PostIndex));
+ }
+
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x25, 1);
+ __ Mov(x4, x21);
+ __ Ldr(q12, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q13, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q14, MemOperand(x4));
+ __ Ld3(v12.B(), v13.B(), v14.B(), 4, MemOperand(x21, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x5, x22);
+ __ Ldr(q15, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q16, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q17, MemOperand(x5));
+ __ Ld3(v15.H(), v16.H(), v17.H(), 3, MemOperand(x22, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x6, x23);
+ __ Ldr(q18, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q19, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q20, MemOperand(x6));
+ __ Ld3(v18.S(), v19.S(), v20.S(), 2, MemOperand(x23, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x7, x24);
+ __ Ldr(q21, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q22, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q23, MemOperand(x7));
+ __ Ld3(v21.D(), v22.D(), v23.D(), 1, MemOperand(x24, x25, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x000306090c0f1215, 0x181b1e2124272a2d, q0);
+ ASSERT_EQUAL_128(0x0104070a0d101316, 0x191c1f2225282b2e, q1);
+ ASSERT_EQUAL_128(0x0205080b0e111417, 0x1a1d202326292c2f, q2);
+ ASSERT_EQUAL_128(0x010007060d0c1312, 0x19181f1e25242b2a, q3);
+ ASSERT_EQUAL_128(0x030209080f0e1514, 0x1b1a212027262d2c, q4);
+ ASSERT_EQUAL_128(0x05040b0a11101716, 0x1d1c232229282f2e, q5);
+ ASSERT_EQUAL_128(0x030201000f0e0d0c, 0x1b1a191827262524, q6);
+ ASSERT_EQUAL_128(0x0706050413121110, 0x1f1e1d1c2b2a2928, q7);
+ ASSERT_EQUAL_128(0x0b0a090817161514, 0x232221202f2e2d2c, q8);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x1f1e1d1c1b1a1918, q9);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2726252423222120, q10);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x2f2e2d2c2b2a2928, q11);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q12);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q13);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q14);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q15);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q16);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q17);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q18);
+ ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q19);
+ ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q20);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q21);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q22);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q23);
+
+ ASSERT_EQUAL_64(src_base + 48, x17);
+ ASSERT_EQUAL_64(src_base + 48, x18);
+ ASSERT_EQUAL_64(src_base + 48, x19);
+ ASSERT_EQUAL_64(src_base + 48, x20);
+ ASSERT_EQUAL_64(src_base + 1, x21);
+ ASSERT_EQUAL_64(src_base + 2, x22);
+ ASSERT_EQUAL_64(src_base + 3, x23);
+ ASSERT_EQUAL_64(src_base + 4, x24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_alllanes) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 3);
+ __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 6);
+ __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 12);
+ __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
+ ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
+ ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
+ ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
+ ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
+ ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
+ ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
+ ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
+ ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
+ ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
+ ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld3_alllanes_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld3r(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x17, 3, PostIndex));
+ __ Ld3r(v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17, x18, PostIndex));
+ __ Ld3r(v6.V4H(), v7.V4H(), v8.V4H(), MemOperand(x17, x18, PostIndex));
+ __ Ld3r(v9.V8H(), v10.V8H(), v11.V8H(), MemOperand(x17, 6, PostIndex));
+ __ Ld3r(v12.V2S(), v13.V2S(), v14.V2S(), MemOperand(x17, x18, PostIndex));
+ __ Ld3r(v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17, 12, PostIndex));
+ __ Ld3r(v18.V2D(), v19.V2D(), v20.V2D(), MemOperand(x17, 24, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0404040404040404, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
+ ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0605060506050605, q6);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0807080708070807, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0a090a090a090a09, q8);
+ ASSERT_EQUAL_128(0x0706070607060706, 0x0706070607060706, q9);
+ ASSERT_EQUAL_128(0x0908090809080908, 0x0908090809080908, q10);
+ ASSERT_EQUAL_128(0x0b0a0b0a0b0a0b0a, 0x0b0a0b0a0b0a0b0a, q11);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0f0e0d0c, q12);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1312111013121110, q13);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1716151417161514, q14);
+ ASSERT_EQUAL_128(0x100f0e0d100f0e0d, 0x100f0e0d100f0e0d, q15);
+ ASSERT_EQUAL_128(0x1413121114131211, 0x1413121114131211, q16);
+ ASSERT_EQUAL_128(0x1817161518171615, 0x1817161518171615, q17);
+ ASSERT_EQUAL_128(0x201f1e1d1c1b1a19, 0x201f1e1d1c1b1a19, q18);
+ ASSERT_EQUAL_128(0x2827262524232221, 0x2827262524232221, q19);
+ ASSERT_EQUAL_128(0x302f2e2d2c2b2a29, 0x302f2e2d2c2b2a29, q20);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_d) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
+ ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
+ ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
+ ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
+ ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
+ ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
+ ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
+ ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
+ ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
+ ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
+ ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
+ ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
+ ASSERT_EQUAL_128(0, 0x1615141306050403, q30);
+ ASSERT_EQUAL_128(0, 0x1a1918170a090807, q31);
+ ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q0);
+ ASSERT_EQUAL_128(0, 0x2221201f1211100f, q1);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_d_postindex) {
+ SETUP();
+
+ uint8_t src[32 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+ __ Ld4(v2.V8B(), v3.V8B(), v4.V8B(), v5.V8B(),
+ MemOperand(x17, x22, PostIndex));
+ __ Ld4(v6.V8B(), v7.V8B(), v8.V8B(), v9.V8B(),
+ MemOperand(x18, 32, PostIndex));
+ __ Ld4(v10.V4H(), v11.V4H(), v12.V4H(), v13.V4H(),
+ MemOperand(x19, 32, PostIndex));
+ __ Ld4(v14.V2S(), v15.V2S(), v16.V2S(), v17.V2S(),
+ MemOperand(x20, 32, PostIndex));
+ __ Ld4(v30.V2S(), v31.V2S(), v0.V2S(), v1.V2S(),
+ MemOperand(x21, 32, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x1c1814100c080400, q2);
+ ASSERT_EQUAL_128(0, 0x1d1915110d090501, q3);
+ ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q4);
+ ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q5);
+ ASSERT_EQUAL_128(0, 0x1d1915110d090501, q6);
+ ASSERT_EQUAL_128(0, 0x1e1a16120e0a0602, q7);
+ ASSERT_EQUAL_128(0, 0x1f1b17130f0b0703, q8);
+ ASSERT_EQUAL_128(0, 0x201c1814100c0804, q9);
+ ASSERT_EQUAL_128(0, 0x1b1a13120b0a0302, q10);
+ ASSERT_EQUAL_128(0, 0x1d1c15140d0c0504, q11);
+ ASSERT_EQUAL_128(0, 0x1f1e17160f0e0706, q12);
+ ASSERT_EQUAL_128(0, 0x2120191811100908, q13);
+ ASSERT_EQUAL_128(0, 0x1615141306050403, q14);
+ ASSERT_EQUAL_128(0, 0x1a1918170a090807, q15);
+ ASSERT_EQUAL_128(0, 0x1e1d1c1b0e0d0c0b, q16);
+ ASSERT_EQUAL_128(0, 0x2221201f1211100f, q17);
+ ASSERT_EQUAL_128(0, 0x1716151407060504, q30);
+ ASSERT_EQUAL_128(0, 0x1b1a19180b0a0908, q31);
+ ASSERT_EQUAL_128(0, 0x1f1e1d1c0f0e0d0c, q0);
+ ASSERT_EQUAL_128(0, 0x2322212013121110, q1);
+
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 32, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 32, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 32, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 32, x21);
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_q) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4(v18.V2D(), v19.V2D(), v20.V2D(), v21.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
+ ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
+ ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
+ ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
+ ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
+ ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
+ ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
+ ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
+ ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
+ ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
+ ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
+ ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
+ ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
+ ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
+ ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
+ ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
+ ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q18);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q19);
+ ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q20);
+ ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_q_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 4];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base + 1);
+ __ Mov(x19, src_base + 2);
+ __ Mov(x20, src_base + 3);
+ __ Mov(x21, src_base + 4);
+ __ Mov(x22, 1);
+
+ __ Ld4(v2.V16B(), v3.V16B(), v4.V16B(), v5.V16B(),
+ MemOperand(x17, x22, PostIndex));
+ __ Ld4(v6.V16B(), v7.V16B(), v8.V16B(), v9.V16B(),
+ MemOperand(x18, 64, PostIndex));
+ __ Ld4(v10.V8H(), v11.V8H(), v12.V8H(), v13.V8H(),
+ MemOperand(x19, 64, PostIndex));
+ __ Ld4(v14.V4S(), v15.V4S(), v16.V4S(), v17.V4S(),
+ MemOperand(x20, 64, PostIndex));
+ __ Ld4(v30.V2D(), v31.V2D(), v0.V2D(), v1.V2D(),
+ MemOperand(x21, 64, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x3c3834302c282420, 0x1c1814100c080400, q2);
+ ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q3);
+ ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q4);
+ ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q5);
+ ASSERT_EQUAL_128(0x3d3935312d292521, 0x1d1915110d090501, q6);
+ ASSERT_EQUAL_128(0x3e3a36322e2a2622, 0x1e1a16120e0a0602, q7);
+ ASSERT_EQUAL_128(0x3f3b37332f2b2723, 0x1f1b17130f0b0703, q8);
+ ASSERT_EQUAL_128(0x403c3834302c2824, 0x201c1814100c0804, q9);
+ ASSERT_EQUAL_128(0x3b3a33322b2a2322, 0x1b1a13120b0a0302, q10);
+ ASSERT_EQUAL_128(0x3d3c35342d2c2524, 0x1d1c15140d0c0504, q11);
+ ASSERT_EQUAL_128(0x3f3e37362f2e2726, 0x1f1e17160f0e0706, q12);
+ ASSERT_EQUAL_128(0x4140393831302928, 0x2120191811100908, q13);
+ ASSERT_EQUAL_128(0x3635343326252423, 0x1615141306050403, q14);
+ ASSERT_EQUAL_128(0x3a3938372a292827, 0x1a1918170a090807, q15);
+ ASSERT_EQUAL_128(0x3e3d3c3b2e2d2c2b, 0x1e1d1c1b0e0d0c0b, q16);
+ ASSERT_EQUAL_128(0x4241403f3231302f, 0x2221201f1211100f, q17);
+ ASSERT_EQUAL_128(0x2b2a292827262524, 0x0b0a090807060504, q30);
+ ASSERT_EQUAL_128(0x333231302f2e2d2c, 0x131211100f0e0d0c, q31);
+ ASSERT_EQUAL_128(0x3b3a393837363534, 0x1b1a191817161514, q0);
+ ASSERT_EQUAL_128(0x434241403f3e3d3c, 0x232221201f1e1d1c, q1);
+
+
+
+ ASSERT_EQUAL_64(src_base + 1, x17);
+ ASSERT_EQUAL_64(src_base + 1 + 64, x18);
+ ASSERT_EQUAL_64(src_base + 2 + 64, x19);
+ ASSERT_EQUAL_64(src_base + 3 + 64, x20);
+ ASSERT_EQUAL_64(src_base + 4 + 64, x21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_lane) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 7; i >= 0; i--) {
+ __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 3; i >= 0; i--) {
+ __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ __ Mov(x17, src_base);
+ for (int i = 1; i >= 0; i--) {
+ __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x17, src_base);
+ __ Mov(x4, x17);
+ __ Ldr(q16, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q17, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q18, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q19, MemOperand(x4));
+ __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4, MemOperand(x17));
+
+ __ Mov(x5, x17);
+ __ Ldr(q20, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q21, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q22, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q23, MemOperand(x5));
+ __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3, MemOperand(x17));
+
+ __ Mov(x6, x17);
+ __ Ldr(q24, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q25, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q26, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q27, MemOperand(x6));
+ __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2, MemOperand(x17));
+
+ __ Mov(x7, x17);
+ __ Ldr(q28, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q29, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q30, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q31, MemOperand(x7));
+ __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1, MemOperand(x17));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
+ ASSERT_EQUAL_128(0x0102030405060708, 0x090a0b0c0d0e0f10, q1);
+ ASSERT_EQUAL_128(0x0203040506070809, 0x0a0b0c0d0e0f1011, q2);
+ ASSERT_EQUAL_128(0x030405060708090a, 0x0b0c0d0e0f101112, q3);
+ ASSERT_EQUAL_128(0x0100020103020403, 0x0504060507060807, q4);
+ ASSERT_EQUAL_128(0x0302040305040605, 0x0706080709080a09, q5);
+ ASSERT_EQUAL_128(0x0504060507060807, 0x09080a090b0a0c0b, q6);
+ ASSERT_EQUAL_128(0x0706080709080a09, 0x0b0a0c0b0d0c0e0d, q7);
+ ASSERT_EQUAL_128(0x0302010004030201, 0x0504030206050403, q8);
+ ASSERT_EQUAL_128(0x0706050408070605, 0x090807060a090807, q9);
+ ASSERT_EQUAL_128(0x0b0a09080c0b0a09, 0x0d0c0b0a0e0d0c0b, q10);
+ ASSERT_EQUAL_128(0x0f0e0d0c100f0e0d, 0x11100f0e1211100f, q11);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0807060504030201, q12);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x100f0e0d0c0b0a09, q13);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x1817161514131211, q14);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x201f1e1d1c1b1a19, q15);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
+ ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
+ ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
+ ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
+
+ TEARDOWN();
+}
+
+
+
+TEST(neon_ld4_lane_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+
+ // Test loading whole register by element.
+ __ Mov(x17, src_base);
+ for (int i = 15; i >= 0; i--) {
+ __ Ld4(v0.B(), v1.B(), v2.B(), v3.B(), i,
+ MemOperand(x17, 4, PostIndex));
+ }
+
+ __ Mov(x18, src_base);
+ for (int i = 7; i >= 0; i--) {
+ __ Ld4(v4.H(), v5.H(), v6.H(), v7.H(), i,
+ MemOperand(x18, 8, PostIndex));
+ }
+
+ __ Mov(x19, src_base);
+ for (int i = 3; i >= 0; i--) {
+ __ Ld4(v8.S(), v9.S(), v10.S(), v11.S(), i,
+ MemOperand(x19, 16, PostIndex));
+ }
+
+ __ Mov(x20, src_base);
+ for (int i = 1; i >= 0; i--) {
+ __ Ld4(v12.D(), v13.D(), v14.D(), v15.D(), i,
+ MemOperand(x20, 32, PostIndex));
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x25, 1);
+ __ Mov(x21, src_base);
+ __ Mov(x22, src_base);
+ __ Mov(x23, src_base);
+ __ Mov(x24, src_base);
+
+ __ Mov(x4, x21);
+ __ Ldr(q16, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q17, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q18, MemOperand(x4, 16, PostIndex));
+ __ Ldr(q19, MemOperand(x4));
+ __ Ld4(v16.B(), v17.B(), v18.B(), v19.B(), 4,
+ MemOperand(x21, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x5, x22);
+ __ Ldr(q20, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q21, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q22, MemOperand(x5, 16, PostIndex));
+ __ Ldr(q23, MemOperand(x5));
+ __ Ld4(v20.H(), v21.H(), v22.H(), v23.H(), 3,
+ MemOperand(x22, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x6, x23);
+ __ Ldr(q24, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q25, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q26, MemOperand(x6, 16, PostIndex));
+ __ Ldr(q27, MemOperand(x6));
+ __ Ld4(v24.S(), v25.S(), v26.S(), v27.S(), 2,
+ MemOperand(x23, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Mov(x7, x24);
+ __ Ldr(q28, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q29, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q30, MemOperand(x7, 16, PostIndex));
+ __ Ldr(q31, MemOperand(x7));
+ __ Ld4(v28.D(), v29.D(), v30.D(), v31.D(), 1,
+ MemOperand(x24, x25, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0004080c1014181c, 0x2024282c3034383c, q0);
+ ASSERT_EQUAL_128(0x0105090d1115191d, 0x2125292d3135393d, q1);
+ ASSERT_EQUAL_128(0x02060a0e12161a1e, 0x22262a2e32363a3e, q2);
+ ASSERT_EQUAL_128(0x03070b0f13171b1f, 0x23272b2f33373b3f, q3);
+ ASSERT_EQUAL_128(0x0100090811101918, 0x2120292831303938, q4);
+ ASSERT_EQUAL_128(0x03020b0a13121b1a, 0x23222b2a33323b3a, q5);
+ ASSERT_EQUAL_128(0x05040d0c15141d1c, 0x25242d2c35343d3c, q6);
+ ASSERT_EQUAL_128(0x07060f0e17161f1e, 0x27262f2e37363f3e, q7);
+ ASSERT_EQUAL_128(0x0302010013121110, 0x2322212033323130, q8);
+ ASSERT_EQUAL_128(0x0706050417161514, 0x2726252437363534, q9);
+ ASSERT_EQUAL_128(0x0b0a09081b1a1918, 0x2b2a29283b3a3938, q10);
+ ASSERT_EQUAL_128(0x0f0e0d0c1f1e1d1c, 0x2f2e2d2c3f3e3d3c, q11);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x2726252423222120, q12);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x2f2e2d2c2b2a2928, q13);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x3736353433323130, q14);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3f3e3d3c3b3a3938, q15);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q16);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716150113121110, q17);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726250223222120, q18);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736350333323130, q19);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q20);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0302151413121110, q21);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x0504252423222120, q22);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x0706353433323130, q23);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q24);
+ ASSERT_EQUAL_128(0x1f1e1d1c07060504, 0x1716151413121110, q25);
+ ASSERT_EQUAL_128(0x2f2e2d2c0b0a0908, 0x2726252423222120, q26);
+ ASSERT_EQUAL_128(0x3f3e3d3c0f0e0d0c, 0x3736353433323130, q27);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q28);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x1716151413121110, q29);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x2726252423222120, q30);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x3736353433323130, q31);
+
+ ASSERT_EQUAL_64(src_base + 64, x17);
+ ASSERT_EQUAL_64(src_base + 64, x18);
+ ASSERT_EQUAL_64(src_base + 64, x19);
+ ASSERT_EQUAL_64(src_base + 64, x20);
+ ASSERT_EQUAL_64(src_base + 1, x21);
+ ASSERT_EQUAL_64(src_base + 2, x22);
+ ASSERT_EQUAL_64(src_base + 3, x23);
+ ASSERT_EQUAL_64(src_base + 4, x24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_alllanes) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 4);
+ __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 8);
+ __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 16);
+ __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(), MemOperand(x17));
+
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
+ ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
+ ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
+ ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
+ ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
+ ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
+ ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
+ ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
+ ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
+ ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
+ ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
+ ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
+ ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld4_alllanes_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld4r(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(),
+ MemOperand(x17, 4, PostIndex));
+ __ Ld4r(v4.V16B(), v5.V16B(), v6.V16B(), v7.V16B(),
+ MemOperand(x17, x18, PostIndex));
+ __ Ld4r(v8.V4H(), v9.V4H(), v10.V4H(), v11.V4H(),
+ MemOperand(x17, x18, PostIndex));
+ __ Ld4r(v12.V8H(), v13.V8H(), v14.V8H(), v15.V8H(),
+ MemOperand(x17, 8, PostIndex));
+ __ Ld4r(v16.V2S(), v17.V2S(), v18.V2S(), v19.V2S(),
+ MemOperand(x17, x18, PostIndex));
+ __ Ld4r(v20.V4S(), v21.V4S(), v22.V4S(), v23.V4S(),
+ MemOperand(x17, 16, PostIndex));
+ __ Ld4r(v24.V2D(), v25.V2D(), v26.V2D(), v27.V2D(),
+ MemOperand(x17, 32, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0303030303030303, q2);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q3);
+ ASSERT_EQUAL_128(0x0505050505050505, 0x0505050505050505, q4);
+ ASSERT_EQUAL_128(0x0606060606060606, 0x0606060606060606, q5);
+ ASSERT_EQUAL_128(0x0707070707070707, 0x0707070707070707, q6);
+ ASSERT_EQUAL_128(0x0808080808080808, 0x0808080808080808, q7);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0706070607060706, q8);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0908090809080908, q9);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a0b0a0b0a0b0a, q10);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0d0c0d0c0d0c0d0c, q11);
+ ASSERT_EQUAL_128(0x0807080708070807, 0x0807080708070807, q12);
+ ASSERT_EQUAL_128(0x0a090a090a090a09, 0x0a090a090a090a09, q13);
+ ASSERT_EQUAL_128(0x0c0b0c0b0c0b0c0b, 0x0c0b0c0b0c0b0c0b, q14);
+ ASSERT_EQUAL_128(0x0e0d0e0d0e0d0e0d, 0x0e0d0e0d0e0d0e0d, q15);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1211100f1211100f, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1615141316151413, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1a1918171a191817, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x1e1d1c1b1e1d1c1b, q19);
+ ASSERT_EQUAL_128(0x1312111013121110, 0x1312111013121110, q20);
+ ASSERT_EQUAL_128(0x1716151417161514, 0x1716151417161514, q21);
+ ASSERT_EQUAL_128(0x1b1a19181b1a1918, 0x1b1a19181b1a1918, q22);
+ ASSERT_EQUAL_128(0x1f1e1d1c1f1e1d1c, 0x1f1e1d1c1f1e1d1c, q23);
+ ASSERT_EQUAL_128(0x2726252423222120, 0x2726252423222120, q24);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2f2e2d2c2b2a2928, q25);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x3736353433323130, q26);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3f3e3d3c3b3a3938, q27);
+ ASSERT_EQUAL_64(src_base + 64, x17);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_lane) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, -16);
+ __ Ldr(q0, MemOperand(x17));
+
+ for (int i = 15; i >= 0; i--) {
+ __ St1(v0.B(), i, MemOperand(x17));
+ __ Add(x17, x17, 1);
+ }
+ __ Ldr(q1, MemOperand(x17, x18));
+
+ for (int i = 7; i >= 0; i--) {
+ __ St1(v0.H(), i, MemOperand(x17));
+ __ Add(x17, x17, 2);
+ }
+ __ Ldr(q2, MemOperand(x17, x18));
+
+ for (int i = 3; i >= 0; i--) {
+ __ St1(v0.S(), i, MemOperand(x17));
+ __ Add(x17, x17, 4);
+ }
+ __ Ldr(q3, MemOperand(x17, x18));
+
+ for (int i = 1; i >= 0; i--) {
+ __ St1(v0.D(), i, MemOperand(x17));
+ __ Add(x17, x17, 8);
+ }
+ __ Ldr(q4, MemOperand(x17, x18));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
+ ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
+ ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st2_lane) {
+ SETUP();
+
+ // Struct size * addressing modes * element sizes * vector size.
+ uint8_t dst[2 * 2 * 4 * 16];
+ memset(dst, 0, sizeof(dst));
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, dst_base);
+ __ Mov(x18, dst_base);
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
+
+ // Test B stores with and without post index.
+ for (int i = 15; i >= 0; i--) {
+ __ St2(v0.B(), v1.B(), i, MemOperand(x18));
+ __ Add(x18, x18, 2);
+ }
+ for (int i = 15; i >= 0; i--) {
+ __ St2(v0.B(), v1.B(), i, MemOperand(x18, 2, PostIndex));
+ }
+ __ Ldr(q2, MemOperand(x17, 0 * 16));
+ __ Ldr(q3, MemOperand(x17, 1 * 16));
+ __ Ldr(q4, MemOperand(x17, 2 * 16));
+ __ Ldr(q5, MemOperand(x17, 3 * 16));
+
+ // Test H stores with and without post index.
+ __ Mov(x0, 4);
+ for (int i = 7; i >= 0; i--) {
+ __ St2(v0.H(), v1.H(), i, MemOperand(x18));
+ __ Add(x18, x18, 4);
+ }
+ for (int i = 7; i >= 0; i--) {
+ __ St2(v0.H(), v1.H(), i, MemOperand(x18, x0, PostIndex));
+ }
+ __ Ldr(q6, MemOperand(x17, 4 * 16));
+ __ Ldr(q7, MemOperand(x17, 5 * 16));
+ __ Ldr(q16, MemOperand(x17, 6 * 16));
+ __ Ldr(q17, MemOperand(x17, 7 * 16));
+
+ // Test S stores with and without post index.
+ for (int i = 3; i >= 0; i--) {
+ __ St2(v0.S(), v1.S(), i, MemOperand(x18));
+ __ Add(x18, x18, 8);
+ }
+ for (int i = 3; i >= 0; i--) {
+ __ St2(v0.S(), v1.S(), i, MemOperand(x18, 8, PostIndex));
+ }
+ __ Ldr(q18, MemOperand(x17, 8 * 16));
+ __ Ldr(q19, MemOperand(x17, 9 * 16));
+ __ Ldr(q20, MemOperand(x17, 10 * 16));
+ __ Ldr(q21, MemOperand(x17, 11 * 16));
+
+ // Test D stores with and without post index.
+ __ Mov(x0, 16);
+ __ St2(v0.D(), v1.D(), 1, MemOperand(x18));
+ __ Add(x18, x18, 16);
+ __ St2(v0.D(), v1.D(), 0, MemOperand(x18, 16, PostIndex));
+ __ St2(v0.D(), v1.D(), 1, MemOperand(x18, x0, PostIndex));
+ __ St2(v0.D(), v1.D(), 0, MemOperand(x18, x0, PostIndex));
+ __ Ldr(q22, MemOperand(x17, 12 * 16));
+ __ Ldr(q23, MemOperand(x17, 13 * 16));
+ __ Ldr(q24, MemOperand(x17, 14 * 16));
+ __ Ldr(q25, MemOperand(x17, 15 * 16));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q2);
+ ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q3);
+ ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q4);
+ ASSERT_EQUAL_128(0x1f0f1e0e1d0d1c0c, 0x1b0b1a0a19091808, q5);
+
+ ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q6);
+ ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q7);
+ ASSERT_EQUAL_128(0x1617060714150405, 0x1213020310110001, q16);
+ ASSERT_EQUAL_128(0x1e1f0e0f1c1d0c0d, 0x1a1b0a0b18190809, q17);
+
+ ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q18);
+ ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q19);
+ ASSERT_EQUAL_128(0x1415161704050607, 0x1011121300010203, q20);
+ ASSERT_EQUAL_128(0x1c1d1e1f0c0d0e0f, 0x18191a1b08090a0b, q21);
+
+ ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
+ ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
+ ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q22);
+ ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q23);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st3_lane) {
+ SETUP();
+
+ // Struct size * addressing modes * element sizes * vector size.
+ uint8_t dst[3 * 2 * 4 * 16];
+ memset(dst, 0, sizeof(dst));
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, dst_base);
+ __ Mov(x18, dst_base);
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
+ __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
+
+ // Test B stores with and without post index.
+ for (int i = 15; i >= 0; i--) {
+ __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18));
+ __ Add(x18, x18, 3);
+ }
+ for (int i = 15; i >= 0; i--) {
+ __ St3(v0.B(), v1.B(), v2.B(), i, MemOperand(x18, 3, PostIndex));
+ }
+ __ Ldr(q3, MemOperand(x17, 0 * 16));
+ __ Ldr(q4, MemOperand(x17, 1 * 16));
+ __ Ldr(q5, MemOperand(x17, 2 * 16));
+ __ Ldr(q6, MemOperand(x17, 3 * 16));
+ __ Ldr(q7, MemOperand(x17, 4 * 16));
+ __ Ldr(q16, MemOperand(x17, 5 * 16));
+
+ // Test H stores with and without post index.
+ __ Mov(x0, 6);
+ for (int i = 7; i >= 0; i--) {
+ __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18));
+ __ Add(x18, x18, 6);
+ }
+ for (int i = 7; i >= 0; i--) {
+ __ St3(v0.H(), v1.H(), v2.H(), i, MemOperand(x18, x0, PostIndex));
+ }
+ __ Ldr(q17, MemOperand(x17, 6 * 16));
+ __ Ldr(q18, MemOperand(x17, 7 * 16));
+ __ Ldr(q19, MemOperand(x17, 8 * 16));
+ __ Ldr(q20, MemOperand(x17, 9 * 16));
+ __ Ldr(q21, MemOperand(x17, 10 * 16));
+ __ Ldr(q22, MemOperand(x17, 11 * 16));
+
+ // Test S stores with and without post index.
+ for (int i = 3; i >= 0; i--) {
+ __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18));
+ __ Add(x18, x18, 12);
+ }
+ for (int i = 3; i >= 0; i--) {
+ __ St3(v0.S(), v1.S(), v2.S(), i, MemOperand(x18, 12, PostIndex));
+ }
+ __ Ldr(q23, MemOperand(x17, 12 * 16));
+ __ Ldr(q24, MemOperand(x17, 13 * 16));
+ __ Ldr(q25, MemOperand(x17, 14 * 16));
+ __ Ldr(q26, MemOperand(x17, 15 * 16));
+ __ Ldr(q27, MemOperand(x17, 16 * 16));
+ __ Ldr(q28, MemOperand(x17, 17 * 16));
+
+ // Test D stores with and without post index.
+ __ Mov(x0, 24);
+ __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18));
+ __ Add(x18, x18, 24);
+ __ St3(v0.D(), v1.D(), v2.D(), 0, MemOperand(x18, 24, PostIndex));
+ __ St3(v0.D(), v1.D(), v2.D(), 1, MemOperand(x18, x0, PostIndex));
+ __ Ldr(q29, MemOperand(x17, 18 * 16));
+ __ Ldr(q30, MemOperand(x17, 19 * 16));
+ __ Ldr(q31, MemOperand(x17, 20 * 16));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q3);
+ ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q4);
+ ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q5);
+ ASSERT_EQUAL_128(0x0524140423130322, 0x1202211101201000, q6);
+ ASSERT_EQUAL_128(0x1a0a291909281808, 0x2717072616062515, q7);
+ ASSERT_EQUAL_128(0x2f1f0f2e1e0e2d1d, 0x0d2c1c0c2b1b0b2a, q16);
+
+ ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q17);
+ ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q18);
+ ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q19);
+ ASSERT_EQUAL_128(0x1415040522231213, 0x0203202110110001, q20);
+ ASSERT_EQUAL_128(0x0a0b282918190809, 0x2627161706072425, q21);
+ ASSERT_EQUAL_128(0x2e2f1e1f0e0f2c2d, 0x1c1d0c0d2a2b1a1b, q22);
+
+ ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q23);
+ ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q24);
+ ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q25);
+ ASSERT_EQUAL_128(0x0405060720212223, 0x1011121300010203, q26);
+ ASSERT_EQUAL_128(0x18191a1b08090a0b, 0x2425262714151617, q27);
+ ASSERT_EQUAL_128(0x2c2d2e2f1c1d1e1f, 0x0c0d0e0f28292a2b, q28);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st4_lane) {
+ SETUP();
+
+ // Struct size * element sizes * vector size.
+ uint8_t dst[4 * 4 * 16];
+ memset(dst, 0, sizeof(dst));
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, dst_base);
+ __ Mov(x18, dst_base);
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
+ __ Movi(v2.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
+ __ Movi(v3.V2D(), 0x2021222324252627, 0x28292a2b2c2d2e2f);
+
+ // Test B stores without post index.
+ for (int i = 15; i >= 0; i--) {
+ __ St4(v0.B(), v1.B(), v2.B(), v3.B(), i, MemOperand(x18));
+ __ Add(x18, x18, 4);
+ }
+ __ Ldr(q4, MemOperand(x17, 0 * 16));
+ __ Ldr(q5, MemOperand(x17, 1 * 16));
+ __ Ldr(q6, MemOperand(x17, 2 * 16));
+ __ Ldr(q7, MemOperand(x17, 3 * 16));
+
+ // Test H stores with post index.
+ __ Mov(x0, 8);
+ for (int i = 7; i >= 0; i--) {
+ __ St4(v0.H(), v1.H(), v2.H(), v3.H(), i, MemOperand(x18, x0, PostIndex));
+ }
+ __ Ldr(q16, MemOperand(x17, 4 * 16));
+ __ Ldr(q17, MemOperand(x17, 5 * 16));
+ __ Ldr(q18, MemOperand(x17, 6 * 16));
+ __ Ldr(q19, MemOperand(x17, 7 * 16));
+
+ // Test S stores without post index.
+ for (int i = 3; i >= 0; i--) {
+ __ St4(v0.S(), v1.S(), v2.S(), v3.S(), i, MemOperand(x18));
+ __ Add(x18, x18, 16);
+ }
+ __ Ldr(q20, MemOperand(x17, 8 * 16));
+ __ Ldr(q21, MemOperand(x17, 9 * 16));
+ __ Ldr(q22, MemOperand(x17, 10 * 16));
+ __ Ldr(q23, MemOperand(x17, 11 * 16));
+
+ // Test D stores with post index.
+ __ Mov(x0, 32);
+ __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 0, MemOperand(x18, 32, PostIndex));
+ __ St4(v0.D(), v1.D(), v2.D(), v3.D(), 1, MemOperand(x18, x0, PostIndex));
+
+ __ Ldr(q24, MemOperand(x17, 12 * 16));
+ __ Ldr(q25, MemOperand(x17, 13 * 16));
+ __ Ldr(q26, MemOperand(x17, 14 * 16));
+ __ Ldr(q27, MemOperand(x17, 15 * 16));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2323130322221202, 0x2121110120201000, q4);
+ ASSERT_EQUAL_128(0x2727170726261606, 0x2525150524241404, q5);
+ ASSERT_EQUAL_128(0x2b2b1b0b2a2a1a0a, 0x2929190928281808, q6);
+ ASSERT_EQUAL_128(0x2f2f1f0f2e2e1e0e, 0x2d2d1d0d2c2c1c0c, q7);
+
+ ASSERT_EQUAL_128(0x2223222312130203, 0x2021202110110001, q16);
+ ASSERT_EQUAL_128(0x2627262716170607, 0x2425242514150405, q17);
+ ASSERT_EQUAL_128(0x2a2b2a2b1a1b0a0b, 0x2829282918190809, q18);
+ ASSERT_EQUAL_128(0x2e2f2e2f1e1f0e0f, 0x2c2d2c2d1c1d0c0d, q19);
+
+ ASSERT_EQUAL_128(0x2021222320212223, 0x1011121300010203, q20);
+ ASSERT_EQUAL_128(0x2425262724252627, 0x1415161704050607, q21);
+ ASSERT_EQUAL_128(0x28292a2b28292a2b, 0x18191a1b08090a0b, q22);
+ ASSERT_EQUAL_128(0x2c2d2e2f2c2d2e2f, 0x1c1d1e1f0c0d0e0f, q23);
+
+ ASSERT_EQUAL_128(0x18191a1b1c1d1e1f, 0x08090a0b0c0d0e0f, q24);
+ ASSERT_EQUAL_128(0x28292a2b2c2d2e2f, 0x28292a2b2c2d2e2f, q25);
+ ASSERT_EQUAL_128(0x1011121314151617, 0x0001020304050607, q26);
+ ASSERT_EQUAL_128(0x2021222324252627, 0x2021222324252627, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_lane_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Mov(x19, src_base);
+ __ Mov(x20, src_base);
+ __ Mov(x21, src_base);
+ __ Mov(x22, src_base);
+ __ Mov(x23, src_base);
+ __ Mov(x24, src_base);
+
+ // Test loading whole register by element.
+ for (int i = 15; i >= 0; i--) {
+ __ Ld1(v0.B(), i, MemOperand(x17, 1, PostIndex));
+ }
+
+ for (int i = 7; i >= 0; i--) {
+ __ Ld1(v1.H(), i, MemOperand(x18, 2, PostIndex));
+ }
+
+ for (int i = 3; i >= 0; i--) {
+ __ Ld1(v2.S(), i, MemOperand(x19, 4, PostIndex));
+ }
+
+ for (int i = 1; i >= 0; i--) {
+ __ Ld1(v3.D(), i, MemOperand(x20, 8, PostIndex));
+ }
+
+ // Test loading a single element into an initialised register.
+ __ Mov(x25, 1);
+ __ Ldr(q4, MemOperand(x21));
+ __ Ld1(v4.B(), 4, MemOperand(x21, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Ldr(q5, MemOperand(x22));
+ __ Ld1(v5.H(), 3, MemOperand(x22, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Ldr(q6, MemOperand(x23));
+ __ Ld1(v6.S(), 2, MemOperand(x23, x25, PostIndex));
+ __ Add(x25, x25, 1);
+
+ __ Ldr(q7, MemOperand(x24));
+ __ Ld1(v7.D(), 1, MemOperand(x24, x25, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q0);
+ ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q1);
+ ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q2);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q3);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050003020100, q4);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0100050403020100, q5);
+ ASSERT_EQUAL_128(0x0f0e0d0c03020100, 0x0706050403020100, q6);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0706050403020100, q7);
+ ASSERT_EQUAL_64(src_base + 16, x17);
+ ASSERT_EQUAL_64(src_base + 16, x18);
+ ASSERT_EQUAL_64(src_base + 16, x19);
+ ASSERT_EQUAL_64(src_base + 16, x20);
+ ASSERT_EQUAL_64(src_base + 1, x21);
+ ASSERT_EQUAL_64(src_base + 2, x22);
+ ASSERT_EQUAL_64(src_base + 3, x23);
+ ASSERT_EQUAL_64(src_base + 4, x24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_lane_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, -16);
+ __ Ldr(q0, MemOperand(x17));
+
+ for (int i = 15; i >= 0; i--) {
+ __ St1(v0.B(), i, MemOperand(x17, 1, PostIndex));
+ }
+ __ Ldr(q1, MemOperand(x17, x18));
+
+ for (int i = 7; i >= 0; i--) {
+ __ St1(v0.H(), i, MemOperand(x17, 2, PostIndex));
+ }
+ __ Ldr(q2, MemOperand(x17, x18));
+
+ for (int i = 3; i >= 0; i--) {
+ __ St1(v0.S(), i, MemOperand(x17, 4, PostIndex));
+ }
+ __ Ldr(q3, MemOperand(x17, x18));
+
+ for (int i = 1; i >= 0; i--) {
+ __ St1(v0.D(), i, MemOperand(x17, 8, PostIndex));
+ }
+ __ Ldr(q4, MemOperand(x17, x18));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001020304050607, 0x08090a0b0c0d0e0f, q1);
+ ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q2);
+ ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q3);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q4);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_alllanes) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Ld1r(v0.V8B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v1.V16B(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v2.V4H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v3.V8H(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v4.V2S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v5.V4S(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v6.V1D(), MemOperand(x17));
+ __ Add(x17, x17, 1);
+ __ Ld1r(v7.V2D(), MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
+ ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
+ ASSERT_EQUAL_128(0, 0x0807060508070605, q4);
+ ASSERT_EQUAL_128(0x0908070609080706, 0x0908070609080706, q5);
+ ASSERT_EQUAL_128(0, 0x0e0d0c0b0a090807, q6);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0f0e0d0c0b0a0908, q7);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_ld1_alllanes_postindex) {
+ SETUP();
+
+ uint8_t src[64];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base + 1);
+ __ Mov(x18, 1);
+ __ Ld1r(v0.V8B(), MemOperand(x17, 1, PostIndex));
+ __ Ld1r(v1.V16B(), MemOperand(x17, x18, PostIndex));
+ __ Ld1r(v2.V4H(), MemOperand(x17, x18, PostIndex));
+ __ Ld1r(v3.V8H(), MemOperand(x17, 2, PostIndex));
+ __ Ld1r(v4.V2S(), MemOperand(x17, x18, PostIndex));
+ __ Ld1r(v5.V4S(), MemOperand(x17, 4, PostIndex));
+ __ Ld1r(v6.V2D(), MemOperand(x17, 8, PostIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0101010101010101, q0);
+ ASSERT_EQUAL_128(0x0202020202020202, 0x0202020202020202, q1);
+ ASSERT_EQUAL_128(0, 0x0403040304030403, q2);
+ ASSERT_EQUAL_128(0x0504050405040504, 0x0504050405040504, q3);
+ ASSERT_EQUAL_128(0, 0x0908070609080706, q4);
+ ASSERT_EQUAL_128(0x0a0908070a090807, 0x0a0908070a090807, q5);
+ ASSERT_EQUAL_128(0x1211100f0e0d0c0b, 0x1211100f0e0d0c0b, q6);
+ ASSERT_EQUAL_64(src_base + 19, x17);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_d) {
+ SETUP();
+
+ uint8_t src[14 * kDRegSizeInBytes];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+ __ Mov(x17, src_base);
+
+ __ St1(v0.V8B(), MemOperand(x17));
+ __ Ldr(d16, MemOperand(x17, 8, PostIndex));
+
+ __ St1(v0.V8B(), v1.V8B(), MemOperand(x17));
+ __ Ldr(q17, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17));
+ __ Ldr(d18, MemOperand(x17, 8, PostIndex));
+ __ Ldr(d19, MemOperand(x17, 8, PostIndex));
+ __ Ldr(d20, MemOperand(x17, 8, PostIndex));
+
+ __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x17));
+ __ Ldr(q21, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q22, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(), MemOperand(x17));
+ __ Ldr(q23, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q24, MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q1);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q2);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q3);
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
+ ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
+ ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_d_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 14 * kDRegSizeInBytes];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, -8);
+ __ Mov(x19, -16);
+ __ Mov(x20, -24);
+ __ Mov(x21, -32);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+ __ Mov(x17, src_base);
+
+ __ St1(v0.V8B(), MemOperand(x17, 8, PostIndex));
+ __ Ldr(d16, MemOperand(x17, x18));
+
+ __ St1(v0.V8B(), v1.V8B(), MemOperand(x17, 16, PostIndex));
+ __ Ldr(q17, MemOperand(x17, x19));
+
+ __ St1(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x17, 24, PostIndex));
+ __ Ldr(d18, MemOperand(x17, x20));
+ __ Ldr(d19, MemOperand(x17, x19));
+ __ Ldr(d20, MemOperand(x17, x18));
+
+ __ St1(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(),
+ MemOperand(x17, 32, PostIndex));
+ __ Ldr(q21, MemOperand(x17, x21));
+ __ Ldr(q22, MemOperand(x17, x19));
+
+ __ St1(v0.V1D(), v1.V1D(), v2.V1D(), v3.V1D(),
+ MemOperand(x17, 32, PostIndex));
+ __ Ldr(q23, MemOperand(x17, x21));
+ __ Ldr(q24, MemOperand(x17, x19));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q16);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q17);
+ ASSERT_EQUAL_128(0, 0x0706050403020100, q18);
+ ASSERT_EQUAL_128(0, 0x1716151413121110, q19);
+ ASSERT_EQUAL_128(0, 0x2726252423222120, q20);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q21);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q22);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q23);
+ ASSERT_EQUAL_128(0x3736353433323130, 0x2726252423222120, q24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_q) {
+ SETUP();
+
+ uint8_t src[64 + 160];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V16B(), MemOperand(x17));
+ __ Ldr(q16, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V8H(), v1.V8H(), MemOperand(x17));
+ __ Ldr(q17, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q18, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17));
+ __ Ldr(q19, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q20, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q21, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x17));
+ __ Ldr(q22, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q23, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q24, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q25, MemOperand(x17));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st1_q_postindex) {
+ SETUP();
+
+ uint8_t src[64 + 160];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, -16);
+ __ Mov(x19, -32);
+ __ Mov(x20, -48);
+ __ Mov(x21, -64);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St1(v0.V16B(), MemOperand(x17, 16, PostIndex));
+ __ Ldr(q16, MemOperand(x17, x18));
+
+ __ St1(v0.V8H(), v1.V8H(), MemOperand(x17, 32, PostIndex));
+ __ Ldr(q17, MemOperand(x17, x19));
+ __ Ldr(q18, MemOperand(x17, x18));
+
+ __ St1(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x17, 48, PostIndex));
+ __ Ldr(q19, MemOperand(x17, x20));
+ __ Ldr(q20, MemOperand(x17, x19));
+ __ Ldr(q21, MemOperand(x17, x18));
+
+ __ St1(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(),
+ MemOperand(x17, 64, PostIndex));
+ __ Ldr(q22, MemOperand(x17, x21));
+ __ Ldr(q23, MemOperand(x17, x20));
+ __ Ldr(q24, MemOperand(x17, x19));
+ __ Ldr(q25, MemOperand(x17, x18));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q16);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q17);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q18);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q19);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q20);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q21);
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q22);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x1716151413121110, q23);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726252423222120, q24);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323130, q25);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st2_d) {
+ SETUP();
+
+ uint8_t src[4*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+
+ __ St2(v0.V8B(), v1.V8B(), MemOperand(x18));
+ __ Add(x18, x18, 22);
+ __ St2(v0.V4H(), v1.V4H(), MemOperand(x18));
+ __ Add(x18, x18, 11);
+ __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1707160615051404, 0x1303120211011000, q0);
+ ASSERT_EQUAL_128(0x0504131203021110, 0x0100151413121110, q1);
+ ASSERT_EQUAL_128(0x1615140706050413, 0x1211100302010014, q2);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736353433323117, q3);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st2_d_postindex) {
+ SETUP();
+
+ uint8_t src[4*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+
+ __ St2(v0.V8B(), v1.V8B(), MemOperand(x18, x22, PostIndex));
+ __ St2(v0.V4H(), v1.V4H(), MemOperand(x18, 16, PostIndex));
+ __ St2(v0.V2S(), v1.V2S(), MemOperand(x18));
+
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
+ ASSERT_EQUAL_128(0x0605041312111003, 0x0201001716070615, q1);
+ ASSERT_EQUAL_128(0x2f2e2d2c2b2a2928, 0x2726251716151407, q2);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st2_q) {
+ SETUP();
+
+ uint8_t src[5*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+
+ __ St2(v0.V16B(), v1.V16B(), MemOperand(x18));
+ __ Add(x18, x18, 8);
+ __ St2(v0.V8H(), v1.V8H(), MemOperand(x18));
+ __ Add(x18, x18, 22);
+ __ St2(v0.V4S(), v1.V4S(), MemOperand(x18));
+ __ Add(x18, x18, 2);
+ __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1312030211100100, 0x1303120211011000, q0);
+ ASSERT_EQUAL_128(0x01000b0a19180908, 0x1716070615140504, q1);
+ ASSERT_EQUAL_128(0x1716151413121110, 0x0706050403020100, q2);
+ ASSERT_EQUAL_128(0x1f1e1d1c1b1a1918, 0x0f0e0d0c0b0a0908, q3);
+ TEARDOWN();
+}
+
+
+TEST(neon_st2_q_postindex) {
+ SETUP();
+
+ uint8_t src[5*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+
+ __ St2(v0.V16B(), v1.V16B(), MemOperand(x18, x22, PostIndex));
+ __ St2(v0.V8H(), v1.V8H(), MemOperand(x18, 32, PostIndex));
+ __ St2(v0.V4S(), v1.V4S(), MemOperand(x18, x22, PostIndex));
+ __ St2(v0.V2D(), v1.V2D(), MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1405041312030211, 0x1001000211011000, q0);
+ ASSERT_EQUAL_128(0x1c0d0c1b1a0b0a19, 0x1809081716070615, q1);
+ ASSERT_EQUAL_128(0x0504030201001003, 0x0201001f1e0f0e1d, q2);
+ ASSERT_EQUAL_128(0x0d0c0b0a09081716, 0x1514131211100706, q3);
+ ASSERT_EQUAL_128(0x4f4e4d4c4b4a1f1e, 0x1d1c1b1a19180f0e, q4);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st3_d) {
+ SETUP();
+
+ uint8_t src[3*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+
+ __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18));
+ __ Add(x18, x18, 3);
+ __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18));
+ __ Add(x18, x18, 2);
+ __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
+
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2221201312111003, 0x0201000100201000, q0);
+ ASSERT_EQUAL_128(0x1f1e1d2726252417, 0x1615140706050423, q1);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st3_d_postindex) {
+ SETUP();
+
+ uint8_t src[4*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+
+ __ St3(v0.V8B(), v1.V8B(), v2.V8B(), MemOperand(x18, x22, PostIndex));
+ __ St3(v0.V4H(), v1.V4H(), v2.V4H(), MemOperand(x18, 24, PostIndex));
+ __ St3(v0.V2S(), v1.V2S(), v2.V2S(), MemOperand(x18));
+
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
+ ASSERT_EQUAL_128(0x0201002726171607, 0x0625241514050423, q1);
+ ASSERT_EQUAL_128(0x1615140706050423, 0x2221201312111003, q2);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b3a3938, 0x3736352726252417, q3);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st3_q) {
+ SETUP();
+
+ uint8_t src[6*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+
+ __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18));
+ __ Add(x18, x18, 5);
+ __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18));
+ __ Add(x18, x18, 12);
+ __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18));
+ __ Add(x18, x18, 22);
+ __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q5, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
+ ASSERT_EQUAL_128(0x0605042322212013, 0x1211100302010023, q1);
+ ASSERT_EQUAL_128(0x1007060504030201, 0x0025241716151407, q2);
+ ASSERT_EQUAL_128(0x0827262524232221, 0x2017161514131211, q3);
+ ASSERT_EQUAL_128(0x281f1e1d1c1b1a19, 0x180f0e0d0c0b0a09, q4);
+ ASSERT_EQUAL_128(0x5f5e5d5c5b5a5958, 0x572f2e2d2c2b2a29, q5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st3_q_postindex) {
+ SETUP();
+
+ uint8_t src[7*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+
+ __ St3(v0.V16B(), v1.V16B(), v2.V16B(), MemOperand(x18, x22, PostIndex));
+ __ St3(v0.V8H(), v1.V8H(), v2.V8H(), MemOperand(x18, 48, PostIndex));
+ __ St3(v0.V4S(), v1.V4S(), v2.V4S(), MemOperand(x18, x22, PostIndex));
+ __ St3(v0.V2D(), v1.V2D(), v2.V2D(), MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q5, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q6, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x2213120302212011, 0x1001001101201000, q0);
+ ASSERT_EQUAL_128(0x1809082726171607, 0x0625241514050423, q1);
+ ASSERT_EQUAL_128(0x0e2d2c1d1c0d0c2b, 0x2a1b1a0b0a292819, q2);
+ ASSERT_EQUAL_128(0x0504030201001003, 0x0201002f2e1f1e0f, q3);
+ ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q4);
+ ASSERT_EQUAL_128(0x1d1c1b1a19180f0e, 0x0d0c0b0a09082726, q5);
+ ASSERT_EQUAL_128(0x6f6e6d6c6b6a2f2e, 0x2d2c2b2a29281f1e, q6);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st4_d) {
+ SETUP();
+
+ uint8_t src[4*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(), MemOperand(x18));
+ __ Add(x18, x18, 12);
+ __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(), MemOperand(x18));
+ __ Add(x18, x18, 15);
+ __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(), MemOperand(x18));
+
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1110010032221202, 0X3121110130201000, q0);
+ ASSERT_EQUAL_128(0x1003020100322322, 0X1312030231302120, q1);
+ ASSERT_EQUAL_128(0x1407060504333231, 0X3023222120131211, q2);
+ ASSERT_EQUAL_128(0x3f3e3d3c3b373635, 0x3427262524171615, q3);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st4_d_postindex) {
+ SETUP();
+
+ uint8_t src[5*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St4(v0.V8B(), v1.V8B(), v2.V8B(), v3.V8B(),
+ MemOperand(x18, x22, PostIndex));
+ __ St4(v0.V4H(), v1.V4H(), v2.V4H(), v3.V4H(),
+ MemOperand(x18, 32, PostIndex));
+ __ St4(v0.V2S(), v1.V2S(), v2.V2S(), v3.V2S(),
+ MemOperand(x18));
+
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
+ ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
+ ASSERT_EQUAL_128(0x2221201312111003, 0x0201003736272617, q2);
+ ASSERT_EQUAL_128(0x2625241716151407, 0x0605043332313023, q3);
+ ASSERT_EQUAL_128(0x4f4e4d4c4b4a4948, 0x4746453736353427, q4);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st4_q) {
+ SETUP();
+
+ uint8_t src[7*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(), MemOperand(x18));
+ __ Add(x18, x18, 5);
+ __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(), MemOperand(x18));
+ __ Add(x18, x18, 12);
+ __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(), MemOperand(x18));
+ __ Add(x18, x18, 22);
+ __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(), MemOperand(x18));
+ __ Add(x18, x18, 10);
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q5, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q6, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
+ ASSERT_EQUAL_128(0x3231302322212013, 0x1211100302010013, q1);
+ ASSERT_EQUAL_128(0x1007060504030201, 0x0015140706050433, q2);
+ ASSERT_EQUAL_128(0x3027262524232221, 0x2017161514131211, q3);
+ ASSERT_EQUAL_128(0x180f0e0d0c0b0a09, 0x0837363534333231, q4);
+ ASSERT_EQUAL_128(0x382f2e2d2c2b2a29, 0x281f1e1d1c1b1a19, q5);
+ ASSERT_EQUAL_128(0x6f6e6d6c6b6a6968, 0x673f3e3d3c3b3a39, q6);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_st4_q_postindex) {
+ SETUP();
+
+ uint8_t src[9*16];
+ for (unsigned i = 0; i < sizeof(src); i++) {
+ src[i] = i;
+ }
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+
+ START();
+ __ Mov(x22, 5);
+ __ Mov(x17, src_base);
+ __ Mov(x18, src_base);
+ __ Ldr(q0, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x17, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x17, 16, PostIndex));
+
+ __ St4(v0.V16B(), v1.V16B(), v2.V16B(), v3.V16B(),
+ MemOperand(x18, x22, PostIndex));
+ __ St4(v0.V8H(), v1.V8H(), v2.V8H(), v3.V8H(),
+ MemOperand(x18, 64, PostIndex));
+ __ St4(v0.V4S(), v1.V4S(), v2.V4S(), v3.V4S(),
+ MemOperand(x18, x22, PostIndex));
+ __ St4(v0.V2D(), v1.V2D(), v2.V2D(), v3.V2D(),
+ MemOperand(x18));
+
+ __ Mov(x19, src_base);
+ __ Ldr(q0, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q1, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q2, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q3, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q4, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q5, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q6, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q7, MemOperand(x19, 16, PostIndex));
+ __ Ldr(q8, MemOperand(x19, 16, PostIndex));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1203023130212011, 0x1001000130201000, q0);
+ ASSERT_EQUAL_128(0x1607063534252415, 0x1405043332232213, q1);
+ ASSERT_EQUAL_128(0x1a0b0a3938292819, 0x1809083736272617, q2);
+ ASSERT_EQUAL_128(0x1e0f0e3d3c2d2c1d, 0x1c0d0c3b3a2b2a1b, q3);
+ ASSERT_EQUAL_128(0x0504030201001003, 0x0201003f3e2f2e1f, q4);
+ ASSERT_EQUAL_128(0x2524232221201716, 0x1514131211100706, q5);
+ ASSERT_EQUAL_128(0x0d0c0b0a09083736, 0x3534333231302726, q6);
+ ASSERT_EQUAL_128(0x2d2c2b2a29281f1e, 0x1d1c1b1a19180f0e, q7);
+ ASSERT_EQUAL_128(0x8f8e8d8c8b8a3f3e, 0x3d3c3b3a39382f2e, q8);
+
+ TEARDOWN();
+}
+
+
TEST(ldp_stp_float) {
SETUP();
@@ -2682,6 +5968,39 @@
}
+TEST(ldp_stp_quad) {
+ SETUP();
+
+ uint64_t src[4] = {0x0123456789abcdef, 0xaaaaaaaa55555555,
+ 0xfedcba9876543210, 0x55555555aaaaaaaa};
+ uint64_t dst[6] = {0, 0, 0, 0, 0, 0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x16, src_base);
+ __ Mov(x17, dst_base);
+ __ Ldp(q31, q0, MemOperand(x16, 4 * sizeof(src[0]), PostIndex));
+ __ Stp(q0, q31, MemOperand(x17, 2 * sizeof(dst[1]), PreIndex));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xaaaaaaaa55555555, 0x0123456789abcdef, q31);
+ ASSERT_EQUAL_128(0x55555555aaaaaaaa, 0xfedcba9876543210, q0);
+ ASSERT_EQUAL_64(0, dst[0]);
+ ASSERT_EQUAL_64(0, dst[1]);
+ ASSERT_EQUAL_64(0xfedcba9876543210, dst[2]);
+ ASSERT_EQUAL_64(0x55555555aaaaaaaa, dst[3]);
+ ASSERT_EQUAL_64(0x0123456789abcdef, dst[4]);
+ ASSERT_EQUAL_64(0xaaaaaaaa55555555, dst[5]);
+ ASSERT_EQUAL_64(src_base + 4 * sizeof(src[0]), x16);
+ ASSERT_EQUAL_64(dst_base + 2 * sizeof(dst[1]), x17);
+
+ TEARDOWN();
+}
+
+
TEST(ldp_stp_offset) {
SETUP();
@@ -2796,9 +6115,9 @@
TEST(ldnp_stnp_offset) {
SETUP();
- uint64_t src[3] = {0x0011223344556677, 0x8899aabbccddeeff,
- 0xffeeddccbbaa9988};
- uint64_t dst[7] = {0, 0, 0, 0, 0, 0, 0};
+ uint64_t src[4] = {0x0011223344556677, 0x8899aabbccddeeff,
+ 0xffeeddccbbaa9988, 0x7766554433221100};
+ uint64_t dst[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
@@ -2806,17 +6125,26 @@
__ Mov(x16, src_base);
__ Mov(x17, dst_base);
__ Mov(x18, src_base + 24);
- __ Mov(x19, dst_base + 56);
+ __ Mov(x19, dst_base + 64);
+ __ Mov(x20, src_base + 32);
+
+ // Ensure address set up has happened before executing non-temporal ops.
+ __ Dmb(InnerShareable, BarrierAll);
+
__ Ldnp(w0, w1, MemOperand(x16));
__ Ldnp(w2, w3, MemOperand(x16, 4));
__ Ldnp(x4, x5, MemOperand(x16, 8));
__ Ldnp(w6, w7, MemOperand(x18, -12));
__ Ldnp(x8, x9, MemOperand(x18, -16));
+ __ Ldnp(q16, q17, MemOperand(x16));
+ __ Ldnp(q19, q18, MemOperand(x20, -32));
__ Stnp(w0, w1, MemOperand(x17));
__ Stnp(w2, w3, MemOperand(x17, 8));
__ Stnp(x4, x5, MemOperand(x17, 16));
- __ Stnp(w6, w7, MemOperand(x19, -24));
- __ Stnp(x8, x9, MemOperand(x19, -16));
+ __ Stnp(w6, w7, MemOperand(x19, -32));
+ __ Stnp(x8, x9, MemOperand(x19, -24));
+ __ Stnp(q17, q16, MemOperand(x19));
+ __ Stnp(q18, q19, MemOperand(x19, 32));
END();
RUN();
@@ -2838,10 +6166,119 @@
ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[5]);
ASSERT_EQUAL_64(0xffeeddccbbaa9988, x9);
ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[6]);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q16);
+ ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q17);
+ ASSERT_EQUAL_128(0x7766554433221100, 0xffeeddccbbaa9988, q18);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q19);
+ ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[8]);
+ ASSERT_EQUAL_64(0x7766554433221100, dst[9]);
+ ASSERT_EQUAL_64(0x0011223344556677, dst[10]);
+ ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[11]);
+ ASSERT_EQUAL_64(0xffeeddccbbaa9988, dst[12]);
+ ASSERT_EQUAL_64(0x7766554433221100, dst[13]);
+ ASSERT_EQUAL_64(0x0011223344556677, dst[14]);
+ ASSERT_EQUAL_64(0x8899aabbccddeeff, dst[15]);
ASSERT_EQUAL_64(src_base, x16);
ASSERT_EQUAL_64(dst_base, x17);
ASSERT_EQUAL_64(src_base + 24, x18);
- ASSERT_EQUAL_64(dst_base + 56, x19);
+ ASSERT_EQUAL_64(dst_base + 64, x19);
+ ASSERT_EQUAL_64(src_base + 32, x20);
+
+ TEARDOWN();
+}
+
+
+TEST(ldnp_stnp_offset_float) {
+ SETUP();
+
+ float src[3] = {1.2, 2.3, 3.4};
+ float dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x16, src_base);
+ __ Mov(x17, dst_base);
+ __ Mov(x18, src_base + 12);
+ __ Mov(x19, dst_base + 24);
+
+ // Ensure address set up has happened before executing non-temporal ops.
+ __ Dmb(InnerShareable, BarrierAll);
+
+ __ Ldnp(s0, s1, MemOperand(x16));
+ __ Ldnp(s2, s3, MemOperand(x16, 4));
+ __ Ldnp(s5, s4, MemOperand(x18, -8));
+ __ Stnp(s1, s0, MemOperand(x17));
+ __ Stnp(s3, s2, MemOperand(x17, 8));
+ __ Stnp(s4, s5, MemOperand(x19, -8));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(1.2, s0);
+ ASSERT_EQUAL_FP32(2.3, s1);
+ ASSERT_EQUAL_FP32(2.3, dst[0]);
+ ASSERT_EQUAL_FP32(1.2, dst[1]);
+ ASSERT_EQUAL_FP32(2.3, s2);
+ ASSERT_EQUAL_FP32(3.4, s3);
+ ASSERT_EQUAL_FP32(3.4, dst[2]);
+ ASSERT_EQUAL_FP32(2.3, dst[3]);
+ ASSERT_EQUAL_FP32(3.4, s4);
+ ASSERT_EQUAL_FP32(2.3, s5);
+ ASSERT_EQUAL_FP32(3.4, dst[4]);
+ ASSERT_EQUAL_FP32(2.3, dst[5]);
+ ASSERT_EQUAL_64(src_base, x16);
+ ASSERT_EQUAL_64(dst_base, x17);
+ ASSERT_EQUAL_64(src_base + 12, x18);
+ ASSERT_EQUAL_64(dst_base + 24, x19);
+
+ TEARDOWN();
+}
+
+
+TEST(ldnp_stnp_offset_double) {
+ SETUP();
+
+ double src[3] = {1.2, 2.3, 3.4};
+ double dst[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x16, src_base);
+ __ Mov(x17, dst_base);
+ __ Mov(x18, src_base + 24);
+ __ Mov(x19, dst_base + 48);
+
+ // Ensure address set up has happened before executing non-temporal ops.
+ __ Dmb(InnerShareable, BarrierAll);
+
+ __ Ldnp(d0, d1, MemOperand(x16));
+ __ Ldnp(d2, d3, MemOperand(x16, 8));
+ __ Ldnp(d5, d4, MemOperand(x18, -16));
+ __ Stnp(d1, d0, MemOperand(x17));
+ __ Stnp(d3, d2, MemOperand(x17, 16));
+ __ Stnp(d4, d5, MemOperand(x19, -16));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP64(1.2, d0);
+ ASSERT_EQUAL_FP64(2.3, d1);
+ ASSERT_EQUAL_FP64(2.3, dst[0]);
+ ASSERT_EQUAL_FP64(1.2, dst[1]);
+ ASSERT_EQUAL_FP64(2.3, d2);
+ ASSERT_EQUAL_FP64(3.4, d3);
+ ASSERT_EQUAL_FP64(3.4, dst[2]);
+ ASSERT_EQUAL_FP64(2.3, dst[3]);
+ ASSERT_EQUAL_FP64(3.4, d4);
+ ASSERT_EQUAL_FP64(2.3, d5);
+ ASSERT_EQUAL_FP64(3.4, dst[4]);
+ ASSERT_EQUAL_FP64(2.3, dst[5]);
+ ASSERT_EQUAL_64(src_base, x16);
+ ASSERT_EQUAL_64(dst_base, x17);
+ ASSERT_EQUAL_64(src_base + 24, x18);
+ ASSERT_EQUAL_64(dst_base + 48, x19);
TEARDOWN();
}
@@ -3143,6 +6580,46 @@
}
+TEST(ldur_stur_fp) {
+ SETUP();
+
+ int64_t src[3] = {0x0123456789abcdef, 0x0123456789abcdef,
+ 0x0123456789abcdef};
+ int64_t dst[5] = {0, 0, 0, 0, 0};
+ uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
+ uintptr_t dst_base = reinterpret_cast<uintptr_t>(dst);
+
+ START();
+ __ Mov(x17, src_base);
+ __ Mov(x18, dst_base);
+ __ Ldr(b0, MemOperand(x17));
+ __ Str(b0, MemOperand(x18));
+ __ Ldr(h1, MemOperand(x17, 1));
+ __ Str(h1, MemOperand(x18, 1));
+ __ Ldr(s2, MemOperand(x17, 2));
+ __ Str(s2, MemOperand(x18, 3));
+ __ Ldr(d3, MemOperand(x17, 3));
+ __ Str(d3, MemOperand(x18, 7));
+ __ Ldr(q4, MemOperand(x17, 4));
+ __ Str(q4, MemOperand(x18, 15));
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xef, q0);
+ ASSERT_EQUAL_128(0, 0xabcd, q1);
+ ASSERT_EQUAL_128(0, 0x456789ab, q2);
+ ASSERT_EQUAL_128(0, 0xabcdef0123456789, q3);
+ ASSERT_EQUAL_128(0x89abcdef01234567, 0x89abcdef01234567, q4);
+ ASSERT_EQUAL_64(0x89456789ababcdef, dst[0]);
+ ASSERT_EQUAL_64(0x67abcdef01234567, dst[1]);
+ ASSERT_EQUAL_64(0x6789abcdef012345, dst[2]);
+ ASSERT_EQUAL_64(0x0089abcdef012345, dst[3]);
+
+ TEARDOWN();
+}
+
+
TEST(ldr_literal) {
SETUP();
@@ -3151,6 +6628,7 @@
__ Ldr(w3, 0xfedcba09);
__ Ldrsw(x4, 0x7fffffff);
__ Ldrsw(x5, 0x80000000);
+ __ Ldr(q11, 0x1234000056780000, 0xabcd0000ef000000);
__ Ldr(d13, 1.234);
__ Ldr(s25, 2.5);
END();
@@ -3161,6 +6639,7 @@
ASSERT_EQUAL_64(0xfedcba09, x3);
ASSERT_EQUAL_64(0x7fffffff, x4);
ASSERT_EQUAL_64(0xffffffff80000000, x5);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
ASSERT_EQUAL_FP64(1.234, d13);
ASSERT_EQUAL_FP32(2.5, s25);
@@ -3181,9 +6660,10 @@
__ Ldr(w1, 0xfedcba09);
__ Ldrsw(x2, 0x7fffffff);
__ Ldrsw(x3, 0x80000000);
+ __ Ldr(q2, 0x1234000056780000, 0xabcd0000ef000000);
__ Ldr(d0, 1.234);
__ Ldr(s1, 2.5);
- ASSERT_LITERAL_POOL_SIZE(32);
+ ASSERT_LITERAL_POOL_SIZE(48);
// Emit more code than the maximum literal load range to ensure the pool
// should be emitted.
@@ -3200,9 +6680,10 @@
__ Ldr(w5, 0xdcba09fe);
__ Ldrsw(x6, 0x7fffffff);
__ Ldrsw(x7, 0x80000000);
+ __ Ldr(q6, 0x1234000056780000, 0xabcd0000ef000000);
__ Ldr(d4, 123.4);
__ Ldr(s5, 250.0);
- ASSERT_LITERAL_POOL_SIZE(32);
+ ASSERT_LITERAL_POOL_SIZE(48);
END();
RUN();
@@ -3212,12 +6693,14 @@
ASSERT_EQUAL_64(0xfedcba09, x1);
ASSERT_EQUAL_64(0x7fffffff, x2);
ASSERT_EQUAL_64(0xffffffff80000000, x3);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q2);
ASSERT_EQUAL_FP64(1.234, d0);
ASSERT_EQUAL_FP32(2.5, s1);
ASSERT_EQUAL_64(0x34567890abcdef12, x4);
ASSERT_EQUAL_64(0xdcba09fe, x5);
ASSERT_EQUAL_64(0x7fffffff, x6);
ASSERT_EQUAL_64(0xffffffff80000000, x7);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q6);
ASSERT_EQUAL_FP64(123.4, d4);
ASSERT_EQUAL_FP32(250.0, s5);
@@ -3225,6 +6708,46 @@
}
+TEST(ldr_literal_values_q) {
+ SETUP();
+
+ static const uint64_t kHalfValues[] = {
+ 0x8000000000000000, 0x7fffffffffffffff, 0x0000000000000000,
+ 0xffffffffffffffff, 0x00ff00ff00ff00ff, 0x1234567890abcdef
+ };
+ const int card = sizeof(kHalfValues) / sizeof(kHalfValues[0]);
+ const Register& ref_low64 = x1;
+ const Register& ref_high64 = x2;
+ const Register& loaded_low64 = x3;
+ const Register& loaded_high64 = x4;
+ const VRegister& tgt = q0;
+
+ START();
+ __ Mov(x0, 0);
+
+ for (int i = 0; i < card; i++) {
+ __ Mov(ref_low64, kHalfValues[i]);
+ for (int j = 0; j < card; j++) {
+ __ Mov(ref_high64, kHalfValues[j]);
+ __ Ldr(tgt, kHalfValues[j], kHalfValues[i]);
+ __ Mov(loaded_low64, tgt.V2D(), 0);
+ __ Mov(loaded_high64, tgt.V2D(), 1);
+ __ Cmp(loaded_low64, ref_low64);
+ __ Ccmp(loaded_high64, ref_high64, NoFlag, eq);
+ __ Cset(x0, ne);
+ }
+ }
+ END();
+
+ RUN();
+
+ // If one of the values differs, the trace can be used to identify which one.
+ ASSERT_EQUAL_64(0, x0);
+
+ TEARDOWN();
+}
+
+
template <typename T>
void LoadIntValueHelper(T values[], int card) {
SETUP();
@@ -3332,11 +6855,13 @@
Literal<uint64_t> before_x(0x1234567890abcdef);
Literal<uint32_t> before_w(0xfedcba09);
Literal<uint32_t> before_sx(0x80000000);
+ Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
Literal<double> before_d(1.234);
Literal<float> before_s(2.5);
Literal<uint64_t> after_x(0x1234567890abcdef);
Literal<uint32_t> after_w(0xfedcba09);
Literal<uint32_t> after_sx(0x80000000);
+ Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
Literal<double> after_d(1.234);
Literal<float> after_s(2.5);
@@ -3347,6 +6872,7 @@
__ place(&before_x);
__ place(&before_w);
__ place(&before_sx);
+ __ place(&before_q);
__ place(&before_d);
__ place(&before_s);
__ Bind(&end_of_pool_before);
@@ -3354,12 +6880,14 @@
__ ldr(x2, &before_x);
__ ldr(w3, &before_w);
__ ldrsw(x5, &before_sx);
+ __ ldr(q11, &before_q);
__ ldr(d13, &before_d);
__ ldr(s25, &before_s);
__ ldr(x6, &after_x);
__ ldr(w7, &after_w);
__ ldrsw(x8, &after_sx);
+ __ ldr(q18, &after_q);
__ ldr(d14, &after_d);
__ ldr(s26, &after_s);
@@ -3368,6 +6896,7 @@
__ place(&after_x);
__ place(&after_w);
__ place(&after_sx);
+ __ place(&after_q);
__ place(&after_d);
__ place(&after_s);
__ Bind(&end_of_pool_after);
@@ -3379,12 +6908,14 @@
ASSERT_EQUAL_64(0x1234567890abcdef, x2);
ASSERT_EQUAL_64(0xfedcba09, x3);
ASSERT_EQUAL_64(0xffffffff80000000, x5);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
ASSERT_EQUAL_FP64(1.234, d13);
ASSERT_EQUAL_FP32(2.5, s25);
ASSERT_EQUAL_64(0x1234567890abcdef, x6);
ASSERT_EQUAL_64(0xfedcba09, x7);
ASSERT_EQUAL_64(0xffffffff80000000, x8);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
ASSERT_EQUAL_FP64(1.234, d14);
ASSERT_EQUAL_FP32(2.5, s26);
@@ -3400,10 +6931,12 @@
Label end_of_pool_after;
Literal<uint64_t> before_x(0x1234567890abcdef);
Literal<uint32_t> before_w(0xfedcba09);
+ Literal<uint64_t> before_q(0x1234000056780000, 0xabcd0000ef000000);
Literal<double> before_d(1.234);
Literal<float> before_s(2.5);
Literal<uint64_t> after_x(0x1234567890abcdef);
Literal<uint32_t> after_w(0xfedcba09);
+ Literal<uint64_t> after_q(0x1234000056780000, 0xabcd0000ef000000);
Literal<double> after_d(1.234);
Literal<float> after_s(2.5);
@@ -3413,6 +6946,7 @@
__ B(&end_of_pool_before);
__ place(&before_x);
__ place(&before_w);
+ __ place(&before_q);
__ place(&before_d);
__ place(&before_s);
__ Bind(&end_of_pool_before);
@@ -3422,12 +6956,14 @@
__ ldr(x2, &before_x);
__ ldr(w3, &before_w);
__ ldrsw(x5, &before_w); // Re-use before_w.
+ __ ldr(q11, &before_q);
__ ldr(d13, &before_d);
__ ldr(s25, &before_s);
__ ldr(x6, &after_x);
__ ldr(w7, &after_w);
__ ldrsw(x8, &after_w); // Re-use after_w.
+ __ ldr(q18, &after_q);
__ ldr(d14, &after_d);
__ ldr(s26, &after_s);
}
@@ -3436,6 +6972,7 @@
__ B(&end_of_pool_after);
__ place(&after_x);
__ place(&after_w);
+ __ place(&after_q);
__ place(&after_d);
__ place(&after_s);
__ Bind(&end_of_pool_after);
@@ -3447,12 +6984,14 @@
ASSERT_EQUAL_64(0x1234567890abcdef, x2);
ASSERT_EQUAL_64(0xfedcba09, x3);
ASSERT_EQUAL_64(0xfffffffffedcba09, x5);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q11);
ASSERT_EQUAL_FP64(1.234, d13);
ASSERT_EQUAL_FP32(2.5, s25);
ASSERT_EQUAL_64(0x1234567890abcdef, x6);
ASSERT_EQUAL_64(0xfedcba09, x7);
ASSERT_EQUAL_64(0xfffffffffedcba09, x8);
+ ASSERT_EQUAL_128(0x1234000056780000, 0xabcd0000ef000000, q18);
ASSERT_EQUAL_FP64(1.234, d14);
ASSERT_EQUAL_FP32(2.5, s26);
@@ -5130,11 +8669,11 @@
__ Mov(w21, 0x88888888);
// There are no macro instruction for bfm.
- __ bfm(x10, x1, 16, 31);
- __ bfm(x11, x1, 32, 15);
+ __ Bfm(x10, x1, 16, 31);
+ __ Bfm(x11, x1, 32, 15);
- __ bfm(w20, w1, 16, 23);
- __ bfm(w21, w1, 24, 15);
+ __ Bfm(w20, w1, 16, 23);
+ __ Bfm(w21, w1, 24, 15);
// Aliases.
__ Bfi(x12, x1, 16, 8);
@@ -5166,15 +8705,15 @@
__ Mov(x2, 0xfedcba9876543210);
// There are no macro instruction for sbfm.
- __ sbfm(x10, x1, 16, 31);
- __ sbfm(x11, x1, 32, 15);
- __ sbfm(x12, x1, 32, 47);
- __ sbfm(x13, x1, 48, 35);
+ __ Sbfm(x10, x1, 16, 31);
+ __ Sbfm(x11, x1, 32, 15);
+ __ Sbfm(x12, x1, 32, 47);
+ __ Sbfm(x13, x1, 48, 35);
- __ sbfm(w14, w1, 16, 23);
- __ sbfm(w15, w1, 24, 15);
- __ sbfm(w16, w2, 16, 23);
- __ sbfm(w17, w2, 24, 15);
+ __ Sbfm(w14, w1, 16, 23);
+ __ Sbfm(w15, w1, 24, 15);
+ __ Sbfm(w16, w2, 16, 23);
+ __ Sbfm(w17, w2, 24, 15);
// Aliases.
__ Asr(x18, x1, 32);
@@ -5233,15 +8772,15 @@
__ Mov(x11, 0x8888888888888888);
// There are no macro instruction for ubfm.
- __ ubfm(x10, x1, 16, 31);
- __ ubfm(x11, x1, 32, 15);
- __ ubfm(x12, x1, 32, 47);
- __ ubfm(x13, x1, 48, 35);
+ __ Ubfm(x10, x1, 16, 31);
+ __ Ubfm(x11, x1, 32, 15);
+ __ Ubfm(x12, x1, 32, 47);
+ __ Ubfm(x13, x1, 48, 35);
- __ ubfm(w25, w1, 16, 23);
- __ ubfm(w26, w1, 24, 15);
- __ ubfm(w27, w2, 16, 23);
- __ ubfm(w28, w2, 24, 15);
+ __ Ubfm(w25, w1, 16, 23);
+ __ Ubfm(w26, w1, 24, 15);
+ __ Ubfm(w27, w2, 16, 23);
+ __ Ubfm(w28, w2, 24, 15);
// Aliases
__ Lsl(x15, x1, 63);
@@ -5357,6 +8896,11 @@
__ Fmov(d4, d1);
__ Fmov(d6, rawbits_to_double(0x0123456789abcdef));
__ Fmov(s6, s6);
+
+ __ Fmov(d0, 0.0);
+ __ Fmov(v0.D(), 1, x1);
+ __ Fmov(x2, v0.D(), 1);
+
END();
RUN();
@@ -5368,7 +8912,8 @@
ASSERT_EQUAL_FP64(-13.0, d2);
ASSERT_EQUAL_FP64(-13.0, d4);
ASSERT_EQUAL_FP32(rawbits_to_float(0x89abcdef), s6);
-
+ ASSERT_EQUAL_128(double_to_rawbits(-13.0), 0x0000000000000000, q0);
+ ASSERT_EQUAL_64(double_to_rawbits(-13.0), x2);
TEARDOWN();
}
@@ -5571,7 +9116,6 @@
TEST(fmadd_fmsub_double) {
// It's hard to check the result of fused operations because the only way to
// calculate the result is using fma, which is what the simulator uses anyway.
- // TODO(jbramley): Add tests to check behaviour against a hardware trace.
// Basic operation.
FmaddFmsubHelper(1.0, 2.0, 3.0, 5.0, 1.0, -5.0, -1.0);
@@ -5637,7 +9181,6 @@
TEST(fmadd_fmsub_float) {
// It's hard to check the result of fused operations because the only way to
// calculate the result is using fma, which is what the simulator uses anyway.
- // TODO(jbramley): Add tests to check behaviour against a hardware trace.
// Basic operation.
FmaddFmsubHelper(1.0f, 2.0f, 3.0f, 5.0f, 1.0f, -5.0f, -1.0f);
@@ -7212,6 +10755,110 @@
}
+TEST(fcvt_half) {
+ SETUP();
+
+ START();
+ Label done;
+ {
+ // Check all exact conversions from half to float and back.
+ Label ok, fail;
+ __ Mov(w0, 0);
+ for (int i = 0; i < 0xffff; i += 3) {
+ if ((i & 0x7c00) == 0x7c00) continue;
+ __ Mov(w1, i);
+ __ Fmov(s1, w1);
+ __ Fcvt(s2, h1);
+ __ Fcvt(h2, s2);
+ __ Fmov(w2, s2);
+ __ Cmp(w1, w2);
+ __ B(&fail, ne);
+ }
+ __ B(&ok);
+ __ Bind(&fail);
+ __ Mov(w0, 1);
+ __ B(&done);
+ __ Bind(&ok);
+ }
+ {
+ // Check all exact conversions from half to double and back.
+ Label ok, fail;
+ for (int i = 0; i < 0xffff; i += 3) {
+ if ((i & 0x7c00) == 0x7c00) continue;
+ __ Mov(w1, i);
+ __ Fmov(s1, w1);
+ __ Fcvt(d2, h1);
+ __ Fcvt(h2, d2);
+ __ Mov(w2, v2.S(), 0);
+ __ Cmp(w1, w2);
+ __ B(&fail, ne);
+ }
+ __ B(&ok);
+ __ Bind(&fail);
+ __ Mov(w0, 2);
+ __ Bind(&ok);
+ }
+ __ Bind(&done);
+
+ // Check some other interesting values.
+ __ Fmov(s0, kFP32PositiveInfinity);
+ __ Fmov(s1, kFP32NegativeInfinity);
+ __ Fmov(s2, 65504); // Max half precision.
+ __ Fmov(s3, 6.10352e-5); // Min positive normal.
+ __ Fmov(s4, 6.09756e-5); // Max subnormal.
+ __ Fmov(s5, 5.96046e-8); // Min positive subnormal.
+ __ Fmov(s6, 5e-9); // Not representable -> zero.
+ __ Fmov(s7, -0.0);
+ __ Fcvt(h0, s0);
+ __ Fcvt(h1, s1);
+ __ Fcvt(h2, s2);
+ __ Fcvt(h3, s3);
+ __ Fcvt(h4, s4);
+ __ Fcvt(h5, s5);
+ __ Fcvt(h6, s6);
+ __ Fcvt(h7, s7);
+
+ __ Fmov(d20, kFP64PositiveInfinity);
+ __ Fmov(d21, kFP64NegativeInfinity);
+ __ Fmov(d22, 65504); // Max half precision.
+ __ Fmov(d23, 6.10352e-5); // Min positive normal.
+ __ Fmov(d24, 6.09756e-5); // Max subnormal.
+ __ Fmov(d25, 5.96046e-8); // Min positive subnormal.
+ __ Fmov(d26, 5e-9); // Not representable -> zero.
+ __ Fmov(d27, -0.0);
+ __ Fcvt(h20, d20);
+ __ Fcvt(h21, d21);
+ __ Fcvt(h22, d22);
+ __ Fcvt(h23, d23);
+ __ Fcvt(h24, d24);
+ __ Fcvt(h25, d25);
+ __ Fcvt(h26, d26);
+ __ Fcvt(h27, d27);
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_32(0, w0); // 1 => float failed, 2 => double failed.
+ ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q0);
+ ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q1);
+ ASSERT_EQUAL_128(0, 0x7bff, q2);
+ ASSERT_EQUAL_128(0, 0x0400, q3);
+ ASSERT_EQUAL_128(0, 0x03ff, q4);
+ ASSERT_EQUAL_128(0, 0x0001, q5);
+ ASSERT_EQUAL_128(0, 0, q6);
+ ASSERT_EQUAL_128(0, 0x8000, q7);
+ ASSERT_EQUAL_128(0, kFP16PositiveInfinity, q20);
+ ASSERT_EQUAL_128(0, kFP16NegativeInfinity, q21);
+ ASSERT_EQUAL_128(0, 0x7bff, q22);
+ ASSERT_EQUAL_128(0, 0x0400, q23);
+ ASSERT_EQUAL_128(0, 0x03ff, q24);
+ ASSERT_EQUAL_128(0, 0x0001, q25);
+ ASSERT_EQUAL_128(0, 0, q26);
+ ASSERT_EQUAL_128(0, 0x8000, q27);
+ TEARDOWN();
+}
+
+
TEST(fcvtas) {
SETUP();
@@ -8029,6 +11676,119 @@
}
+TEST(neon_fcvtl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x000080007efffeff, 0x3100b1007c00fc00);
+ __ Movi(v1.V2D(), 0x03ff83ff00038003, 0x000180017c01fc01);
+ __ Movi(v2.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
+ __ Movi(v3.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
+ __ Movi(v4.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
+ __ Fcvtl(v16.V4S(), v0.V4H());
+ __ Fcvtl2(v17.V4S(), v0.V8H());
+ __ Fcvtl(v18.V4S(), v1.V4H());
+ __ Fcvtl2(v19.V4S(), v1.V8H());
+
+ __ Fcvtl(v20.V2D(), v2.V2S());
+ __ Fcvtl2(v21.V2D(), v2.V4S());
+ __ Fcvtl(v22.V2D(), v3.V2S());
+ __ Fcvtl2(v23.V2D(), v3.V4S());
+ __ Fcvtl(v24.V2D(), v4.V2S());
+ __ Fcvtl2(v25.V2D(), v4.V4S());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x3e200000be200000, 0x7f800000ff800000, q16);
+ ASSERT_EQUAL_128(0x0000000080000000, 0x7fdfe000ffdfe000, q17);
+ ASSERT_EQUAL_128(0x33800000b3800000, 0x7fc02000ffc02000, q18);
+ ASSERT_EQUAL_128(0x387fc000b87fc000, 0x34400000b4400000, q19);
+ ASSERT_EQUAL_128(0x7ff0000000000000, 0xfff0000000000000, q20);
+ ASSERT_EQUAL_128(0x3fc4000000000000, 0xbfc4000000000000, q21);
+ ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000000, q23);
+ ASSERT_EQUAL_128(0x36a0000000000000, 0xb6a0000000000000, q24);
+ ASSERT_EQUAL_128(0x7ff9ffffe0000000, 0xfff9ffffe0000000, q25);
+ TEARDOWN();
+}
+
+
+TEST(neon_fcvtn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
+ __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
+ __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
+ __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
+ __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
+ __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
+ __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
+ __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
+ __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
+
+ __ Fcvtn(v16.V4H(), v0.V4S());
+ __ Fcvtn2(v16.V8H(), v1.V4S());
+ __ Fcvtn(v17.V4H(), v2.V4S());
+ __ Fcvtn(v18.V2S(), v3.V2D());
+ __ Fcvtn2(v18.V4S(), v4.V2D());
+ __ Fcvtn(v19.V2S(), v5.V2D());
+ __ Fcvtn2(v19.V4S(), v6.V2D());
+ __ Fcvtn(v20.V2S(), v7.V2D());
+ __ Fcvtn2(v20.V4S(), v8.V2D());
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x000080007e7ffe7f, 0x3100b1007c00fc00, q16);
+ ASSERT_EQUAL_128(0, 0x7e7ffe7f00008000, q17);
+ ASSERT_EQUAL_128(0x7f800000ff800000, 0x3e200000be200000, q18);
+ ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x0000000080000000, q19);
+ ASSERT_EQUAL_128(0x0000000080000000, 0x7fc7ffffffc7ffff, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_fcvtxn) {
+ SETUP();
+
+ START();
+ __ Movi(v0.V2D(), 0x3e200000be200000, 0x7f800000ff800000);
+ __ Movi(v1.V2D(), 0x0000000080000000, 0x7f8fffffff8fffff);
+ __ Movi(v2.V2D(), 0x7fcfffffffcfffff, 0x0000000180000001);
+ __ Movi(v3.V2D(), 0x3fc4000000000000, 0xbfc4000000000000);
+ __ Movi(v4.V2D(), 0x7ff0000000000000, 0xfff0000000000000);
+ __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
+ __ Movi(v6.V2D(), 0x7ff0ffffffffffff, 0xfff0ffffffffffff);
+ __ Movi(v7.V2D(), 0x7ff8ffffffffffff, 0xfff8ffffffffffff);
+ __ Movi(v8.V2D(), 0x0000000000000001, 0x8000000000000001);
+ __ Movi(v9.V2D(), 0x41ed000000000000, 0x41efffffffefffff);
+ __ Fcvtxn(v16.V2S(), v0.V2D());
+ __ Fcvtxn2(v16.V4S(), v1.V2D());
+ __ Fcvtxn(v17.V2S(), v2.V2D());
+ __ Fcvtxn2(v17.V4S(), v3.V2D());
+ __ Fcvtxn(v18.V2S(), v4.V2D());
+ __ Fcvtxn2(v18.V4S(), v5.V2D());
+ __ Fcvtxn(v19.V2S(), v6.V2D());
+ __ Fcvtxn2(v19.V4S(), v7.V2D());
+ __ Fcvtxn(v20.V2S(), v8.V2D());
+ __ Fcvtxn2(v20.V4S(), v9.V2D());
+ __ Fcvtxn(s21, d0);
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x000000017f7fffff, 0x310000057f7fffff, q16);
+ ASSERT_EQUAL_128(0x3e200000be200000, 0x7f7fffff00000001, q17);
+ ASSERT_EQUAL_128(0x0000000080000000, 0x7f800000ff800000, q18);
+ ASSERT_EQUAL_128(0x7fc7ffffffc7ffff, 0x7fc7ffffffc7ffff, q19);
+ ASSERT_EQUAL_128(0x4f6800004f7fffff, 0x0000000180000001, q20);
+ ASSERT_EQUAL_128(0, 0x7f7fffff, q21);
+ TEARDOWN();
+}
+
+
// Test that scvtf and ucvtf can convert the 64-bit input into the expected
// value. All possible values of 'fbits' are tested. The expected value is
// modified accordingly in each case.
@@ -9278,7 +13038,7 @@
}
break;
case PushPopRegList:
- __ PushSizeRegList(list, reg_size, CPURegister::kFPRegister);
+ __ PushSizeRegList(list, reg_size, CPURegister::kVRegister);
break;
}
@@ -9300,7 +13060,7 @@
}
break;
case PushPopRegList:
- __ PopSizeRegList(list, reg_size, CPURegister::kFPRegister);
+ __ PopSizeRegList(list, reg_size, CPURegister::kVRegister);
break;
}
@@ -9771,13 +13531,22 @@
// related to NoReg.
VIXL_CHECK(NoReg.Is(NoFPReg));
VIXL_CHECK(NoFPReg.Is(NoReg));
+
+ VIXL_CHECK(NoVReg.Is(NoReg));
+ VIXL_CHECK(NoReg.Is(NoVReg));
+
VIXL_CHECK(NoReg.Is(NoCPUReg));
VIXL_CHECK(NoCPUReg.Is(NoReg));
+
VIXL_CHECK(NoFPReg.Is(NoCPUReg));
VIXL_CHECK(NoCPUReg.Is(NoFPReg));
+ VIXL_CHECK(NoVReg.Is(NoCPUReg));
+ VIXL_CHECK(NoCPUReg.Is(NoVReg));
+
VIXL_CHECK(NoReg.IsNone());
VIXL_CHECK(NoFPReg.IsNone());
+ VIXL_CHECK(NoVReg.IsNone());
VIXL_CHECK(NoCPUReg.IsNone());
}
@@ -9787,6 +13556,7 @@
// related to IsValid().
VIXL_CHECK(!NoReg.IsValid());
VIXL_CHECK(!NoFPReg.IsValid());
+ VIXL_CHECK(!NoVReg.IsValid());
VIXL_CHECK(!NoCPUReg.IsValid());
VIXL_CHECK(x0.IsValid());
@@ -11303,7 +15073,7 @@
uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
int tag_count = sizeof(tags) / sizeof(tags[0]);
- const int kMaxDataLength = 128;
+ const int kMaxDataLength = 160;
for (int i = 0; i < tag_count; i++) {
unsigned char src[kMaxDataLength];
@@ -11333,6 +15103,9 @@
int offset = 0;
// Scaled-immediate offsets.
+ __ ldp(q0, q1, MemOperand(x0, offset));
+ __ stp(q0, q1, MemOperand(x1, offset));
+ offset += 2 * kQRegSizeInBytes;
__ ldp(x2, x3, MemOperand(x0, offset));
__ stp(x2, x3, MemOperand(x1, offset));
@@ -11476,16 +15249,21 @@
// previous access.
// Start with a non-zero preindex.
- int preindex = 63 * kXRegSizeInBytes;
+ int preindex = 62 * kXRegSizeInBytes;
int data_length = 0;
__ Mov(x0, src_tagged - preindex);
__ Mov(x1, dst_tagged - preindex);
+ __ ldp(q0, q1, MemOperand(x0, preindex, PreIndex));
+ __ stp(q0, q1, MemOperand(x1, preindex, PreIndex));
+ preindex = 2 * kQRegSizeInBytes;
+ data_length = preindex;
+
__ ldp(x2, x3, MemOperand(x0, preindex, PreIndex));
__ stp(x2, x3, MemOperand(x1, preindex, PreIndex));
preindex = 2 * kXRegSizeInBytes;
- data_length = preindex;
+ data_length += preindex;
__ ldpsw(x2, x3, MemOperand(x0, preindex, PreIndex));
__ stp(w2, w3, MemOperand(x1, preindex, PreIndex));
@@ -11607,6 +15385,11 @@
__ stp(x2, x3, MemOperand(x1, postindex, PostIndex));
data_length = postindex;
+ postindex = 2 * kQRegSizeInBytes;
+ __ ldp(q0, q1, MemOperand(x0, postindex, PostIndex));
+ __ stp(q0, q1, MemOperand(x1, postindex, PostIndex));
+ data_length += postindex;
+
postindex = 2 * kWRegSizeInBytes;
__ ldpsw(x2, x3, MemOperand(x0, postindex, PostIndex));
__ stp(w2, w3, MemOperand(x1, postindex, PostIndex));
@@ -11787,6 +15570,36 @@
}
+TEST(load_store_tagged_register_postindex) {
+ uint64_t src[] = { 0x0706050403020100, 0x0f0e0d0c0b0a0908 };
+ uint64_t tags[] = { 0x00, 0x1, 0x55, 0xff };
+ int tag_count = sizeof(tags) / sizeof(tags[0]);
+
+ for (int j = 0; j < tag_count; j++) {
+ for (int i = 0; i < tag_count; i++) {
+ SETUP();
+ uint64_t src_base = reinterpret_cast<uint64_t>(src);
+ uint64_t src_tagged = CPU::SetPointerTag(src_base, tags[i]);
+ uint64_t offset_tagged = CPU::SetPointerTag(UINT64_C(0), tags[j]);
+
+ START();
+ __ Mov(x10, src_tagged);
+ __ Mov(x11, offset_tagged);
+ __ Ld1(v0.V16B(), MemOperand(x10, x11, PostIndex));
+ // TODO: add other instructions (ld2-4, st1-4) as they become available.
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0f0e0d0c0b0a0908, 0x0706050403020100, q0);
+ ASSERT_EQUAL_64(src_tagged + offset_tagged, x10);
+
+ TEARDOWN();
+ }
+ }
+}
+
+
TEST(branch_tagged) {
SETUP();
START();
@@ -11895,5 +15708,5646 @@
TEARDOWN();
}
+TEST(neon_3same_addp) {
+ SETUP();
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Addp(v16.V16B(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x00ff54ffff54aaff, 0xffffffffffffffff, q16);
+ TEARDOWN();
+}
+
+TEST(neon_3same_sqdmulh_sqrdmulh) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
+ __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
+ __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
+ __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
+
+ __ Sqdmulh(v16.V4H(), v0.V4H(), v1.V4H());
+ __ Sqdmulh(v17.V4S(), v2.V4S(), v3.V4S());
+ __ Sqdmulh(h18, h0, h1);
+ __ Sqdmulh(s19, s2, s3);
+
+ __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.V4H());
+ __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.V4S());
+ __ Sqrdmulh(h22, h0, h1);
+ __ Sqrdmulh(s23, s2, s3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100007fff, q16);
+ ASSERT_EQUAL_128(0x000000017fffffff, 0x000000007fffffff, q17);
+ ASSERT_EQUAL_128(0, 0x7fff, q18);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000100017fff, q20);
+ ASSERT_EQUAL_128(0x000000017fffffff, 0x000000017fffffff, q21);
+ ASSERT_EQUAL_128(0, 0x7fff, q22);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q23);
+ TEARDOWN();
+}
+
+TEST(neon_byelement_sqdmulh_sqrdmulh) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000040004008000);
+ __ Movi(v1.V2D(), 0x0000000000000000, 0x0000002000108000);
+ __ Movi(v2.V2D(), 0x0400000080000000, 0x0400000080000000);
+ __ Movi(v3.V2D(), 0x0000002080000000, 0x0000001080000000);
+
+ __ Sqdmulh(v16.V4H(), v0.V4H(), v1.H(), 1);
+ __ Sqdmulh(v17.V4S(), v2.V4S(), v3.S(), 1);
+ __ Sqdmulh(h18, h0, v1.H(), 0);
+ __ Sqdmulh(s19, s2, v3.S(), 0);
+
+ __ Sqrdmulh(v20.V4H(), v0.V4H(), v1.H(), 1);
+ __ Sqrdmulh(v21.V4S(), v2.V4S(), v3.S(), 1);
+ __ Sqrdmulh(h22, h0, v1.H(), 0);
+ __ Sqrdmulh(s23, s2, v3.S(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000fff0, q16);
+ ASSERT_EQUAL_128(0x00000000fffffff0, 0x00000000fffffff0, q17);
+ ASSERT_EQUAL_128(0, 0x7fff, q18);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000010001fff0, q20);
+ ASSERT_EQUAL_128(0x00000001fffffff0, 0x00000001fffffff0, q21);
+ ASSERT_EQUAL_128(0, 0x7fff, q22);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q23);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_saddlp) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+
+ __ Saddlp(v16.V8H(), v0.V16B());
+ __ Saddlp(v17.V4H(), v0.V8B());
+
+ __ Saddlp(v18.V4S(), v0.V8H());
+ __ Saddlp(v19.V2S(), v0.V4H());
+
+ __ Saddlp(v20.V2D(), v0.V4S());
+ __ Saddlp(v21.V1D(), v0.V2S());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0080ffffff010080, 0xff01ffff0080ff01, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xff01ffff0080ff01, q17);
+ ASSERT_EQUAL_128(0x0000800000000081, 0xffff7f81ffff8200, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff7f81ffff8200, q19);
+ ASSERT_EQUAL_128(0x0000000000818000, 0xffffffff82017f81, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff82017f81, q21);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_uaddlp) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+
+ __ Uaddlp(v16.V8H(), v0.V16B());
+ __ Uaddlp(v17.V4H(), v0.V8B());
+
+ __ Uaddlp(v18.V4S(), v0.V8H());
+ __ Uaddlp(v19.V2S(), v0.V4H());
+
+ __ Uaddlp(v20.V2D(), v0.V4S());
+ __ Uaddlp(v21.V1D(), v0.V2S());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x008000ff01010080, 0x010100ff00800101, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x010100ff00800101, q17);
+ ASSERT_EQUAL_128(0x0000800000010081, 0x00017f8100008200, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00017f8100008200, q19);
+ ASSERT_EQUAL_128(0x0000000100818000, 0x0000000082017f81, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000082017f81, q21);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_sadalp) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V16B(), v1.V16B());
+ __ Mov(v17.V16B(), v1.V16B());
+ __ Sadalp(v16.V8H(), v0.V16B());
+ __ Sadalp(v17.V4H(), v0.V8B());
+
+ __ Mov(v18.V16B(), v2.V16B());
+ __ Mov(v19.V16B(), v2.V16B());
+ __ Sadalp(v18.V4S(), v1.V8H());
+ __ Sadalp(v19.V2S(), v1.V4H());
+
+ __ Mov(v20.V16B(), v3.V16B());
+ __ Mov(v21.V16B(), v4.V16B());
+ __ Sadalp(v20.V2D(), v2.V4S());
+ __ Sadalp(v21.V1D(), v2.V2S());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x80808000ff000080, 0xff00ffff00817f00, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xff00ffff00817f00, q17);
+ ASSERT_EQUAL_128(0x7fff0001fffffffe, 0xffffffff80007fff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff80007fff, q19);
+ ASSERT_EQUAL_128(0x7fffffff80000000, 0x800000007ffffffe, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_uadalp) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V16B(), v1.V16B());
+ __ Mov(v17.V16B(), v1.V16B());
+ __ Uadalp(v16.V8H(), v0.V16B());
+ __ Uadalp(v17.V4H(), v0.V8B());
+
+ __ Mov(v18.V16B(), v2.V16B());
+ __ Mov(v19.V16B(), v2.V16B());
+ __ Uadalp(v18.V4S(), v1.V8H());
+ __ Uadalp(v19.V2S(), v1.V4H());
+
+ __ Mov(v20.V16B(), v3.V16B());
+ __ Mov(v21.V16B(), v4.V16B());
+ __ Uadalp(v20.V2D(), v2.V4S());
+ __ Uadalp(v21.V1D(), v2.V2S());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x8080810001000080, 0x010000ff00818100, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x010000ff00818100, q17);
+ ASSERT_EQUAL_128(0x800100010000fffe, 0x0000ffff80007fff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff80007fff, q19);
+ ASSERT_EQUAL_128(0x8000000180000000, 0x800000007ffffffe, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+TEST(neon_3same_mul) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+
+ __ Mla(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Mls(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Mul(v18.V16B(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0102757605b1b208, 0x5f0a61450db90f56, q16);
+ ASSERT_EQUAL_128(0x01029192055b5c08, 0xb30ab5d30d630faa, q17);
+ ASSERT_EQUAL_128(0x0000727200abab00, 0x5600563900ab0056, q18);
+ TEARDOWN();
+}
+
+
+
+TEST(neon_3same_absdiff) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+
+ __ Saba(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Uaba(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Sabd(v18.V16B(), v0.V16B(), v1.V16B());
+ __ Uabd(v19.V16B(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0202aeaf065c5d5e, 0x5e5f600c62646455, q16);
+ ASSERT_EQUAL_128(0x0002585904b0b1b2, 0x5e5f600c62b86455, q17);
+ ASSERT_EQUAL_128(0x0100abab01565656, 0x5555550055565555, q18);
+ ASSERT_EQUAL_128(0xff005555ffaaaaaa, 0x5555550055aa5555, q19);
+ TEARDOWN();
+}
+
+
+TEST(neon_byelement_mul) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
+
+
+ __ Mul(v16.V4H(), v0.V4H(), v1.H(), 0);
+ __ Mul(v17.V8H(), v0.V8H(), v1.H(), 7);
+ __ Mul(v18.V2S(), v0.V2S(), v1.S(), 0);
+ __ Mul(v19.V4S(), v0.V4S(), v1.S(), 3);
+
+ __ Movi(v20.V2D(), 0x0000000000000000, 0x0001000200030004);
+ __ Movi(v21.V2D(), 0x0005000600070008, 0x0001000200030004);
+ __ Mla(v20.V4H(), v0.V4H(), v1.H(), 0);
+ __ Mla(v21.V8H(), v0.V8H(), v1.H(), 7);
+
+ __ Movi(v22.V2D(), 0x0000000000000000, 0x0000000200000004);
+ __ Movi(v23.V2D(), 0x0000000600000008, 0x0000000200000004);
+ __ Mla(v22.V2S(), v0.V2S(), v1.S(), 0);
+ __ Mla(v23.V4S(), v0.V4S(), v1.S(), 3);
+
+ __ Movi(v24.V2D(), 0x0000000000000000, 0x0100aaabfe015456);
+ __ Movi(v25.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Mls(v24.V4H(), v0.V4H(), v1.H(), 0);
+ __ Mls(v25.V8H(), v0.V8H(), v1.H(), 7);
+
+ __ Movi(v26.V2D(), 0x0000000000000000, 0xc8e2aaabe1c85456);
+ __ Movi(v27.V2D(), 0x39545572c6aa54e4, 0x39545572c6aa54e4);
+ __ Mls(v26.V2S(), v0.V2S(), v1.S(), 0);
+ __ Mls(v27.V4S(), v0.V4S(), v1.S(), 3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0100aaabfe015456, q16);
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaabe1c85456, q18);
+ ASSERT_EQUAL_128(0x39545572c6aa54e4, 0x39545572c6aa54e4, q19);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0101aaadfe04545a, q20);
+ ASSERT_EQUAL_128(0xff05aa5b010655b2, 0xff01aa57010255ae, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xc8e2aaade1c8545a, q22);
+ ASSERT_EQUAL_128(0x39545578c6aa54ec, 0x39545574c6aa54e8, q23);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
+ TEARDOWN();
+}
+
+
+TEST(neon_byelement_mull) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
+
+
+ __ Smull(v16.V4S(), v0.V4H(), v1.H(), 7);
+ __ Smull2(v17.V4S(), v0.V8H(), v1.H(), 0);
+ __ Umull(v18.V4S(), v0.V4H(), v1.H(), 7);
+ __ Umull2(v19.V4S(), v0.V8H(), v1.H(), 0);
+
+ __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
+ __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
+ __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
+ __ Movi(v23.V2D(), 0x0000000100000002, 0x0000000200000001);
+
+ __ Smlal(v20.V4S(), v0.V4H(), v1.H(), 7);
+ __ Smlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
+ __ Umlal(v22.V4S(), v0.V4H(), v1.H(), 7);
+ __ Umlal2(v23.V4S(), v0.V8H(), v1.H(), 0);
+
+ __ Movi(v24.V2D(), 0xffffff00ffffaa55, 0x000000ff000055aa);
+ __ Movi(v25.V2D(), 0xffaaaaabffff55ab, 0x0054ffab0000fe01);
+ __ Movi(v26.V2D(), 0x0000ff000000aa55, 0x000000ff000055aa);
+ __ Movi(v27.V2D(), 0x00a9aaab00fe55ab, 0x0054ffab0000fe01);
+
+ __ Smlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
+ __ Smlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
+ __ Umlsl(v26.V4S(), v0.V4H(), v1.H(), 7);
+ __ Umlsl2(v27.V4S(), v0.V8H(), v1.H(), 0);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xffffff00ffffaa55, 0x000000ff000055aa, q16);
+ ASSERT_EQUAL_128(0xffaaaaabffff55ab, 0x0054ffab0000fe01, q17);
+ ASSERT_EQUAL_128(0x0000ff000000aa55, 0x000000ff000055aa, q18);
+ ASSERT_EQUAL_128(0x00a9aaab00fe55ab, 0x0054ffab0000fe01, q19);
+
+ ASSERT_EQUAL_128(0xffffff01ffffaa57, 0x00000101000055ab, q20);
+ ASSERT_EQUAL_128(0xffaaaaacffff55ad, 0x0054ffad0000fe02, q21);
+ ASSERT_EQUAL_128(0x0000ff010000aa57, 0x00000101000055ab, q22);
+ ASSERT_EQUAL_128(0x00a9aaac00fe55ad, 0x0054ffad0000fe02, q23);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q26);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_byelement_sqdmull) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaa55ff55555500ff, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000155aaff55ff00, 0xaa55ff55555500ff);
+
+ __ Sqdmull(v16.V4S(), v0.V4H(), v1.H(), 7);
+ __ Sqdmull2(v17.V4S(), v0.V8H(), v1.H(), 0);
+ __ Sqdmull(s18, h0, v1.H(), 7);
+
+ __ Movi(v20.V2D(), 0x0000000100000002, 0x0000000200000001);
+ __ Movi(v21.V2D(), 0x0000000100000002, 0x0000000200000001);
+ __ Movi(v22.V2D(), 0x0000000100000002, 0x0000000200000001);
+
+ __ Sqdmlal(v20.V4S(), v0.V4H(), v1.H(), 7);
+ __ Sqdmlal2(v21.V4S(), v0.V8H(), v1.H(), 0);
+ __ Sqdmlal(s22, h0, v1.H(), 7);
+
+ __ Movi(v24.V2D(), 0xfffffe00ffff54aa, 0x000001fe0000ab54);
+ __ Movi(v25.V2D(), 0xff555556fffeab56, 0x00a9ff560001fc02);
+ __ Movi(v26.V2D(), 0x0000000000000000, 0x000000000000ab54);
+
+ __ Sqdmlsl(v24.V4S(), v0.V4H(), v1.H(), 7);
+ __ Sqdmlsl2(v25.V4S(), v0.V8H(), v1.H(), 0);
+ __ Sqdmlsl(s26, h0, v1.H(), 7);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xfffffe00ffff54aa, 0x000001fe0000ab54, q16);
+ ASSERT_EQUAL_128(0xff555556fffeab56, 0x00a9ff560001fc02, q17);
+ ASSERT_EQUAL_128(0, 0x0000ab54, q18);
+
+ ASSERT_EQUAL_128(0xfffffe01ffff54ac, 0x000002000000ab55, q20);
+ ASSERT_EQUAL_128(0xff555557fffeab58, 0x00a9ff580001fc03, q21);
+ ASSERT_EQUAL_128(0, 0x0000ab55, q22);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
+ ASSERT_EQUAL_128(0, 0x00000000, q26);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_absdiff) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+
+ __ Sabal(v16.V8H(), v0.V8B(), v1.V8B());
+ __ Uabal(v17.V8H(), v0.V8B(), v1.V8B());
+ __ Sabal2(v18.V8H(), v0.V16B(), v1.V16B());
+ __ Uabal2(v19.V8H(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0b620d630f55, q16);
+ ASSERT_EQUAL_128(0x01570359055b0708, 0x095f0bb60d630f55, q17);
+ ASSERT_EQUAL_128(0x0103030405b107b3, 0x090b0b620d640f55, q18);
+ ASSERT_EQUAL_128(0x02010304055b075d, 0x0a090bb60db80fab, q19);
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_sqdmull) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
+ __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
+ __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
+ __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
+
+ __ Sqdmull(v16.V4S(), v0.V4H(), v1.V4H());
+ __ Sqdmull2(v17.V4S(), v0.V8H(), v1.V8H());
+ __ Sqdmull(v18.V2D(), v2.V2S(), v3.V2S());
+ __ Sqdmull2(v19.V2D(), v2.V4S(), v3.V4S());
+ __ Sqdmull(s20, h0, h1);
+ __ Sqdmull(d21, s2, s3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q16);
+ ASSERT_EQUAL_128(0x800100007ffe0002, 0x800100007fffffff, q17);
+ ASSERT_EQUAL_128(0x8000000100000000, 0x7fffffffffffffff, q18);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000100000000, q19);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q20);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_sqdmlal) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
+ __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
+ __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
+ __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
+
+ __ Movi(v16.V2D(), 0xffffffff00000001, 0x8fffffff00000001);
+ __ Movi(v17.V2D(), 0x00000001ffffffff, 0x00000001ffffffff);
+ __ Movi(v18.V2D(), 0x8000000000000001, 0x0000000000000001);
+ __ Movi(v19.V2D(), 0xffffffffffffffff, 0x7fffffffffffffff);
+ __ Movi(v20.V2D(), 0, 0x00000001);
+ __ Movi(v21.V2D(), 0, 0x00000001);
+
+ __ Sqdmlal(v16.V4S(), v0.V4H(), v1.V4H());
+ __ Sqdmlal2(v17.V4S(), v0.V8H(), v1.V8H());
+ __ Sqdmlal(v18.V2D(), v2.V2S(), v3.V2S());
+ __ Sqdmlal2(v19.V2D(), v2.V4S(), v3.V4S());
+ __ Sqdmlal(s20, h0, h1);
+ __ Sqdmlal(d21, s2, s3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x8000ffff7ffe0003, 0x800000007fffffff, q16);
+ ASSERT_EQUAL_128(0x800100017ffe0001, 0x800100017ffffffe, q17);
+ ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q18);
+ ASSERT_EQUAL_128(0x7ffffffffffffffe, 0x00000000ffffffff, q19);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q20);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_sqdmlsl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7fff7fff80008000, 0x80007fff7fff8000);
+ __ Movi(v1.V2D(), 0x80007fff7fff8000, 0x7fff7fff80008000);
+ __ Movi(v2.V2D(), 0x800000007fffffff, 0x7fffffff80000000);
+ __ Movi(v3.V2D(), 0x8000000080000000, 0x8000000080000000);
+
+ __ Movi(v16.V2D(), 0xffffffff00000001, 0x7ffffffe80000001);
+ __ Movi(v17.V2D(), 0x00000001ffffffff, 0x7ffffffe00000001);
+ __ Movi(v18.V2D(), 0x8000000000000001, 0x8000000000000001);
+ __ Movi(v19.V2D(), 0xfffffffffffffffe, 0x7fffffffffffffff);
+ __ Movi(v20.V2D(), 0, 0x00000001);
+ __ Movi(v21.V2D(), 0, 0x00000001);
+
+ __ Sqdmlsl(v16.V4S(), v0.V4H(), v1.V4H());
+ __ Sqdmlsl2(v17.V4S(), v0.V8H(), v1.V8H());
+ __ Sqdmlsl(v18.V2D(), v2.V2S(), v3.V2S());
+ __ Sqdmlsl2(v19.V2D(), v2.V4S(), v3.V4S());
+ __ Sqdmlsl(s20, h0, h1);
+ __ Sqdmlsl(d21, s2, s3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x7ffeffff8001ffff, 0x7fffffff80000000, q16);
+ ASSERT_EQUAL_128(0x7fff00018001fffd, 0x7fffffff80000002, q17);
+ ASSERT_EQUAL_128(0xffffffff00000001, 0x8000000000000000, q18);
+ ASSERT_EQUAL_128(0x8000000000000000, 0x7fffffffffffffff, q19);
+ ASSERT_EQUAL_128(0, 0x80000002, q20);
+ ASSERT_EQUAL_128(0, 0x8000000000000002, q21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_mla) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+
+ __ Smlal(v16.V8H(), v0.V8B(), v1.V8B());
+ __ Umlal(v17.V8H(), v0.V8B(), v1.V8B());
+ __ Smlal2(v18.V8H(), v0.V16B(), v1.V16B());
+ __ Umlal2(v19.V8H(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x01580304055c2341, 0x090a0ab70d0e0f56, q16);
+ ASSERT_EQUAL_128(0xaa580304ae5c2341, 0x090a5fb70d0eb856, q17);
+ ASSERT_EQUAL_128(0x01020304e878ea7a, 0x090a0ab70cb90f00, q18);
+ ASSERT_EQUAL_128(0x010203043d783f7a, 0x090a5fb761b90f00, q19);
+ TEARDOWN();
+}
+
+
+TEST(neon_3diff_mls) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55ab, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+ __ Movi(v16.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v17.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v18.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+ __ Movi(v19.V2D(), 0x0102030405060708, 0x090a0b0c0d0e0f00);
+
+ __ Smlsl(v16.V8H(), v0.V8B(), v1.V8B());
+ __ Umlsl(v17.V8H(), v0.V8B(), v1.V8B());
+ __ Smlsl2(v18.V8H(), v0.V16B(), v1.V16B());
+ __ Umlsl2(v19.V8H(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x00ac030404b0eacf, 0x090a0b610d0e0eaa, q16);
+ ASSERT_EQUAL_128(0x57ac03045bb0eacf, 0x090ab6610d0e65aa, q17);
+ ASSERT_EQUAL_128(0x0102030421942396, 0x090a0b610d630f00, q18);
+ ASSERT_EQUAL_128(0x01020304cc94ce96, 0x090ab661b8630f00, q19);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_compare) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+
+ __ Cmeq(v16.V16B(), v0.V16B(), v0.V16B());
+ __ Cmeq(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Cmge(v18.V16B(), v0.V16B(), v0.V16B());
+ __ Cmge(v19.V16B(), v0.V16B(), v1.V16B());
+ __ Cmgt(v20.V16B(), v0.V16B(), v0.V16B());
+ __ Cmgt(v21.V16B(), v0.V16B(), v1.V16B());
+ __ Cmhi(v22.V16B(), v0.V16B(), v0.V16B());
+ __ Cmhi(v23.V16B(), v0.V16B(), v1.V16B());
+ __ Cmhs(v24.V16B(), v0.V16B(), v0.V16B());
+ __ Cmhs(v25.V16B(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
+ ASSERT_EQUAL_128(0x00ff000000000000, 0x000000ff00000000, q17);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
+ ASSERT_EQUAL_128(0x00ff00ffff00ff00, 0xff0000ff0000ff00, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0x000000ffff00ff00, 0xff0000000000ff00, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0xff00ff0000ff00ff, 0xff00000000ffff00, q23);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q24);
+ ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xff0000ff00ffff00, q25);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_scalar_compare) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0xaa55ff55555500ff);
+
+ __ Cmeq(d16, d0, d0);
+ __ Cmeq(d17, d0, d1);
+ __ Cmeq(d18, d1, d0);
+ __ Cmge(d19, d0, d0);
+ __ Cmge(d20, d0, d1);
+ __ Cmge(d21, d1, d0);
+ __ Cmgt(d22, d0, d0);
+ __ Cmgt(d23, d0, d1);
+ __ Cmhi(d24, d0, d0);
+ __ Cmhi(d25, d0, d1);
+ __ Cmhs(d26, d0, d0);
+ __ Cmhs(d27, d0, d1);
+ __ Cmhs(d28, d1, d0);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q16);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q17);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q18);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q23);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q27);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q28);
+
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_fcmeq) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
+ __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
+ __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
+ __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
+
+ __ Fcmeq(s16, s0, 0.0);
+ __ Fcmeq(s17, s1, 0.0);
+ __ Fcmeq(s18, s2, 0.0);
+ __ Fcmeq(d19, d0, 0.0);
+ __ Fcmeq(d20, d1, 0.0);
+ __ Fcmeq(d21, d2, 0.0);
+ __ Fcmeq(v22.V2S(), v0.V2S(), 0.0);
+ __ Fcmeq(v23.V4S(), v1.V4S(), 0.0);
+ __ Fcmeq(v24.V2D(), v1.V2D(), 0.0);
+ __ Fcmeq(v25.V2D(), v2.V2D(), 0.0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0, 0xffffffff, q16);
+ ASSERT_EQUAL_128(0, 0x00000000, q17);
+ ASSERT_EQUAL_128(0, 0x00000000, q18);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q21);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_fcmge) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
+ __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
+ __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
+ __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
+
+ __ Fcmge(s16, s0, 0.0);
+ __ Fcmge(s17, s1, 0.0);
+ __ Fcmge(s18, s2, 0.0);
+ __ Fcmge(d19, d0, 0.0);
+ __ Fcmge(d20, d1, 0.0);
+ __ Fcmge(d21, d3, 0.0);
+ __ Fcmge(v22.V2S(), v0.V2S(), 0.0);
+ __ Fcmge(v23.V4S(), v1.V4S(), 0.0);
+ __ Fcmge(v24.V2D(), v1.V2D(), 0.0);
+ __ Fcmge(v25.V2D(), v3.V2D(), 0.0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0, 0xffffffff, q16);
+ ASSERT_EQUAL_128(0, 0x00000000, q17);
+ ASSERT_EQUAL_128(0, 0x00000000, q18);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_fcmgt) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
+ __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
+ __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
+ __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
+
+ __ Fcmgt(s16, s0, 0.0);
+ __ Fcmgt(s17, s1, 0.0);
+ __ Fcmgt(s18, s2, 0.0);
+ __ Fcmgt(d19, d0, 0.0);
+ __ Fcmgt(d20, d1, 0.0);
+ __ Fcmgt(d21, d3, 0.0);
+ __ Fcmgt(v22.V2S(), v0.V2S(), 0.0);
+ __ Fcmgt(v23.V4S(), v1.V4S(), 0.0);
+ __ Fcmgt(v24.V2D(), v1.V2D(), 0.0);
+ __ Fcmgt(v25.V2D(), v3.V2D(), 0.0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0, 0x00000000, q16);
+ ASSERT_EQUAL_128(0, 0x00000000, q17);
+ ASSERT_EQUAL_128(0, 0x00000000, q18);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_fcmle) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
+ __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
+ __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
+ __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
+
+ __ Fcmle(s16, s0, 0.0);
+ __ Fcmle(s17, s1, 0.0);
+ __ Fcmle(s18, s3, 0.0);
+ __ Fcmle(d19, d0, 0.0);
+ __ Fcmle(d20, d1, 0.0);
+ __ Fcmle(d21, d2, 0.0);
+ __ Fcmle(v22.V2S(), v0.V2S(), 0.0);
+ __ Fcmle(v23.V4S(), v1.V4S(), 0.0);
+ __ Fcmle(v24.V2D(), v1.V2D(), 0.0);
+ __ Fcmle(v25.V2D(), v2.V2D(), 0.0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0, 0xffffffff, q16);
+ ASSERT_EQUAL_128(0, 0x00000000, q17);
+ ASSERT_EQUAL_128(0, 0x00000000, q18);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q19);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_fcmlt) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000); // Zero.
+ __ Movi(v1.V2D(), 0xffffffffffffffff, 0xffffffffffffffff); // Nan.
+ __ Movi(v2.V2D(), 0xbf800000bf800000, 0xbf800000bf800000); // < 0.
+ __ Movi(v3.V2D(), 0x3f8000003f800000, 0x3f8000003f800000); // > 0.
+
+ __ Fcmlt(s16, s0, 0.0);
+ __ Fcmlt(s17, s1, 0.0);
+ __ Fcmlt(s18, s3, 0.0);
+ __ Fcmlt(d19, d0, 0.0);
+ __ Fcmlt(d20, d1, 0.0);
+ __ Fcmlt(d21, d2, 0.0);
+ __ Fcmlt(v22.V2S(), v0.V2S(), 0.0);
+ __ Fcmlt(v23.V4S(), v1.V4S(), 0.0);
+ __ Fcmlt(v24.V2D(), v1.V2D(), 0.0);
+ __ Fcmlt(v25.V2D(), v2.V2D(), 0.0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0, 0x00000000, q16);
+ ASSERT_EQUAL_128(0, 0x00000000, q17);
+ ASSERT_EQUAL_128(0, 0x00000000, q18);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q19);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_cmeq) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
+
+ __ Cmeq(v16.V8B(), v1.V8B(), 0);
+ __ Cmeq(v17.V16B(), v1.V16B(), 0);
+ __ Cmeq(v18.V4H(), v1.V4H(), 0);
+ __ Cmeq(v19.V8H(), v1.V8H(), 0);
+ __ Cmeq(v20.V2S(), v0.V2S(), 0);
+ __ Cmeq(v21.V4S(), v0.V4S(), 0);
+ __ Cmeq(d22, d0, 0);
+ __ Cmeq(d23, d1, 0);
+ __ Cmeq(v24.V2D(), v0.V2D(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000000ff00, q16);
+ ASSERT_EQUAL_128(0xffff0000000000ff, 0xffff00000000ff00, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff000000000000, q18);
+ ASSERT_EQUAL_128(0xffff000000000000, 0xffff000000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_cmge) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff01000200030004, 0x0000000000000000);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
+
+ __ Cmge(v16.V8B(), v1.V8B(), 0);
+ __ Cmge(v17.V16B(), v1.V16B(), 0);
+ __ Cmge(v18.V4H(), v1.V4H(), 0);
+ __ Cmge(v19.V8H(), v1.V8H(), 0);
+ __ Cmge(v20.V2S(), v0.V2S(), 0);
+ __ Cmge(v21.V4S(), v0.V4S(), 0);
+ __ Cmge(d22, d0, 0);
+ __ Cmge(d23, d1, 0);
+ __ Cmge(v24.V2D(), v0.V2D(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff00ffffffff00, q16);
+ ASSERT_EQUAL_128(0xffffff0000ff00ff, 0xffff00ffffffff00, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff0000ffffffff, q18);
+ ASSERT_EQUAL_128(0xffffffff00000000, 0xffff0000ffffffff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q20);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0xffffffffffffffff, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_cmlt) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001000200030004, 0xff00000000000000);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
+
+ __ Cmlt(v16.V8B(), v1.V8B(), 0);
+ __ Cmlt(v17.V16B(), v1.V16B(), 0);
+ __ Cmlt(v18.V4H(), v1.V4H(), 0);
+ __ Cmlt(v19.V8H(), v1.V8H(), 0);
+ __ Cmlt(v20.V2S(), v1.V2S(), 0);
+ __ Cmlt(v21.V4S(), v1.V4S(), 0);
+ __ Cmlt(d22, d0, 0);
+ __ Cmlt(d23, d1, 0);
+ __ Cmlt(v24.V2D(), v0.V2D(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000ff00000000ff, q16);
+ ASSERT_EQUAL_128(0x000000ffff00ff00, 0x0000ff00000000ff, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000ffff00000000, q18);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000ffff00000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_cmle) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
+
+ __ Cmle(v16.V8B(), v1.V8B(), 0);
+ __ Cmle(v17.V16B(), v1.V16B(), 0);
+ __ Cmle(v18.V4H(), v1.V4H(), 0);
+ __ Cmle(v19.V8H(), v1.V8H(), 0);
+ __ Cmle(v20.V2S(), v1.V2S(), 0);
+ __ Cmle(v21.V4S(), v1.V4S(), 0);
+ __ Cmle(d22, d0, 0);
+ __ Cmle(d23, d1, 0);
+ __ Cmle(v24.V2D(), v0.V2D(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffff000000ffff, q16);
+ ASSERT_EQUAL_128(0xffff00ffff00ffff, 0xffffff000000ffff, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffff00000000, q18);
+ ASSERT_EQUAL_128(0xffff0000ffffffff, 0xffffffff00000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000000000000, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_cmgt) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001000200030004, 0x0000000000000000);
+ __ Movi(v1.V2D(), 0x000055aaff55ff00, 0x0000ff55555500ff);
+
+ __ Cmgt(v16.V8B(), v1.V8B(), 0);
+ __ Cmgt(v17.V16B(), v1.V16B(), 0);
+ __ Cmgt(v18.V4H(), v1.V4H(), 0);
+ __ Cmgt(v19.V8H(), v1.V8H(), 0);
+ __ Cmgt(v20.V2S(), v0.V2S(), 0);
+ __ Cmgt(v21.V4S(), v0.V4S(), 0);
+ __ Cmgt(d22, d0, 0);
+ __ Cmgt(d23, d1, 0);
+ __ Cmgt(v24.V2D(), v0.V2D(), 0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000ffffff0000, q16);
+ ASSERT_EQUAL_128(0x0000ff0000ff0000, 0x000000ffffff0000, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
+ ASSERT_EQUAL_128(0x0000ffff00000000, 0x00000000ffffffff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffffffffffffffff, q23);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_neg) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Neg(v16.V8B(), v0.V8B());
+ __ Neg(v17.V16B(), v0.V16B());
+ __ Neg(v18.V4H(), v1.V4H());
+ __ Neg(v19.V8H(), v1.V8H());
+ __ Neg(v20.V2S(), v2.V2S());
+ __ Neg(v21.V4S(), v2.V4S());
+ __ Neg(d22, d3);
+ __ Neg(v23.V2D(), v3.V2D());
+ __ Neg(v24.V2D(), v4.V2D());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100ff81807f, q16);
+ ASSERT_EQUAL_128(0x81ff00017f8081ff, 0x807f0100ff81807f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
+ ASSERT_EQUAL_128(0x80007fff00010000, 0x00010000ffff8001, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
+ ASSERT_EQUAL_128(0x8000000000000001, 0x0000000080000001, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000000001, q22);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q23);
+ ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_sqneg) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqneg(v16.V8B(), v0.V8B());
+ __ Sqneg(v17.V16B(), v0.V16B());
+ __ Sqneg(v18.V4H(), v1.V4H());
+ __ Sqneg(v19.V8H(), v1.V8H());
+ __ Sqneg(v20.V2S(), v2.V2S());
+ __ Sqneg(v21.V4S(), v2.V4S());
+ __ Sqneg(v22.V2D(), v3.V2D());
+ __ Sqneg(v23.V2D(), v4.V2D());
+
+ __ Sqneg(b24, b0);
+ __ Sqneg(h25, h1);
+ __ Sqneg(s26, s2);
+ __ Sqneg(d27, d3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100ff817f7f, q16);
+ ASSERT_EQUAL_128(0x81ff00017f7f81ff, 0x7f7f0100ff817f7f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00010000ffff8001, q18);
+ ASSERT_EQUAL_128(0x7fff7fff00010000, 0x00010000ffff8001, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000001, q20);
+ ASSERT_EQUAL_128(0x7fffffff00000001, 0x0000000080000001, q21);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x8000000000000001, q22);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);
+
+ ASSERT_EQUAL_128(0, 0x7f, q24);
+ ASSERT_EQUAL_128(0, 0x8001, q25);
+ ASSERT_EQUAL_128(0, 0x80000001, q26);
+ ASSERT_EQUAL_128(0, 0x8000000000000001, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_abs) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Abs(v16.V8B(), v0.V8B());
+ __ Abs(v17.V16B(), v0.V16B());
+ __ Abs(v18.V4H(), v1.V4H());
+ __ Abs(v19.V8H(), v1.V8H());
+ __ Abs(v20.V2S(), v2.V2S());
+ __ Abs(v21.V4S(), v2.V4S());
+ __ Abs(d22, d3);
+ __ Abs(v23.V2D(), v3.V2D());
+ __ Abs(v24.V2D(), v4.V2D());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x807f0100017f807f, q16);
+ ASSERT_EQUAL_128(0x7f0100017f807f01, 0x807f0100017f807f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
+ ASSERT_EQUAL_128(0x80007fff00010000, 0x0001000000017fff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
+ ASSERT_EQUAL_128(0x8000000000000001, 0x000000007fffffff, q21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q23);
+ ASSERT_EQUAL_128(0x8000000000000000, 0x0000000000000000, q24);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_sqabs) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqabs(v16.V8B(), v0.V8B());
+ __ Sqabs(v17.V16B(), v0.V16B());
+ __ Sqabs(v18.V4H(), v1.V4H());
+ __ Sqabs(v19.V8H(), v1.V8H());
+ __ Sqabs(v20.V2S(), v2.V2S());
+ __ Sqabs(v21.V4S(), v2.V4S());
+ __ Sqabs(v22.V2D(), v3.V2D());
+ __ Sqabs(v23.V2D(), v4.V2D());
+
+ __ Sqabs(b24, b0);
+ __ Sqabs(h25, h1);
+ __ Sqabs(s26, s2);
+ __ Sqabs(d27, d3);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7f7f0100017f7f7f, q16);
+ ASSERT_EQUAL_128(0x7f0100017f7f7f01, 0x7f7f0100017f7f7f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0001000000017fff, q18);
+ ASSERT_EQUAL_128(0x7fff7fff00010000, 0x0001000000017fff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
+ ASSERT_EQUAL_128(0x7fffffff00000001, 0x000000007fffffff, q21);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x7fffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x7fffffffffffffff, 0x0000000000000000, q23);
+
+ ASSERT_EQUAL_128(0, 0x7f, q24);
+ ASSERT_EQUAL_128(0, 0x7fff, q25);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q26);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
+
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_suqadd) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f0180ff);
+
+ __ Movi(v2.V2D(), 0x80008001ffff0000, 0xffff000000017ffd);
+ __ Movi(v3.V2D(), 0xffff000080008001, 0x00017fffffff0001);
+
+ __ Movi(v4.V2D(), 0x80000000fffffffe, 0xfffffff17ffffffe);
+ __ Movi(v5.V2D(), 0xffffffff80000000, 0x7fffffff00000002);
+
+ __ Movi(v6.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v7.V2D(), 0x8000000000000000, 0x8000000000000002);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v2.V2D());
+ __ Mov(v19.V2D(), v2.V2D());
+ __ Mov(v20.V2D(), v4.V2D());
+ __ Mov(v21.V2D(), v4.V2D());
+ __ Mov(v22.V2D(), v6.V2D());
+
+ __ Mov(v23.V2D(), v0.V2D());
+ __ Mov(v24.V2D(), v2.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+ __ Mov(v26.V2D(), v6.V2D());
+
+ __ Suqadd(v16.V8B(), v1.V8B());
+ __ Suqadd(v17.V16B(), v1.V16B());
+ __ Suqadd(v18.V4H(), v3.V4H());
+ __ Suqadd(v19.V8H(), v3.V8H());
+ __ Suqadd(v20.V2S(), v5.V2S());
+ __ Suqadd(v21.V4S(), v5.V4S());
+ __ Suqadd(v22.V2D(), v7.V2D());
+
+ __ Suqadd(b23, b1);
+ __ Suqadd(h24, h3);
+ __ Suqadd(s25, s5);
+ __ Suqadd(d26, d7);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x81817f7f7f7f007f, q16);
+ ASSERT_EQUAL_128(0x7f7f7f7f7f807f7f, 0x81817f7f7f7f007f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00007fff7fff7ffe, q18);
+ ASSERT_EQUAL_128(0x7fff80017fff7fff, 0x00007fff7fff7ffe, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7ffffff07fffffff, q20);
+ ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x7ffffff07fffffff, q21);
+ ASSERT_EQUAL_128(0x0000000000000001, 0x7fffffffffffffff, q22);
+
+ ASSERT_EQUAL_128(0, 0x7f, q23);
+ ASSERT_EQUAL_128(0, 0x7ffe, q24);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q25);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_usqadd) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f7ffe);
+ __ Movi(v1.V2D(), 0x017f8081ff00017f, 0x010080ff7f018002);
+
+ __ Movi(v2.V2D(), 0x80008001fffe0000, 0xffff000000017ffd);
+ __ Movi(v3.V2D(), 0xffff000000028001, 0x00017fffffff0001);
+
+ __ Movi(v4.V2D(), 0x80000000fffffffe, 0x00000001fffffffe);
+ __ Movi(v5.V2D(), 0xffffffff80000000, 0xfffffffe00000002);
+
+ __ Movi(v6.V2D(), 0x8000000000000002, 0x7fffffffffffffff);
+ __ Movi(v7.V2D(), 0x7fffffffffffffff, 0x8000000000000000);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v2.V2D());
+ __ Mov(v19.V2D(), v2.V2D());
+ __ Mov(v20.V2D(), v4.V2D());
+ __ Mov(v21.V2D(), v4.V2D());
+ __ Mov(v22.V2D(), v6.V2D());
+
+ __ Mov(v23.V2D(), v0.V2D());
+ __ Mov(v24.V2D(), v2.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+ __ Mov(v26.V2D(), v6.V2D());
+
+ __ Usqadd(v16.V8B(), v1.V8B());
+ __ Usqadd(v17.V16B(), v1.V16B());
+ __ Usqadd(v18.V4H(), v3.V4H());
+ __ Usqadd(v19.V8H(), v3.V8H());
+ __ Usqadd(v20.V2S(), v5.V2S());
+ __ Usqadd(v21.V4S(), v5.V4S());
+ __ Usqadd(v22.V2D(), v7.V2D());
+
+ __ Usqadd(b23, b1);
+ __ Usqadd(h24, h3);
+ __ Usqadd(s25, s5);
+ __ Usqadd(d26, d7);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x81817f00808000ff, q16);
+ ASSERT_EQUAL_128(0x8080008080808080, 0x81817f00808000ff, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff7fff00007ffe, q18);
+ ASSERT_EQUAL_128(0x7fff8001ffff0000, 0xffff7fff00007ffe, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q20);
+ ASSERT_EQUAL_128(0x7fffffff7ffffffe, 0x00000000ffffffff, q21);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0x0000000000000000, q22);
+
+ ASSERT_EQUAL_128(0, 0xff, q23);
+ ASSERT_EQUAL_128(0, 0x7ffe, q24);
+ ASSERT_EQUAL_128(0, 0xffffffff, q25);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
+ TEARDOWN();
+}
+
+
+TEST(system_sys) {
+ SETUP();
+ const char* msg = "SYS test!";
+ uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
+
+ START();
+ __ Mov(x4, msg_addr);
+ __ Sys(3, 0x7, 0x5, 1, x4);
+ __ Mov(x3, x4);
+ __ Sys(3, 0x7, 0xa, 1, x3);
+ __ Mov(x2, x3);
+ __ Sys(3, 0x7, 0xb, 1, x2);
+ __ Mov(x1, x2);
+ __ Sys(3, 0x7, 0xe, 1, x1);
+ // TODO: Add tests to check ZVA equivalent.
+ END();
+
+ RUN();
+
+ TEARDOWN();
+}
+
+
+TEST(system_ic) {
+ SETUP();
+ const char* msg = "IC test!";
+ uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
+
+ START();
+ __ Mov(x11, msg_addr);
+ __ Ic(IVAU, x11);
+ END();
+
+ RUN();
+
+ TEARDOWN();
+}
+
+
+TEST(system_dc) {
+ SETUP();
+ const char* msg = "DC test!";
+ uintptr_t msg_addr = reinterpret_cast<uintptr_t>(msg);
+
+ START();
+ __ Mov(x20, msg_addr);
+ __ Dc(CVAC, x20);
+ __ Mov(x21, x20);
+ __ Dc(CVAU, x21);
+ __ Mov(x22, x21);
+ __ Dc(CIVAC, x22);
+ // TODO: Add tests to check ZVA.
+ END();
+
+ RUN();
+
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_xtn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Xtn(v16.V8B(), v0.V8H());
+ __ Xtn2(v16.V16B(), v1.V8H());
+ __ Xtn(v17.V4H(), v1.V4S());
+ __ Xtn2(v17.V8H(), v2.V4S());
+ __ Xtn(v18.V2S(), v3.V2D());
+ __ Xtn2(v18.V4S(), v4.V2D());
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0001ff00ff0001ff, 0x01ff800181007f81, q16);
+ ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8001000000007fff, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000001ffffffff, q18);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_sqxtn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqxtn(v16.V8B(), v0.V8H());
+ __ Sqxtn2(v16.V16B(), v1.V8H());
+ __ Sqxtn(v17.V4H(), v1.V4S());
+ __ Sqxtn2(v17.V8H(), v2.V4S());
+ __ Sqxtn(v18.V2S(), v3.V2D());
+ __ Sqxtn2(v18.V4S(), v4.V2D());
+ __ Sqxtn(b19, h0);
+ __ Sqxtn(h20, s0);
+ __ Sqxtn(s21, d0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x8080ff00ff00017f, 0x7f7a807f80807f80, q16);
+ ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000800080007fff, q17);
+ ASSERT_EQUAL_128(0x8000000000000000, 0x800000007fffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000007fff, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_uqxtn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Uqxtn(v16.V8B(), v0.V8H());
+ __ Uqxtn2(v16.V16B(), v1.V8H());
+ __ Uqxtn(v17.V4H(), v1.V4S());
+ __ Uqxtn2(v17.V8H(), v2.V4S());
+ __ Uqxtn(v18.V2S(), v3.V2D());
+ __ Uqxtn2(v18.V4S(), v4.V2D());
+ __ Uqxtn(b19, h0);
+ __ Uqxtn(h20, s0);
+ __ Uqxtn(s21, d0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xffffff00ff0001ff, 0xff7affffffffffff, q16);
+ ASSERT_EQUAL_128(0xffffffff0000ffff, 0xffffffffffffffff, q17);
+ ASSERT_EQUAL_128(0xffffffff00000000, 0xffffffffffffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000000000000ff, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_sqxtun) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f01007a81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqxtun(v16.V8B(), v0.V8H());
+ __ Sqxtun2(v16.V16B(), v1.V8H());
+ __ Sqxtun(v17.V4H(), v1.V4S());
+ __ Sqxtun2(v17.V8H(), v2.V4S());
+ __ Sqxtun(v18.V2S(), v3.V2D());
+ __ Sqxtun2(v18.V4S(), v4.V2D());
+ __ Sqxtun(b19, h0);
+ __ Sqxtun(h20, s0);
+ __ Sqxtun(s21, d0);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x00000000000001ff, 0xff7a00ff0000ff00, q16);
+ ASSERT_EQUAL_128(0x000000000000ffff, 0x000000000000ffff, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00000000ffffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000000000ffff, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q21);
+ TEARDOWN();
+}
+
+TEST(neon_3same_and) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
+
+ __ And(v16.V16B(), v0.V16B(), v0.V16B()); // self test
+ __ And(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
+ __ And(v24.V8B(), v0.V8B(), v0.V8B()); // self test
+ __ And(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
+ ASSERT_EQUAL_128(0x0000000000555500, 0xaa00aa00005500aa, q17);
+ ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
+ ASSERT_EQUAL_128(0, 0xaa00aa00005500aa, q25);
+ TEARDOWN();
+}
+
+TEST(neon_3same_bic) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
+
+ __ Bic(v16.V16B(), v0.V16B(), v0.V16B()); // self test
+ __ Bic(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
+ __ Bic(v24.V8B(), v0.V8B(), v0.V8B()); // self test
+ __ Bic(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
+ ASSERT_EQUAL_128(0xff00005500aa5500, 0x0000aa0000005500, q17);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0, 0x0000aa0000005500, q25);
+ TEARDOWN();
+}
+
+TEST(neon_3same_orr) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
+
+ __ Orr(v16.V16B(), v0.V16B(), v0.V16B()); // self test
+ __ Orr(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
+ __ Orr(v24.V8B(), v0.V8B(), v0.V8B()); // self test
+ __ Orr(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
+ ASSERT_EQUAL_128(0xffaaffffffffffaa, 0xff55ff5555ff55ff, q17);
+ ASSERT_EQUAL_128(0, 0xff00aa5500ff55aa, q24);
+ ASSERT_EQUAL_128(0, 0xff55ff5555ff55ff, q25);
+ TEARDOWN();
+}
+
+TEST(neon_3same_mov) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+
+ __ Mov(v16.V16B(), v0.V16B());
+ __ Mov(v17.V8H(), v0.V8H());
+ __ Mov(v18.V4S(), v0.V4S());
+ __ Mov(v19.V2D(), v0.V2D());
+
+ __ Mov(v24.V8B(), v0.V8B());
+ __ Mov(v25.V4H(), v0.V4H());
+ __ Mov(v26.V2S(), v0.V2S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q16);
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q17);
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q18);
+ ASSERT_EQUAL_128(0xff00aa5500ff55aa, 0xff00aa5500ff55aa, q19);
+
+ ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q24);
+ ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q25);
+ ASSERT_EQUAL_128(0x0, 0xff00aa5500ff55aa, q26);
+
+ TEARDOWN();
+}
+
+TEST(neon_3same_orn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x00aa55aaff55ff00, 0xaa55ff00555500ff);
+
+ __ Orn(v16.V16B(), v0.V16B(), v0.V16B()); // self test
+ __ Orn(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
+ __ Orn(v24.V8B(), v0.V8B(), v0.V8B()); // self test
+ __ Orn(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
+ ASSERT_EQUAL_128(0xff55aa5500ff55ff, 0xffaaaaffaaffffaa, q17);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q24);
+ ASSERT_EQUAL_128(0, 0xffaaaaffaaffffaa, q25);
+ TEARDOWN();
+}
+
+TEST(neon_3same_eor) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xff00aa5500ff55aa, 0xff00aa5500ff55aa);
+ __ Movi(v1.V2D(), 0x00ffaa00aa55aaff, 0xffff005500ff00ff);
+
+ __ Eor(v16.V16B(), v0.V16B(), v0.V16B()); // self test
+ __ Eor(v17.V16B(), v0.V16B(), v1.V16B()); // all combinations
+ __ Eor(v24.V8B(), v0.V8B(), v0.V8B()); // self test
+ __ Eor(v25.V8B(), v0.V8B(), v1.V8B()); // all combinations
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q16);
+ ASSERT_EQUAL_128(0xffff0055aaaaff55, 0x00ffaa0000005555, q17);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q24);
+ ASSERT_EQUAL_128(0, 0x00ffaa0000005555, q25);
+ TEARDOWN();
+}
+
+TEST(neon_3same_bif) {
+ SETUP();
+
+ START();
+
+ __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
+ __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
+ __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
+
+ __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
+ __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
+ __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
+
+ __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
+ __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
+ __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
+
+ __ Bif(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Bif(v17.V16B(), v2.V16B(), v3.V16B());
+ __ Bif(v18.V8B(), v4.V8B(), v5.V8B());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xffffff00ff0055ff, 0xffaa0055aa00aaaa, q16);
+ ASSERT_EQUAL_128(0x5555ffffffcccc00, 0xff333300fff0f000, q17);
+ ASSERT_EQUAL_128(0, 0xf0f0f0f0f00f0ff0, q18);
+ TEARDOWN();
+}
+
+TEST(neon_3same_bit) {
+ SETUP();
+
+ START();
+
+ __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
+ __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
+ __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
+
+ __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
+ __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
+ __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
+
+ __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
+ __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
+ __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
+
+ __ Bit(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Bit(v17.V16B(), v2.V16B(), v3.V16B());
+ __ Bit(v18.V8B(), v4.V8B(), v5.V8B());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff000000ff00ff55, 0xaaff550000aaaaaa, q16);
+ ASSERT_EQUAL_128(0x55550000cc00ffcc, 0x3300ff33f000fff0, q17);
+ ASSERT_EQUAL_128(0, 0xf0f0f0f00ff0f00f, q18);
+ TEARDOWN();
+}
+
+TEST(neon_3same_bsl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v16.V2D(), 0xffff0000ff00ffff, 0xffff00000000aaaa);
+ __ Movi(v0.V2D(), 0xff00ff00ff005555, 0xaaaa5555aaaaaaaa);
+ __ Movi(v1.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
+
+ __ Movi(v17.V2D(), 0x5555aa55cccccccc, 0x33333333f0f0f0f0);
+ __ Movi(v2.V2D(), 0x555555aaff00ff00, 0xff00ff00ff00ff00);
+ __ Movi(v3.V2D(), 0xaa55aa5500ffff00, 0x00ffff0000ffff00);
+
+ __ Movi(v18.V2D(), 0, 0xf0f0f0f00f0f0f0f);
+ __ Movi(v4.V2D(), 0, 0xf0f0f0f0f0f0f0f0);
+ __ Movi(v5.V2D(), 0, 0x00ffff0000ffff00);
+
+ __ Bsl(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Bsl(v17.V16B(), v2.V16B(), v3.V16B());
+ __ Bsl(v18.V8B(), v4.V8B(), v5.V8B());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff0000ffff005555, 0xaaaa55aa55aaffaa, q16);
+ ASSERT_EQUAL_128(0xff550000cc33ff00, 0x33ccff00f00fff00, q17);
+ ASSERT_EQUAL_128(0, 0xf0fffff000f0f000, q18);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_smax) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+
+ __ Smax(v16.V8B(), v0.V8B(), v1.V8B());
+ __ Smax(v18.V4H(), v0.V4H(), v1.V4H());
+ __ Smax(v20.V2S(), v0.V2S(), v1.V2S());
+
+ __ Smax(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Smax(v19.V8H(), v0.V8H(), v1.V8H());
+ __ Smax(v21.V4S(), v0.V4S(), v1.V4S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
+ ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
+ ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_smaxp) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+
+ __ Smaxp(v16.V8B(), v0.V8B(), v1.V8B());
+ __ Smaxp(v18.V4H(), v0.V4H(), v1.V4H());
+ __ Smaxp(v20.V2S(), v0.V2S(), v1.V2S());
+
+ __ Smaxp(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Smaxp(v19.V8H(), v0.V8H(), v1.V8H());
+ __ Smaxp(v21.V4S(), v0.V4S(), v1.V4S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x0000ff55ffff0055, q16);
+ ASSERT_EQUAL_128(0x0, 0x000055ffffff0000, q18);
+ ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
+ ASSERT_EQUAL_128(0x5555aaaa0000ff55, 0xaaaa5555ffff0055, q17);
+ ASSERT_EQUAL_128(0x55aaaaaa000055ff, 0xaaaa5555ffff0000, q19);
+ ASSERT_EQUAL_128(0x55aa555500000000, 0x555555550000aa55, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_addp_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Addp(d16, v0.V2D());
+ __ Addp(d17, v1.V2D());
+ __ Addp(d18, v2.V2D());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x00224466ef66fa80, q16);
+ ASSERT_EQUAL_128(0x0, 0x55aa5556aa5500a9, q17);
+ ASSERT_EQUAL_128(0x0, 0xaaaaaaa96655ff55, q18);
+ TEARDOWN();
+}
+
+TEST(neon_acrosslanes_addv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Addv(b16, v0.V8B());
+ __ Addv(b17, v0.V16B());
+ __ Addv(h18, v1.V4H());
+ __ Addv(h19, v1.V8H());
+ __ Addv(s20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xc7, q16);
+ ASSERT_EQUAL_128(0x0, 0x99, q17);
+ ASSERT_EQUAL_128(0x0, 0x55a9, q18);
+ ASSERT_EQUAL_128(0x0, 0x55fc, q19);
+ ASSERT_EQUAL_128(0x0, 0x1100a9fe, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_acrosslanes_saddlv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Saddlv(h16, v0.V8B());
+ __ Saddlv(h17, v0.V16B());
+ __ Saddlv(s18, v1.V4H());
+ __ Saddlv(s19, v1.V8H());
+ __ Saddlv(d20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xffc7, q16);
+ ASSERT_EQUAL_128(0x0, 0xff99, q17);
+ ASSERT_EQUAL_128(0x0, 0x000055a9, q18);
+ ASSERT_EQUAL_128(0x0, 0x000055fc, q19);
+ ASSERT_EQUAL_128(0x0, 0x0000001100a9fe, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_acrosslanes_uaddlv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Uaddlv(h16, v0.V8B());
+ __ Uaddlv(h17, v0.V16B());
+ __ Uaddlv(s18, v1.V4H());
+ __ Uaddlv(s19, v1.V8H());
+ __ Uaddlv(d20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x02c7, q16);
+ ASSERT_EQUAL_128(0x0, 0x0599, q17);
+ ASSERT_EQUAL_128(0x0, 0x000155a9, q18);
+ ASSERT_EQUAL_128(0x0, 0x000355fc, q19);
+ ASSERT_EQUAL_128(0x0, 0x000000021100a9fe, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_acrosslanes_smaxv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Smaxv(b16, v0.V8B());
+ __ Smaxv(b17, v0.V16B());
+ __ Smaxv(h18, v1.V4H());
+ __ Smaxv(h19, v1.V8H());
+ __ Smaxv(s20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x33, q16);
+ ASSERT_EQUAL_128(0x0, 0x44, q17);
+ ASSERT_EQUAL_128(0x0, 0x55ff, q18);
+ ASSERT_EQUAL_128(0x0, 0x55ff, q19);
+ ASSERT_EQUAL_128(0x0, 0x66555555, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_acrosslanes_sminv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0xfffa5555aaaaaaaa, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Sminv(b16, v0.V8B());
+ __ Sminv(b17, v0.V16B());
+ __ Sminv(h18, v1.V4H());
+ __ Sminv(h19, v1.V8H());
+ __ Sminv(s20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xaa, q16);
+ ASSERT_EQUAL_128(0x0, 0x80, q17);
+ ASSERT_EQUAL_128(0x0, 0xffaa, q18);
+ ASSERT_EQUAL_128(0x0, 0xaaaa, q19);
+ ASSERT_EQUAL_128(0x0, 0xaaaaaaaa, q20);
+ TEARDOWN();
+}
+
+TEST(neon_acrosslanes_umaxv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x00112233aabbfc00);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaffab, 0x00000000ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Umaxv(b16, v0.V8B());
+ __ Umaxv(b17, v0.V16B());
+ __ Umaxv(h18, v1.V4H());
+ __ Umaxv(h19, v1.V8H());
+ __ Umaxv(s20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xfc, q16);
+ ASSERT_EQUAL_128(0x0, 0xfe, q17);
+ ASSERT_EQUAL_128(0x0, 0xffaa, q18);
+ ASSERT_EQUAL_128(0x0, 0xffab, q19);
+ ASSERT_EQUAL_128(0x0, 0xffffffff, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_acrosslanes_uminv) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344aafe80, 0x02112233aabbfc01);
+ __ Movi(v1.V2D(), 0xfffa5555aaaa0000, 0x00010003ffaa55ff);
+ __ Movi(v2.V2D(), 0xaaaaaaaa66555555, 0xffffffff0000aa00);
+
+ __ Uminv(b16, v0.V8B());
+ __ Uminv(b17, v0.V16B());
+ __ Uminv(h18, v1.V4H());
+ __ Uminv(h19, v1.V8H());
+ __ Uminv(s20, v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x01, q16);
+ ASSERT_EQUAL_128(0x0, 0x00, q17);
+ ASSERT_EQUAL_128(0x0, 0x0001, q18);
+ ASSERT_EQUAL_128(0x0, 0x0000, q19);
+ ASSERT_EQUAL_128(0x0, 0x0000aa00, q20);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_smin) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+
+ __ Smin(v16.V8B(), v0.V8B(), v1.V8B());
+ __ Smin(v18.V4H(), v0.V4H(), v1.V4H());
+ __ Smin(v20.V2S(), v0.V2S(), v1.V2S());
+
+ __ Smin(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Smin(v19.V8H(), v0.V8H(), v1.V8H());
+ __ Smin(v21.V4S(), v0.V4S(), v1.V4S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_umax) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+
+ __ Umax(v16.V8B(), v0.V8B(), v1.V8B());
+ __ Umax(v18.V4H(), v0.V4H(), v1.V4H());
+ __ Umax(v20.V2S(), v0.V2S(), v1.V2S());
+
+ __ Umax(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Umax(v19.V8H(), v0.V8H(), v1.V8H());
+ __ Umax(v21.V4S(), v0.V4S(), v1.V4S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaaaaff, q16);
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaaaa55, q18);
+ ASSERT_EQUAL_128(0x0, 0xffffffffffaa55ff, q20);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaaff, q17);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaaaa55, q19);
+ ASSERT_EQUAL_128(0xaaaaaaaaaaaaaaaa, 0xffffffffffaa55ff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_3same_umin) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaa55555555, 0xffffffff0000aa55);
+ __ Movi(v1.V2D(), 0x55aa5555aaaaaaaa, 0x00000000ffaa55ff);
+
+ __ Umin(v16.V8B(), v0.V8B(), v1.V8B());
+ __ Umin(v18.V4H(), v0.V4H(), v1.V4H());
+ __ Umin(v20.V2S(), v0.V2S(), v1.V2S());
+
+ __ Umin(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Umin(v19.V8H(), v0.V8H(), v1.V8H());
+ __ Umin(v21.V4S(), v0.V4S(), v1.V4S());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x0000000000005555, q16);
+ ASSERT_EQUAL_128(0x0, 0x00000000000055ff, q18);
+ ASSERT_EQUAL_128(0x0, 0x000000000000aa55, q20);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x0000000000005555, q17);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x00000000000055ff, q19);
+ ASSERT_EQUAL_128(0x55aa555555555555, 0x000000000000aa55, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_mvn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
+
+ __ Mvn(v16.V16B(), v0.V16B());
+ __ Mvn(v17.V8H(), v0.V8H());
+ __ Mvn(v18.V4S(), v0.V4S());
+ __ Mvn(v19.V2D(), v0.V2D());
+
+ __ Mvn(v24.V8B(), v0.V8B());
+ __ Mvn(v25.V4H(), v0.V4H());
+ __ Mvn(v26.V2S(), v0.V2S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
+ ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q17);
+ ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q18);
+ ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q19);
+
+ ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q24);
+ ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q25);
+ ASSERT_EQUAL_128(0x0, 0xaa55aa55aa55aa55, q26);
+ TEARDOWN();
+}
+
+
+TEST(neon_2regmisc_not) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x00ff00ffff0055aa, 0x55aa55aa55aa55aa);
+ __ Movi(v1.V2D(), 0, 0x00ffff0000ffff00);
+
+ __ Not(v16.V16B(), v0.V16B());
+ __ Not(v17.V8B(), v1.V8B());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff00ff0000ffaa55, 0xaa55aa55aa55aa55, q16);
+ ASSERT_EQUAL_128(0x0, 0xff0000ffff0000ff, q17);
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_cls_clz_cnt) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+
+ __ Cls(v16.V8B() , v1.V8B());
+ __ Cls(v17.V16B(), v1.V16B());
+ __ Cls(v18.V4H() , v1.V4H());
+ __ Cls(v19.V8H() , v1.V8H());
+ __ Cls(v20.V2S() , v1.V2S());
+ __ Cls(v21.V4S() , v1.V4S());
+
+ __ Clz(v22.V8B() , v0.V8B());
+ __ Clz(v23.V16B(), v0.V16B());
+ __ Clz(v24.V4H() , v0.V4H());
+ __ Clz(v25.V8H() , v0.V8H());
+ __ Clz(v26.V2S() , v0.V2S());
+ __ Clz(v27.V4S() , v0.V4S());
+
+ __ Cnt(v28.V8B() , v0.V8B());
+ __ Cnt(v29.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0601000000000102, q16);
+ ASSERT_EQUAL_128(0x0601000000000102, 0x0601000000000102, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0006000000000001, q18);
+ ASSERT_EQUAL_128(0x0006000000000001, 0x0006000000000001, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000600000000, q20);
+ ASSERT_EQUAL_128(0x0000000600000000, 0x0000000600000000, q21);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0404040404040404, q22);
+ ASSERT_EQUAL_128(0x0807060605050505, 0x0404040404040404, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0004000400040004, q24);
+ ASSERT_EQUAL_128(0x000f000600050005, 0x0004000400040004, q25);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000400000004, q26);
+ ASSERT_EQUAL_128(0x0000000f00000005, 0x0000000400000004, q27);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0102020302030304, q28);
+ ASSERT_EQUAL_128(0x0705050305030301, 0x0103030503050507, q29);
+
+ TEARDOWN();
+}
+
+TEST(neon_2regmisc_rev) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+
+ __ Rev16(v16.V8B() , v0.V8B());
+ __ Rev16(v17.V16B(), v0.V16B());
+
+ __ Rev32(v18.V8B() , v0.V8B());
+ __ Rev32(v19.V16B(), v0.V16B());
+ __ Rev32(v20.V4H() , v0.V4H());
+ __ Rev32(v21.V8H() , v0.V8H());
+
+ __ Rev64(v22.V8B() , v0.V8B());
+ __ Rev64(v23.V16B(), v0.V16B());
+ __ Rev64(v24.V4H() , v0.V4H());
+ __ Rev64(v25.V8H() , v0.V8H());
+ __ Rev64(v26.V2S() , v0.V2S());
+ __ Rev64(v27.V4S() , v0.V4S());
+
+ __ Rbit(v28.V8B() , v1.V8B());
+ __ Rbit(v29.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x09080b0a0d0c0f0e, q16);
+ ASSERT_EQUAL_128(0x0100030205040706, 0x09080b0a0d0c0f0e, q17);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0b0a09080f0e0d0c, q18);
+ ASSERT_EQUAL_128(0x0302010007060504, 0x0b0a09080f0e0d0c, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0a0b08090e0f0c0d, q20);
+ ASSERT_EQUAL_128(0x0203000106070405, 0x0a0b08090e0f0c0d, q21);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0f0e0d0c0b0a0908, q22);
+ ASSERT_EQUAL_128(0x0706050403020100, 0x0f0e0d0c0b0a0908, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0e0f0c0d0a0b0809, q24);
+ ASSERT_EQUAL_128(0x0607040502030001, 0x0e0f0c0d0a0b0809, q25);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0c0d0e0f08090a0b, q26);
+ ASSERT_EQUAL_128(0x0405060700010203, 0x0c0d0e0f08090a0b, q27);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x80c4a2e691d5b3f7, q28);
+ ASSERT_EQUAL_128(0x7f3b5d196e2a4c08, 0x80c4a2e691d5b3f7, q29);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sli) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v0.V2D());
+ __ Mov(v19.V2D(), v0.V2D());
+ __ Mov(v20.V2D(), v0.V2D());
+ __ Mov(v21.V2D(), v0.V2D());
+ __ Mov(v22.V2D(), v0.V2D());
+ __ Mov(v23.V2D(), v0.V2D());
+
+ __ Sli(v16.V8B(), v1.V8B(), 4);
+ __ Sli(v17.V16B(), v1.V16B(), 7);
+ __ Sli(v18.V4H(), v1.V4H(), 8);
+ __ Sli(v19.V8H(), v1.V8H(), 15);
+ __ Sli(v20.V2S(), v1.V2S(), 0);
+ __ Sli(v21.V4S(), v1.V4S(), 31);
+ __ Sli(v22.V2D(), v1.V2D(), 48);
+
+ __ Sli(d23, d1, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x18395a7b9cbddeff, q16);
+ ASSERT_EQUAL_128(0x0001020304050607, 0x88898a8b8c8d8e8f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x2309670bab0def0f, q18);
+ ASSERT_EQUAL_128(0x0001020304050607, 0x88098a0b8c0d8e0f, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0123456789abcdef, q20);
+ ASSERT_EQUAL_128(0x0001020304050607, 0x88090a0b8c0d0e0f, q21);
+ ASSERT_EQUAL_128(0x3210020304050607, 0xcdef0a0b0c0d0e0f, q22);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0xcdef0a0b0c0d0e0f, q23);
+
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sri) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v0.V2D());
+ __ Mov(v19.V2D(), v0.V2D());
+ __ Mov(v20.V2D(), v0.V2D());
+ __ Mov(v21.V2D(), v0.V2D());
+ __ Mov(v22.V2D(), v0.V2D());
+ __ Mov(v23.V2D(), v0.V2D());
+
+ __ Sri(v16.V8B(), v1.V8B(), 4);
+ __ Sri(v17.V16B(), v1.V16B(), 7);
+ __ Sri(v18.V4H(), v1.V4H(), 8);
+ __ Sri(v19.V8H(), v1.V8H(), 15);
+ __ Sri(v20.V2S(), v1.V2S(), 1);
+ __ Sri(v21.V4S(), v1.V4S(), 31);
+ __ Sri(v22.V2D(), v1.V2D(), 48);
+
+ __ Sri(d23, d1, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00020406080a0c0e, q16);
+ ASSERT_EQUAL_128(0x0101030304040606, 0x08080a0a0d0d0f0f, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x08010a450c890ecd, q18);
+ ASSERT_EQUAL_128(0x0001020304040606, 0x08080a0a0c0d0e0f, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0091a2b344d5e6f7, q20);
+ ASSERT_EQUAL_128(0x0001020304050606, 0x08090a0a0c0d0e0f, q21);
+ ASSERT_EQUAL_128(0x000102030405fedc, 0x08090a0b0c0d0123, q22);
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x08090a0b0c0d0123, q23);
+
+
+ TEARDOWN();
+}
+
+
+TEST(neon_shrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Shrn(v16.V8B(), v0.V8H(), 8);
+ __ Shrn2(v16.V16B(), v1.V8H(), 1);
+ __ Shrn(v17.V4H(), v1.V4S(), 16);
+ __ Shrn2(v17.V8H(), v2.V4S(), 1);
+ __ Shrn(v18.V2S(), v3.V2D(), 32);
+ __ Shrn2(v18.V4S(), v3.V2D(), 1);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0000ff00ff0000ff, 0x7f00817f80ff0180, q16);
+ ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x8000ffffffff0001, q17);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x800000007fffffff, q18);
+ TEARDOWN();
+}
+
+
+TEST(neon_rshrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Rshrn(v16.V8B(), v0.V8H(), 8);
+ __ Rshrn2(v16.V16B(), v1.V8H(), 1);
+ __ Rshrn(v17.V4H(), v1.V4S(), 16);
+ __ Rshrn2(v17.V8H(), v2.V4S(), 1);
+ __ Rshrn(v18.V2S(), v3.V2D(), 32);
+ __ Rshrn2(v18.V4S(), v3.V2D(), 1);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x0001000000000100, 0x7f01827f81ff0181, q16);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8001ffffffff0001, q17);
+ ASSERT_EQUAL_128(0x0000000100000000, 0x8000000080000000, q18);
+ TEARDOWN();
+}
+
+
+TEST(neon_uqshrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Uqshrn(v16.V8B(), v0.V8H(), 8);
+ __ Uqshrn2(v16.V16B(), v1.V8H(), 1);
+ __ Uqshrn(v17.V4H(), v1.V4S(), 16);
+ __ Uqshrn2(v17.V8H(), v2.V4S(), 1);
+ __ Uqshrn(v18.V2S(), v3.V2D(), 32);
+ __ Uqshrn2(v18.V4S(), v3.V2D(), 1);
+
+ __ Uqshrn(b19, h0, 8);
+ __ Uqshrn(h20, s1, 16);
+ __ Uqshrn(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xffffff00ff0000ff, 0x7f00817f80ff0180, q16);
+ ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8000ffffffff0001, q17);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0x800000007fffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_uqrshrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Uqrshrn(v16.V8B(), v0.V8H(), 8);
+ __ Uqrshrn2(v16.V16B(), v1.V8H(), 1);
+ __ Uqrshrn(v17.V4H(), v1.V4S(), 16);
+ __ Uqrshrn2(v17.V8H(), v2.V4S(), 1);
+ __ Uqrshrn(v18.V2S(), v3.V2D(), 32);
+ __ Uqrshrn2(v18.V4S(), v3.V2D(), 1);
+
+ __ Uqrshrn(b19, h0, 8);
+ __ Uqrshrn(h20, s1, 16);
+ __ Uqrshrn(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0xffffff00ff0001ff, 0x7f01827f81ff0181, q16);
+ ASSERT_EQUAL_128(0xffffffff0000ffff, 0x8001ffffffff0001, q17);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0x8000000080000000, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_sqshrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqshrn(v16.V8B(), v0.V8H(), 8);
+ __ Sqshrn2(v16.V16B(), v1.V8H(), 1);
+ __ Sqshrn(v17.V4H(), v1.V4S(), 16);
+ __ Sqshrn2(v17.V8H(), v2.V4S(), 1);
+ __ Sqshrn(v18.V2S(), v3.V2D(), 32);
+ __ Sqshrn2(v18.V4S(), v3.V2D(), 1);
+
+ __ Sqshrn(b19, h0, 8);
+ __ Sqshrn(h20, s1, 16);
+ __ Sqshrn(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x8080ff00ff00007f, 0x7f00817f80ff0180, q16);
+ ASSERT_EQUAL_128(0x8000ffff00007fff, 0x8000ffffffff0001, q17);
+ ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000080, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_sqrshrn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqrshrn(v16.V8B(), v0.V8H(), 8);
+ __ Sqrshrn2(v16.V16B(), v1.V8H(), 1);
+ __ Sqrshrn(v17.V4H(), v1.V4S(), 16);
+ __ Sqrshrn2(v17.V8H(), v2.V4S(), 1);
+ __ Sqrshrn(v18.V2S(), v3.V2D(), 32);
+ __ Sqrshrn2(v18.V4S(), v3.V2D(), 1);
+
+ __ Sqrshrn(b19, h0, 8);
+ __ Sqrshrn(h20, s1, 16);
+ __ Sqrshrn(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x808000000000017f, 0x7f01827f81ff0181, q16);
+ ASSERT_EQUAL_128(0x8000000000007fff, 0x8001ffffffff0001, q17);
+ ASSERT_EQUAL_128(0x800000007fffffff, 0x800000007fffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000081, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_sqshrun) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqshrun(v16.V8B(), v0.V8H(), 8);
+ __ Sqshrun2(v16.V16B(), v1.V8H(), 1);
+ __ Sqshrun(v17.V4H(), v1.V4S(), 16);
+ __ Sqshrun2(v17.V8H(), v2.V4S(), 1);
+ __ Sqshrun(v18.V2S(), v3.V2D(), 32);
+ __ Sqshrun2(v18.V4S(), v3.V2D(), 1);
+
+ __ Sqshrun(b19, h0, 8);
+ __ Sqshrun(h20, s1, 16);
+ __ Sqshrun(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x00000000000000ff, 0x7f00007f00000100, q16);
+ ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x000000007fffffff, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_sqrshrun) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Sqrshrun(v16.V8B(), v0.V8H(), 8);
+ __ Sqrshrun2(v16.V16B(), v1.V8H(), 1);
+ __ Sqrshrun(v17.V4H(), v1.V4S(), 16);
+ __ Sqrshrun2(v17.V8H(), v2.V4S(), 1);
+ __ Sqrshrun(v18.V2S(), v3.V2D(), 32);
+ __ Sqrshrun2(v18.V4S(), v3.V2D(), 1);
+
+ __ Sqrshrun(b19, h0, 8);
+ __ Sqrshrun(h20, s1, 16);
+ __ Sqrshrun(s21, d3, 32);
+
+ END();
+
+ RUN();
+ ASSERT_EQUAL_128(0x00000000000001ff, 0x7f01007f00000100, q16);
+ ASSERT_EQUAL_128(0x000000000000ffff, 0x0000000000000001, q17);
+ ASSERT_EQUAL_128(0x00000000ffffffff, 0x0000000080000000, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000001, q20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080000000, q21);
+ TEARDOWN();
+}
+
+TEST(neon_modimm_bic) {
+ SETUP();
+
+ START();
+
+ __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+
+ __ Bic(v16.V4H(), 0x00, 0);
+ __ Bic(v17.V4H(), 0xff, 8);
+ __ Bic(v18.V8H(), 0x00, 0);
+ __ Bic(v19.V8H(), 0xff, 8);
+
+ __ Bic(v20.V2S(), 0x00, 0);
+ __ Bic(v21.V2S(), 0xff, 8);
+ __ Bic(v22.V2S(), 0x00, 16);
+ __ Bic(v23.V2S(), 0xff, 24);
+
+ __ Bic(v24.V4S(), 0xff, 0);
+ __ Bic(v25.V4S(), 0x00, 8);
+ __ Bic(v26.V4S(), 0xff, 16);
+ __ Bic(v27.V4S(), 0x00, 24);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
+ ASSERT_EQUAL_128(0x0, 0x005500ff000000aa, q17);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
+ ASSERT_EQUAL_128(0x00aa0055000000aa, 0x005500ff000000aa, q19);
+
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
+ ASSERT_EQUAL_128(0x0, 0x555500ff000000aa, q21);
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
+ ASSERT_EQUAL_128(0x0, 0x0055ffff0000aaaa, q23);
+
+ ASSERT_EQUAL_128(0x00aaff00ff005500, 0x5555ff000000aa00, q24);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
+ ASSERT_EQUAL_128(0x0000ff55ff0055aa, 0x5500ffff0000aaaa, q26);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_movi_16bit_any) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V4H(), 0xabab);
+ __ Movi(v1.V4H(), 0xab00);
+ __ Movi(v2.V4H(), 0xabff);
+ __ Movi(v3.V8H(), 0x00ab);
+ __ Movi(v4.V8H(), 0xffab);
+ __ Movi(v5.V8H(), 0xabcd);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xabababababababab, q0);
+ ASSERT_EQUAL_128(0x0, 0xab00ab00ab00ab00, q1);
+ ASSERT_EQUAL_128(0x0, 0xabffabffabffabff, q2);
+ ASSERT_EQUAL_128(0x00ab00ab00ab00ab, 0x00ab00ab00ab00ab, q3);
+ ASSERT_EQUAL_128(0xffabffabffabffab, 0xffabffabffabffab, q4);
+ ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_movi_32bit_any) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2S(), 0x000000ab);
+ __ Movi(v1.V2S(), 0x0000ab00);
+ __ Movi(v2.V4S(), 0x00ab0000);
+ __ Movi(v3.V4S(), 0xab000000);
+
+ __ Movi(v4.V2S(), 0xffffffab);
+ __ Movi(v5.V2S(), 0xffffabff);
+ __ Movi(v6.V4S(), 0xffabffff);
+ __ Movi(v7.V4S(), 0xabffffff);
+
+ __ Movi(v16.V2S(), 0x0000abff);
+ __ Movi(v17.V2S(), 0x00abffff);
+ __ Movi(v18.V4S(), 0xffab0000);
+ __ Movi(v19.V4S(), 0xffffab00);
+
+ __ Movi(v20.V4S(), 0xabababab);
+ __ Movi(v21.V4S(), 0xabcdabcd);
+ __ Movi(v22.V4S(), 0xabcdef01);
+ __ Movi(v23.V4S(), 0x00ffff00);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x000000ab000000ab, q0);
+ ASSERT_EQUAL_128(0x0, 0x0000ab000000ab00, q1);
+ ASSERT_EQUAL_128(0x00ab000000ab0000, 0x00ab000000ab0000, q2);
+ ASSERT_EQUAL_128(0xab000000ab000000, 0xab000000ab000000, q3);
+
+ ASSERT_EQUAL_128(0x0, 0xffffffabffffffab, q4);
+ ASSERT_EQUAL_128(0x0, 0xffffabffffffabff, q5);
+ ASSERT_EQUAL_128(0xffabffffffabffff, 0xffabffffffabffff, q6);
+ ASSERT_EQUAL_128(0xabffffffabffffff, 0xabffffffabffffff, q7);
+
+ ASSERT_EQUAL_128(0x0, 0x0000abff0000abff, q16);
+ ASSERT_EQUAL_128(0x0, 0x00abffff00abffff, q17);
+ ASSERT_EQUAL_128(0xffab0000ffab0000, 0xffab0000ffab0000, q18);
+ ASSERT_EQUAL_128(0xffffab00ffffab00, 0xffffab00ffffab00, q19);
+
+ ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q20);
+ ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q21);
+ ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q22);
+ ASSERT_EQUAL_128(0x00ffff0000ffff00, 0x00ffff0000ffff00, q23);
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_movi_64bit_any) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V1D(), 0x00ffff0000ffffff);
+ __ Movi(v1.V2D(), 0xabababababababab);
+ __ Movi(v2.V2D(), 0xabcdabcdabcdabcd);
+ __ Movi(v3.V2D(), 0xabcdef01abcdef01);
+ __ Movi(v4.V1D(), 0xabcdef0123456789);
+ __ Movi(v5.V2D(), 0xabcdef0123456789);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q0);
+ ASSERT_EQUAL_128(0xabababababababab, 0xabababababababab, q1);
+ ASSERT_EQUAL_128(0xabcdabcdabcdabcd, 0xabcdabcdabcdabcd, q2);
+ ASSERT_EQUAL_128(0xabcdef01abcdef01, 0xabcdef01abcdef01, q3);
+ ASSERT_EQUAL_128(0x0, 0xabcdef0123456789, q4);
+ ASSERT_EQUAL_128(0xabcdef0123456789, 0xabcdef0123456789, q5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_movi) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V8B(), 0xaa);
+ __ Movi(v1.V16B(), 0x55);
+
+ __ Movi(d2, 0x00ffff0000ffffff);
+ __ Movi(v3.V2D(), 0x00ffff0000ffffff);
+
+ __ Movi(v16.V4H(), 0x00, LSL, 0);
+ __ Movi(v17.V4H(), 0xff, LSL, 8);
+ __ Movi(v18.V8H(), 0x00, LSL, 0);
+ __ Movi(v19.V8H(), 0xff, LSL, 8);
+
+ __ Movi(v20.V2S(), 0x00, LSL, 0);
+ __ Movi(v21.V2S(), 0xff, LSL, 8);
+ __ Movi(v22.V2S(), 0x00, LSL, 16);
+ __ Movi(v23.V2S(), 0xff, LSL, 24);
+
+ __ Movi(v24.V4S(), 0xff, LSL, 0);
+ __ Movi(v25.V4S(), 0x00, LSL, 8);
+ __ Movi(v26.V4S(), 0xff, LSL, 16);
+ __ Movi(v27.V4S(), 0x00, LSL, 24);
+
+ __ Movi(v28.V2S(), 0xaa, MSL, 8);
+ __ Movi(v29.V2S(), 0x55, MSL, 16);
+ __ Movi(v30.V4S(), 0xff, MSL, 8);
+ __ Movi(v31.V4S(), 0x00, MSL, 16);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xaaaaaaaaaaaaaaaa, q0);
+ ASSERT_EQUAL_128(0x5555555555555555, 0x5555555555555555, q1);
+
+ ASSERT_EQUAL_128(0x0, 0x00ffff0000ffffff, q2);
+ ASSERT_EQUAL_128(0x00ffff0000ffffff, 0x00ffff0000ffffff, q3);
+
+ ASSERT_EQUAL_128(0x0, 0x0000000000000000, q16);
+ ASSERT_EQUAL_128(0x0, 0xff00ff00ff00ff00, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q18);
+ ASSERT_EQUAL_128(0xff00ff00ff00ff00, 0xff00ff00ff00ff00, q19);
+
+ ASSERT_EQUAL_128(0x0, 0x0000000000000000, q20);
+ ASSERT_EQUAL_128(0x0, 0x0000ff000000ff00, q21);
+ ASSERT_EQUAL_128(0x0, 0x0000000000000000, q22);
+ ASSERT_EQUAL_128(0x0, 0xff000000ff000000, q23);
+
+ ASSERT_EQUAL_128(0x000000ff000000ff, 0x000000ff000000ff, q24);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q25);
+ ASSERT_EQUAL_128(0x00ff000000ff0000, 0x00ff000000ff0000, q26);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000000000000, q27);
+
+ ASSERT_EQUAL_128(0x0, 0x0000aaff0000aaff, q28);
+ ASSERT_EQUAL_128(0x0, 0x0055ffff0055ffff, q29);
+ ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q30);
+ ASSERT_EQUAL_128(0x0000ffff0000ffff, 0x0000ffff0000ffff, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_mvni) {
+ SETUP();
+
+ START();
+
+ __ Mvni(v16.V4H(), 0x00, LSL, 0);
+ __ Mvni(v17.V4H(), 0xff, LSL, 8);
+ __ Mvni(v18.V8H(), 0x00, LSL, 0);
+ __ Mvni(v19.V8H(), 0xff, LSL, 8);
+
+ __ Mvni(v20.V2S(), 0x00, LSL, 0);
+ __ Mvni(v21.V2S(), 0xff, LSL, 8);
+ __ Mvni(v22.V2S(), 0x00, LSL, 16);
+ __ Mvni(v23.V2S(), 0xff, LSL, 24);
+
+ __ Mvni(v24.V4S(), 0xff, LSL, 0);
+ __ Mvni(v25.V4S(), 0x00, LSL, 8);
+ __ Mvni(v26.V4S(), 0xff, LSL, 16);
+ __ Mvni(v27.V4S(), 0x00, LSL, 24);
+
+ __ Mvni(v28.V2S(), 0xaa, MSL, 8);
+ __ Mvni(v29.V2S(), 0x55, MSL, 16);
+ __ Mvni(v30.V4S(), 0xff, MSL, 8);
+ __ Mvni(v31.V4S(), 0x00, MSL, 16);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q16);
+ ASSERT_EQUAL_128(0x0, 0x00ff00ff00ff00ff, q17);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q18);
+ ASSERT_EQUAL_128(0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff, q19);
+
+ ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q20);
+ ASSERT_EQUAL_128(0x0, 0xffff00ffffff00ff, q21);
+ ASSERT_EQUAL_128(0x0, 0xffffffffffffffff, q22);
+ ASSERT_EQUAL_128(0x0, 0x00ffffff00ffffff, q23);
+
+ ASSERT_EQUAL_128(0xffffff00ffffff00, 0xffffff00ffffff00, q24);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q25);
+ ASSERT_EQUAL_128(0xff00ffffff00ffff, 0xff00ffffff00ffff, q26);
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q27);
+
+ ASSERT_EQUAL_128(0x0, 0xffff5500ffff5500, q28);
+ ASSERT_EQUAL_128(0x0, 0xffaa0000ffaa0000, q29);
+ ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q30);
+ ASSERT_EQUAL_128(0xffff0000ffff0000, 0xffff0000ffff0000, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_modimm_orr) {
+ SETUP();
+
+ START();
+
+ __ Movi(v16.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v17.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v18.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v19.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v20.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v21.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v22.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v23.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v24.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v25.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v26.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+ __ Movi(v27.V2D(), 0x00aaff55ff0055aa, 0x5555ffff0000aaaa);
+
+ __ Orr(v16.V4H(), 0x00, 0);
+ __ Orr(v17.V4H(), 0xff, 8);
+ __ Orr(v18.V8H(), 0x00, 0);
+ __ Orr(v19.V8H(), 0xff, 8);
+
+ __ Orr(v20.V2S(), 0x00, 0);
+ __ Orr(v21.V2S(), 0xff, 8);
+ __ Orr(v22.V2S(), 0x00, 16);
+ __ Orr(v23.V2S(), 0xff, 24);
+
+ __ Orr(v24.V4S(), 0xff, 0);
+ __ Orr(v25.V4S(), 0x00, 8);
+ __ Orr(v26.V4S(), 0xff, 16);
+ __ Orr(v27.V4S(), 0x00, 24);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q16);
+ ASSERT_EQUAL_128(0x0, 0xff55ffffff00ffaa, q17);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q18);
+ ASSERT_EQUAL_128(0xffaaff55ff00ffaa, 0xff55ffffff00ffaa, q19);
+
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q20);
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000ffaa, q21);
+ ASSERT_EQUAL_128(0x0, 0x5555ffff0000aaaa, q22);
+ ASSERT_EQUAL_128(0x0, 0xff55ffffff00aaaa, q23);
+
+ ASSERT_EQUAL_128(0x00aaffffff0055ff, 0x5555ffff0000aaff, q24);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q25);
+ ASSERT_EQUAL_128(0x00ffff55ffff55aa, 0x55ffffff00ffaaaa, q26);
+ ASSERT_EQUAL_128(0x00aaff55ff0055aa, 0x5555ffff0000aaaa, q27);
+
+ TEARDOWN();
+}
+
+
+// TODO: add arbitrary values once load literal to Q registers is supported.
+TEST(neon_modimm_fmov) {
+ SETUP();
+
+ // Immediates which can be encoded in the instructions.
+ const float kOne = 1.0f;
+ const float kPointFive = 0.5f;
+ const double kMinusThirteen = -13.0;
+ // Immediates which cannot be encoded in the instructions.
+ const float kNonImmFP32 = 255.0f;
+ const double kNonImmFP64 = 12.3456;
+
+ START();
+ __ Fmov(v11.V2S(), kOne);
+ __ Fmov(v12.V4S(), kPointFive);
+ __ Fmov(v22.V2D(), kMinusThirteen);
+ __ Fmov(v13.V2S(), kNonImmFP32);
+ __ Fmov(v14.V4S(), kNonImmFP32);
+ __ Fmov(v23.V2D(), kNonImmFP64);
+ __ Fmov(v1.V2S(), 0.0);
+ __ Fmov(v2.V4S(), 0.0);
+ __ Fmov(v3.V2D(), 0.0);
+ __ Fmov(v4.V2S(), kFP32PositiveInfinity);
+ __ Fmov(v5.V4S(), kFP32PositiveInfinity);
+ __ Fmov(v6.V2D(), kFP64PositiveInfinity);
+ END();
+
+ RUN();
+
+ const uint64_t kOne1S = float_to_rawbits(1.0);
+ const uint64_t kOne2S = (kOne1S << 32) | kOne1S;
+ const uint64_t kPointFive1S = float_to_rawbits(0.5);
+ const uint64_t kPointFive2S = (kPointFive1S << 32) | kPointFive1S;
+ const uint64_t kMinusThirteen1D = double_to_rawbits(-13.0);
+ const uint64_t kNonImmFP321S = float_to_rawbits(kNonImmFP32);
+ const uint64_t kNonImmFP322S = (kNonImmFP321S << 32) | kNonImmFP321S;
+ const uint64_t kNonImmFP641D = double_to_rawbits(kNonImmFP64);
+ const uint64_t kFP32Inf1S = float_to_rawbits(kFP32PositiveInfinity);
+ const uint64_t kFP32Inf2S = (kFP32Inf1S << 32) | kFP32Inf1S;
+ const uint64_t kFP64Inf1D = double_to_rawbits(kFP64PositiveInfinity);
+
+ ASSERT_EQUAL_128(0x0, kOne2S, q11);
+ ASSERT_EQUAL_128(kPointFive2S, kPointFive2S, q12);
+ ASSERT_EQUAL_128(kMinusThirteen1D, kMinusThirteen1D, q22);
+ ASSERT_EQUAL_128(0x0, kNonImmFP322S, q13);
+ ASSERT_EQUAL_128(kNonImmFP322S, kNonImmFP322S, q14);
+ ASSERT_EQUAL_128(kNonImmFP641D, kNonImmFP641D, q23);
+ ASSERT_EQUAL_128(0x0, 0x0, q1);
+ ASSERT_EQUAL_128(0x0, 0x0, q2);
+ ASSERT_EQUAL_128(0x0, 0x0, q3);
+ ASSERT_EQUAL_128(0x0, kFP32Inf2S, q4);
+ ASSERT_EQUAL_128(kFP32Inf2S, kFP32Inf2S, q5);
+ ASSERT_EQUAL_128(kFP64Inf1D, kFP64Inf1D, q6);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_perm) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0001020304050607, 0x08090a0b0c0d0e0f);
+ __ Movi(v1.V2D(), 0x1011121314151617, 0x18191a1b1c1d1e1f);
+
+ __ Trn1(v16.V16B(), v0.V16B(), v1.V16B());
+ __ Trn2(v17.V16B(), v0.V16B(), v1.V16B());
+ __ Zip1(v18.V16B(), v0.V16B(), v1.V16B());
+ __ Zip2(v19.V16B(), v0.V16B(), v1.V16B());
+ __ Uzp1(v20.V16B(), v0.V16B(), v1.V16B());
+ __ Uzp2(v21.V16B(), v0.V16B(), v1.V16B());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x1101130315051707, 0x19091b0b1d0d1f0f, q16);
+ ASSERT_EQUAL_128(0x1000120214041606, 0x18081a0a1c0c1e0e, q17);
+ ASSERT_EQUAL_128(0x180819091a0a1b0b, 0x1c0c1d0d1e0e1f0f, q18);
+ ASSERT_EQUAL_128(0x1000110112021303, 0x1404150516061707, q19);
+ ASSERT_EQUAL_128(0x11131517191b1d1f, 0x01030507090b0d0f, q20);
+ ASSERT_EQUAL_128(0x10121416181a1c1e, 0x00020406080a0c0e, q21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_dup_element) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
+ __ Movi(v2.V2D(), 0xffeddccbbaae9988, 0x0011223344556677);
+ __ Movi(v3.V2D(), 0x7766554433221100, 0x8899aabbccddeeff);
+ __ Movi(v4.V2D(), 0x7766554433221100, 0x0123456789abcdef);
+ __ Movi(v5.V2D(), 0x0011223344556677, 0x0123456789abcdef);
+
+ __ Dup(v16.V16B(), v0.B(), 0);
+ __ Dup(v17.V8H(), v1.H(), 7);
+ __ Dup(v18.V4S(), v1.S(), 3);
+ __ Dup(v19.V2D(), v0.D(), 0);
+
+ __ Dup(v20.V8B(), v0.B(), 0);
+ __ Dup(v21.V4H(), v1.H(), 7);
+ __ Dup(v22.V2S(), v1.S(), 3);
+
+ __ Dup(v23.B(), v0.B(), 0);
+ __ Dup(v24.H(), v1.H(), 7);
+ __ Dup(v25.S(), v1.S(), 3);
+ __ Dup(v26.D(), v0.D(), 0);
+
+ __ Dup(v2.V16B(), v2.B(), 0);
+ __ Dup(v3.V8H(), v3.H(), 7);
+ __ Dup(v4.V4S(), v4.S(), 0);
+ __ Dup(v5.V2D(), v5.D(), 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xffffffffffffffff, 0xffffffffffffffff, q16);
+ ASSERT_EQUAL_128(0xffedffedffedffed, 0xffedffedffedffed, q17);
+ ASSERT_EQUAL_128(0xffeddccbffeddccb, 0xffeddccbffeddccb, q18);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
+
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q20);
+ ASSERT_EQUAL_128(0, 0xffedffedffedffed, q21);
+ ASSERT_EQUAL_128(0, 0xffeddccbffeddccb, q22);
+
+ ASSERT_EQUAL_128(0, 0x00000000000000ff, q23);
+ ASSERT_EQUAL_128(0, 0x000000000000ffed, q24);
+ ASSERT_EQUAL_128(0, 0x00000000ffeddccb, q25);
+ ASSERT_EQUAL_128(0, 0x8899aabbccddeeff, q26);
+
+ ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q2);
+ ASSERT_EQUAL_128(0x7766776677667766, 0x7766776677667766, q3);
+ ASSERT_EQUAL_128(0x89abcdef89abcdef, 0x89abcdef89abcdef, q4);
+ ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q5);
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_dup_general) {
+ SETUP();
+
+ START();
+
+ __ Mov(x0, 0x0011223344556677);
+
+ __ Dup(v16.V16B(), w0);
+ __ Dup(v17.V8H(), w0);
+ __ Dup(v18.V4S(), w0);
+ __ Dup(v19.V2D(), x0);
+
+ __ Dup(v20.V8B(), w0);
+ __ Dup(v21.V4H(), w0);
+ __ Dup(v22.V2S(), w0);
+
+ __ Dup(v2.V16B(), wzr);
+ __ Dup(v3.V8H(), wzr);
+ __ Dup(v4.V4S(), wzr);
+ __ Dup(v5.V2D(), xzr);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x7777777777777777, 0x7777777777777777, q16);
+ ASSERT_EQUAL_128(0x6677667766776677, 0x6677667766776677, q17);
+ ASSERT_EQUAL_128(0x4455667744556677, 0x4455667744556677, q18);
+ ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);
+
+ ASSERT_EQUAL_128(0, 0x7777777777777777, q20);
+ ASSERT_EQUAL_128(0, 0x6677667766776677, q21);
+ ASSERT_EQUAL_128(0, 0x4455667744556677, q22);
+
+ ASSERT_EQUAL_128(0, 0, q2);
+ ASSERT_EQUAL_128(0, 0, q3);
+ ASSERT_EQUAL_128(0, 0, q4);
+ ASSERT_EQUAL_128(0, 0, q5);
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_ins_element) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
+ __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
+ __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+ __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+
+ __ Movi(v2.V2D(), 0, 0x0011223344556677);
+ __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
+ __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
+ __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
+
+ __ Ins(v16.V16B(), 15, v0.V16B(), 0);
+ __ Ins(v17.V8H(), 0, v1.V8H(), 7);
+ __ Ins(v18.V4S(), 3, v1.V4S(), 0);
+ __ Ins(v19.V2D(), 1, v0.V2D(), 0);
+
+ __ Ins(v2.V16B(), 2, v2.V16B(), 0);
+ __ Ins(v3.V8H(), 0, v3.V8H(), 7);
+ __ Ins(v4.V4S(), 3, v4.V4S(), 0);
+ __ Ins(v5.V2D(), 0, v5.V2D(), 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
+ ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
+ ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
+
+ ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
+ ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
+ ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
+ ASSERT_EQUAL_128(0, 0, q5);
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_mov_element) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
+ __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
+ __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+ __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+
+ __ Movi(v2.V2D(), 0, 0x0011223344556677);
+ __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
+ __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
+ __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
+
+ __ Mov(v16.V16B(), 15, v0.V16B(), 0);
+ __ Mov(v17.V8H(), 0, v1.V8H(), 7);
+ __ Mov(v18.V4S(), 3, v1.V4S(), 0);
+ __ Mov(v19.V2D(), 1, v0.V2D(), 0);
+
+ __ Mov(v2.V16B(), 2, v2.V16B(), 0);
+ __ Mov(v3.V8H(), 0, v3.V8H(), 7);
+ __ Mov(v4.V4S(), 3, v4.V4S(), 0);
+ __ Mov(v5.V2D(), 0, v5.V2D(), 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff23456789abcdef, 0xfedcba9876543210, q16);
+ ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789abffed, q17);
+ ASSERT_EQUAL_128(0x3322110044556677, 0x8899aabbccddeeff, q18);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x8899aabbccddeeff, q19);
+
+ ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
+ ASSERT_EQUAL_128(0, 0x8899aabbccdd0000, q3);
+ ASSERT_EQUAL_128(0x89abcdef00000000, 0x0123456789abcdef, q4);
+ ASSERT_EQUAL_128(0, 0, q5);
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_smov) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
+
+ __ Smov(w0, v0.B(), 7);
+ __ Smov(w1, v0.B(), 15);
+
+ __ Smov(w2, v0.H(), 0);
+ __ Smov(w3, v0.H(), 3);
+
+ __ Smov(x4, v0.B(), 7);
+ __ Smov(x5, v0.B(), 15);
+
+ __ Smov(x6, v0.H(), 0);
+ __ Smov(x7, v0.H(), 3);
+
+ __ Smov(x16, v0.S(), 0);
+ __ Smov(x17, v0.S(), 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_32(0xfffffffe, w0);
+ ASSERT_EQUAL_32(0x00000001, w1);
+ ASSERT_EQUAL_32(0x00003210, w2);
+ ASSERT_EQUAL_32(0xfffffedc, w3);
+ ASSERT_EQUAL_64(0xfffffffffffffffe, x4);
+ ASSERT_EQUAL_64(0x0000000000000001, x5);
+ ASSERT_EQUAL_64(0x0000000000003210, x6);
+ ASSERT_EQUAL_64(0xfffffffffffffedc, x7);
+ ASSERT_EQUAL_64(0x0000000076543210, x16);
+ ASSERT_EQUAL_64(0xfffffffffedcba98, x17);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_umov_mov) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
+
+ __ Umov(w0, v0.B(), 15);
+ __ Umov(w1, v0.H(), 0);
+ __ Umov(w2, v0.S(), 3);
+ __ Umov(x3, v0.D(), 1);
+
+ __ Mov(w4, v0.S(), 3);
+ __ Mov(x5, v0.D(), 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_32(0x00000001, w0);
+ ASSERT_EQUAL_32(0x00003210, w1);
+ ASSERT_EQUAL_32(0x01234567, w2);
+ ASSERT_EQUAL_64(0x0123456789abcdef, x3);
+ ASSERT_EQUAL_32(0x01234567, w4);
+ ASSERT_EQUAL_64(0x0123456789abcdef, x5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_copy_ins_general) {
+ SETUP();
+
+ START();
+
+ __ Mov(x0, 0x0011223344556677);
+ __ Movi(v16.V2D(), 0x0123456789abcdef, 0xfedcba9876543210);
+ __ Movi(v17.V2D(), 0xfedcba9876543210, 0x0123456789abcdef);
+ __ Movi(v18.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v19.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+
+ __ Movi(v2.V2D(), 0, 0x0011223344556677);
+ __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
+ __ Movi(v4.V2D(), 0, 0x0123456789abcdef);
+ __ Movi(v5.V2D(), 0, 0x0123456789abcdef);
+
+ __ Ins(v16.V16B(), 15, w0);
+ __ Ins(v17.V8H(), 0, w0);
+ __ Ins(v18.V4S(), 3, w0);
+ __ Ins(v19.V2D(), 0, x0);
+
+ __ Ins(v2.V16B(), 2, w0);
+ __ Ins(v3.V8H(), 0, w0);
+ __ Ins(v4.V4S(), 3, w0);
+ __ Ins(v5.V2D(), 1, x0);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x7723456789abcdef, 0xfedcba9876543210, q16);
+ ASSERT_EQUAL_128(0xfedcba9876543210, 0x0123456789ab6677, q17);
+ ASSERT_EQUAL_128(0x4455667744556677, 0x8899aabbccddeeff, q18);
+ ASSERT_EQUAL_128(0x0011223344556677, 0x0011223344556677, q19);
+
+ ASSERT_EQUAL_128(0, 0x0011223344776677, q2);
+ ASSERT_EQUAL_128(0, 0x8899aabbccdd6677, q3);
+ ASSERT_EQUAL_128(0x4455667700000000, 0x0123456789abcdef, q4);
+ ASSERT_EQUAL_128(0x0011223344556677, 0x0123456789abcdef, q5);
+ TEARDOWN();
+}
+
+
+TEST(neon_extract_ext) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0011223344556677, 0x8899aabbccddeeff);
+ __ Movi(v1.V2D(), 0xffeddccbbaae9988, 0x7766554433221100);
+
+ __ Movi(v2.V2D(), 0, 0x0011223344556677);
+ __ Movi(v3.V2D(), 0, 0x8899aabbccddeeff);
+
+ __ Ext(v16.V16B(), v0.V16B(), v1.V16B(), 0);
+ __ Ext(v17.V16B(), v0.V16B(), v1.V16B(), 15);
+ __ Ext(v1.V16B(), v0.V16B(), v1.V16B(), 8); // Dest is same as one Src
+ __ Ext(v0.V16B(), v0.V16B(), v0.V16B(), 8); // All reg are the same
+
+ __ Ext(v18.V8B(), v2.V8B(), v3.V8B(), 0);
+ __ Ext(v19.V8B(), v2.V8B(), v3.V8B(), 7);
+ __ Ext(v2.V8B(), v2.V8B(), v3.V8B(), 4); // Dest is same as one Src
+ __ Ext(v3.V8B(), v3.V8B(), v3.V8B(), 4); // All reg are the same
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0011223344556677, 0x8899aabbccddeeff, q16);
+ ASSERT_EQUAL_128(0xeddccbbaae998877, 0x6655443322110000, q17);
+ ASSERT_EQUAL_128(0x7766554433221100, 0x0011223344556677, q1);
+ ASSERT_EQUAL_128(0x8899aabbccddeeff, 0x0011223344556677, q0);
+
+ ASSERT_EQUAL_128(0, 0x0011223344556677, q18);
+ ASSERT_EQUAL_128(0, 0x99aabbccddeeff00, q19);
+ ASSERT_EQUAL_128(0, 0xccddeeff00112233, q2);
+ ASSERT_EQUAL_128(0, 0xccddeeff8899aabb, q3);
+ TEARDOWN();
+}
+
+
+TEST(neon_3different_uaddl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0000000000000000, 0x0000000000000000);
+ __ Movi(v1.V2D(), 0, 0x00010280810e0fff);
+ __ Movi(v2.V2D(), 0, 0x0101010101010101);
+
+ __ Movi(v3.V2D(), 0x0000000000000000, 0x0000000000000000);
+ __ Movi(v4.V2D(), 0x0000000000000000, 0x0000000000000000);
+ __ Movi(v5.V2D(), 0, 0x0000000180008001);
+ __ Movi(v6.V2D(), 0, 0x000e000ff000ffff);
+ __ Movi(v7.V2D(), 0, 0x0001000100010001);
+
+ __ Movi(v16.V2D(), 0x0000000000000000, 0x0000000000000000);
+ __ Movi(v17.V2D(), 0x0000000000000000, 0x0000000000000000);
+ __ Movi(v18.V2D(), 0, 0x0000000000000001);
+ __ Movi(v19.V2D(), 0, 0x80000001ffffffff);
+ __ Movi(v20.V2D(), 0, 0x0000000100000001);
+
+ __ Uaddl(v0.V8H(), v1.V8B(), v2.V8B());
+
+ __ Uaddl(v3.V4S(), v5.V4H(), v7.V4H());
+ __ Uaddl(v4.V4S(), v6.V4H(), v7.V4H());
+
+ __ Uaddl(v16.V2D(), v18.V2S(), v20.V2S());
+ __ Uaddl(v17.V2D(), v19.V2S(), v20.V2S());
+
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0001000200030081, 0x0082000f00100100, q0);
+ ASSERT_EQUAL_128(0x0000000100000002, 0x0000800100008002, q3);
+ ASSERT_EQUAL_128(0x0000000f00000010, 0x0000f00100010000, q4);
+ ASSERT_EQUAL_128(0x0000000000000001, 0x0000000000000002, q16);
+ ASSERT_EQUAL_128(0x0000000080000002, 0x0000000100000000, q17);
+ TEARDOWN();
+}
+
+
+TEST(neon_3different_addhn_subhn) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Addhn(v16.V8B(), v0.V8H(), v1.V8H());
+ __ Addhn2(v16.V16B(), v2.V8H(), v3.V8H());
+ __ Raddhn(v17.V8B(), v0.V8H(), v1.V8H());
+ __ Raddhn2(v17.V16B(), v2.V8H(), v3.V8H());
+ __ Subhn(v18.V8B(), v0.V8H(), v1.V8H());
+ __ Subhn2(v18.V16B(), v2.V8H(), v3.V8H());
+ __ Rsubhn(v19.V8B(), v0.V8H(), v1.V8H());
+ __ Rsubhn2(v19.V16B(), v2.V8H(), v3.V8H());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000ff007fff7fff, 0xff81817f80ff0100, q16);
+ ASSERT_EQUAL_128(0x0000000080008000, 0xff81817f81ff0201, q17);
+ ASSERT_EQUAL_128(0x0000ffff80008000, 0xff80817f80ff0100, q18);
+ ASSERT_EQUAL_128(0x0000000080008000, 0xff81827f81ff0101, q19);
+ TEARDOWN();
+}
+
+TEST(neon_d_only_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
+ __ Movi(v3.V2D(), 0xffffffffffffffff, 2);
+ __ Movi(v4.V2D(), 0xffffffffffffffff, -2);
+
+ __ Add(d16, d0, d0);
+ __ Add(d17, d1, d1);
+ __ Add(d18, d2, d2);
+ __ Sub(d19, d0, d0);
+ __ Sub(d20, d0, d1);
+ __ Sub(d21, d1, d0);
+ __ Ushl(d22, d0, d3);
+ __ Ushl(d23, d0, d4);
+ __ Sshl(d24, d0, d3);
+ __ Sshl(d25, d0, d4);
+ __ Ushr(d26, d0, 1);
+ __ Sshr(d27, d0, 3);
+ __ Shl(d28, d0, 0);
+ __ Shl(d29, d0, 16);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q16);
+ ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q17);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q18);
+ ASSERT_EQUAL_128(0, 0, q19);
+ ASSERT_EQUAL_128(0, 0x7000000170017171, q20);
+ ASSERT_EQUAL_128(0, 0x8ffffffe8ffe8e8f, q21);
+ ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q22);
+ ASSERT_EQUAL_128(0, 0x3c0000003c003c3c, q23);
+ ASSERT_EQUAL_128(0, 0xc0000003c003c3c0, q24);
+ ASSERT_EQUAL_128(0, 0xfc0000003c003c3c, q25);
+ ASSERT_EQUAL_128(0, 0x7800000078007878, q26);
+ ASSERT_EQUAL_128(0, 0xfe0000001e001e1e, q27);
+ ASSERT_EQUAL_128(0, 0xf0000000f000f0f0, q28);
+ ASSERT_EQUAL_128(0, 0x0000f000f0f00000, q29);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqshl_imm_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0, 0x7f);
+ __ Movi(v1.V2D(), 0x0, 0x80);
+ __ Movi(v2.V2D(), 0x0, 0x01);
+ __ Sqshl(b16, b0, 1);
+ __ Sqshl(b17, b1, 1);
+ __ Sqshl(b18, b2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fff);
+ __ Movi(v1.V2D(), 0x0, 0x8000);
+ __ Movi(v2.V2D(), 0x0, 0x0001);
+ __ Sqshl(h19, h0, 1);
+ __ Sqshl(h20, h1, 1);
+ __ Sqshl(h21, h2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffff);
+ __ Movi(v1.V2D(), 0x0, 0x80000000);
+ __ Movi(v2.V2D(), 0x0, 0x00000001);
+ __ Sqshl(s22, s0, 1);
+ __ Sqshl(s23, s1, 1);
+ __ Sqshl(s24, s2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
+ __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
+ __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
+ __ Sqshl(d25, d0, 1);
+ __ Sqshl(d26, d1, 1);
+ __ Sqshl(d27, d2, 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x7f, q16);
+ ASSERT_EQUAL_128(0, 0x80, q17);
+ ASSERT_EQUAL_128(0, 0x02, q18);
+
+ ASSERT_EQUAL_128(0, 0x7fff, q19);
+ ASSERT_EQUAL_128(0, 0x8000, q20);
+ ASSERT_EQUAL_128(0, 0x0002, q21);
+
+ ASSERT_EQUAL_128(0, 0x7fffffff, q22);
+ ASSERT_EQUAL_128(0, 0x80000000, q23);
+ ASSERT_EQUAL_128(0, 0x00000002, q24);
+
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q25);
+ ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
+ ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_uqshl_imm_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0, 0x7f);
+ __ Movi(v1.V2D(), 0x0, 0x80);
+ __ Movi(v2.V2D(), 0x0, 0x01);
+ __ Uqshl(b16, b0, 1);
+ __ Uqshl(b17, b1, 1);
+ __ Uqshl(b18, b2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fff);
+ __ Movi(v1.V2D(), 0x0, 0x8000);
+ __ Movi(v2.V2D(), 0x0, 0x0001);
+ __ Uqshl(h19, h0, 1);
+ __ Uqshl(h20, h1, 1);
+ __ Uqshl(h21, h2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffff);
+ __ Movi(v1.V2D(), 0x0, 0x80000000);
+ __ Movi(v2.V2D(), 0x0, 0x00000001);
+ __ Uqshl(s22, s0, 1);
+ __ Uqshl(s23, s1, 1);
+ __ Uqshl(s24, s2, 1);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
+ __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
+ __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
+ __ Uqshl(d25, d0, 1);
+ __ Uqshl(d26, d1, 1);
+ __ Uqshl(d27, d2, 1);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xfe, q16);
+ ASSERT_EQUAL_128(0, 0xff, q17);
+ ASSERT_EQUAL_128(0, 0x02, q18);
+
+ ASSERT_EQUAL_128(0, 0xfffe, q19);
+ ASSERT_EQUAL_128(0, 0xffff, q20);
+ ASSERT_EQUAL_128(0, 0x0002, q21);
+
+ ASSERT_EQUAL_128(0, 0xfffffffe, q22);
+ ASSERT_EQUAL_128(0, 0xffffffff, q23);
+ ASSERT_EQUAL_128(0, 0x00000002, q24);
+
+ ASSERT_EQUAL_128(0, 0xfffffffffffffffe, q25);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q26);
+ ASSERT_EQUAL_128(0, 0x0000000000000002, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqshlu_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x0, 0x7f);
+ __ Movi(v1.V2D(), 0x0, 0x80);
+ __ Movi(v2.V2D(), 0x0, 0x01);
+ __ Sqshlu(b16, b0, 2);
+ __ Sqshlu(b17, b1, 2);
+ __ Sqshlu(b18, b2, 2);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fff);
+ __ Movi(v1.V2D(), 0x0, 0x8000);
+ __ Movi(v2.V2D(), 0x0, 0x0001);
+ __ Sqshlu(h19, h0, 2);
+ __ Sqshlu(h20, h1, 2);
+ __ Sqshlu(h21, h2, 2);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffff);
+ __ Movi(v1.V2D(), 0x0, 0x80000000);
+ __ Movi(v2.V2D(), 0x0, 0x00000001);
+ __ Sqshlu(s22, s0, 2);
+ __ Sqshlu(s23, s1, 2);
+ __ Sqshlu(s24, s2, 2);
+
+ __ Movi(v0.V2D(), 0x0, 0x7fffffffffffffff);
+ __ Movi(v1.V2D(), 0x0, 0x8000000000000000);
+ __ Movi(v2.V2D(), 0x0, 0x0000000000000001);
+ __ Sqshlu(d25, d0, 2);
+ __ Sqshlu(d26, d1, 2);
+ __ Sqshlu(d27, d2, 2);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xff, q16);
+ ASSERT_EQUAL_128(0, 0x00, q17);
+ ASSERT_EQUAL_128(0, 0x04, q18);
+
+ ASSERT_EQUAL_128(0, 0xffff, q19);
+ ASSERT_EQUAL_128(0, 0x0000, q20);
+ ASSERT_EQUAL_128(0, 0x0004, q21);
+
+ ASSERT_EQUAL_128(0, 0xffffffff, q22);
+ ASSERT_EQUAL_128(0, 0x00000000, q23);
+ ASSERT_EQUAL_128(0, 0x00000004, q24);
+
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
+ ASSERT_EQUAL_128(0, 0x0000000000000000, q26);
+ ASSERT_EQUAL_128(0, 0x0000000000000004, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sshll) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+
+ __ Sshll(v16.V8H(), v0.V8B(), 4);
+ __ Sshll2(v17.V8H(), v0.V16B(), 4);
+
+ __ Sshll(v18.V4S(), v1.V4H(), 8);
+ __ Sshll2(v19.V4S(), v1.V8H(), 8);
+
+ __ Sshll(v20.V2D(), v2.V2S(), 16);
+ __ Sshll2(v21.V2D(), v2.V4S(), 16);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xf800f810fff00000, 0x001007f0f800f810, q16);
+ ASSERT_EQUAL_128(0x07f000100000fff0, 0xf810f80007f00010, q17);
+ ASSERT_EQUAL_128(0xffffff0000000000, 0x00000100007fff00, q18);
+ ASSERT_EQUAL_128(0xff800000ff800100, 0xffffff0000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
+ ASSERT_EQUAL_128(0xffff800000000000, 0xffffffffffff0000, q21);
+ TEARDOWN();
+}
+
+TEST(neon_shll) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+
+ __ Shll(v16.V8H(), v0.V8B(), 8);
+ __ Shll2(v17.V8H(), v0.V16B(), 8);
+
+ __ Shll(v18.V4S(), v1.V4H(), 16);
+ __ Shll2(v19.V4S(), v1.V8H(), 16);
+
+ __ Shll(v20.V2D(), v2.V2S(), 32);
+ __ Shll2(v21.V2D(), v2.V4S(), 32);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x80008100ff000000, 0x01007f0080008100, q16);
+ ASSERT_EQUAL_128(0x7f0001000000ff00, 0x810080007f000100, q17);
+ ASSERT_EQUAL_128(0xffff000000000000, 0x000100007fff0000, q18);
+ ASSERT_EQUAL_128(0x8000000080010000, 0xffff000000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7fffffff00000000, q20);
+ ASSERT_EQUAL_128(0x8000000000000000, 0xffffffff00000000, q21);
+ TEARDOWN();
+}
+
+TEST(neon_ushll) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+
+ __ Ushll(v16.V8H(), v0.V8B(), 4);
+ __ Ushll2(v17.V8H(), v0.V16B(), 4);
+
+ __ Ushll(v18.V4S(), v1.V4H(), 8);
+ __ Ushll2(v19.V4S(), v1.V8H(), 8);
+
+ __ Ushll(v20.V2D(), v2.V2S(), 16);
+ __ Ushll2(v21.V2D(), v2.V4S(), 16);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x080008100ff00000, 0x001007f008000810, q16);
+ ASSERT_EQUAL_128(0x07f0001000000ff0, 0x0810080007f00010, q17);
+ ASSERT_EQUAL_128(0x00ffff0000000000, 0x00000100007fff00, q18);
+ ASSERT_EQUAL_128(0x0080000000800100, 0x00ffff0000000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00007fffffff0000, q20);
+ ASSERT_EQUAL_128(0x0000800000000000, 0x0000ffffffff0000, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_sxtl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+
+ __ Sxtl(v16.V8H(), v0.V8B());
+ __ Sxtl2(v17.V8H(), v0.V16B());
+
+ __ Sxtl(v18.V4S(), v1.V4H());
+ __ Sxtl2(v19.V4S(), v1.V8H());
+
+ __ Sxtl(v20.V2D(), v2.V2S());
+ __ Sxtl2(v21.V2D(), v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0xff80ff81ffff0000, 0x0001007fff80ff81, q16);
+ ASSERT_EQUAL_128(0x007f00010000ffff, 0xff81ff80007f0001, q17);
+ ASSERT_EQUAL_128(0xffffffff00000000, 0x0000000100007fff, q18);
+ ASSERT_EQUAL_128(0xffff8000ffff8001, 0xffffffff00000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
+ ASSERT_EQUAL_128(0xffffffff80000000, 0xffffffffffffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_uxtl) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+
+ __ Uxtl(v16.V8H(), v0.V8B());
+ __ Uxtl2(v17.V8H(), v0.V16B());
+
+ __ Uxtl(v18.V4S(), v1.V4H());
+ __ Uxtl2(v19.V4S(), v1.V8H());
+
+ __ Uxtl(v20.V2D(), v2.V2S());
+ __ Uxtl2(v21.V2D(), v2.V4S());
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0080008100ff0000, 0x0001007f00800081, q16);
+ ASSERT_EQUAL_128(0x007f0001000000ff, 0x00810080007f0001, q17);
+ ASSERT_EQUAL_128(0x0000ffff00000000, 0x0000000100007fff, q18);
+ ASSERT_EQUAL_128(0x0000800000008001, 0x0000ffff00000000, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x000000007fffffff, q20);
+ ASSERT_EQUAL_128(0x0000000080000000, 0x00000000ffffffff, q21);
+ TEARDOWN();
+}
+
+
+TEST(neon_ssra) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v1.V2D());
+ __ Mov(v19.V2D(), v1.V2D());
+ __ Mov(v20.V2D(), v2.V2D());
+ __ Mov(v21.V2D(), v2.V2D());
+ __ Mov(v22.V2D(), v3.V2D());
+ __ Mov(v23.V2D(), v4.V2D());
+ __ Mov(v24.V2D(), v3.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+
+ __ Ssra(v16.V8B(), v0.V8B(), 4);
+ __ Ssra(v17.V16B(), v0.V16B(), 4);
+
+ __ Ssra(v18.V4H(), v1.V4H(), 8);
+ __ Ssra(v19.V8H(), v1.V8H(), 8);
+
+ __ Ssra(v20.V2S(), v2.V2S(), 16);
+ __ Ssra(v21.V4S(), v2.V4S(), 16);
+
+ __ Ssra(v22.V2D(), v3.V2D(), 32);
+ __ Ssra(v23.V2D(), v4.V2D(), 32);
+
+ __ Ssra(d24, d3, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7879fe0001867879, q16);
+ ASSERT_EQUAL_128(0x860100fe79788601, 0x7879fe0001867879, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xfffe00000001807e, q18);
+ ASSERT_EQUAL_128(0x7f807f81fffe0000, 0xfffe00000001807e, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
+ ASSERT_EQUAL_128(0x7fff8000fffffffe, 0x0000000080007ffe, q21);
+ ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007ffffffe, q22);
+ ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
+ TEARDOWN();
+}
+
+TEST(neon_srsra) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v1.V2D());
+ __ Mov(v19.V2D(), v1.V2D());
+ __ Mov(v20.V2D(), v2.V2D());
+ __ Mov(v21.V2D(), v2.V2D());
+ __ Mov(v22.V2D(), v3.V2D());
+ __ Mov(v23.V2D(), v4.V2D());
+ __ Mov(v24.V2D(), v3.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+
+ __ Srsra(v16.V8B(), v0.V8B(), 4);
+ __ Srsra(v17.V16B(), v0.V16B(), 4);
+
+ __ Srsra(v18.V4H(), v1.V4H(), 8);
+ __ Srsra(v19.V8H(), v1.V8H(), 8);
+
+ __ Srsra(v20.V2S(), v2.V2S(), 16);
+ __ Srsra(v21.V4S(), v2.V4S(), 16);
+
+ __ Srsra(v22.V2D(), v3.V2D(), 32);
+ __ Srsra(v23.V2D(), v4.V2D(), 32);
+
+ __ Srsra(d24, d3, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7879ff0001877879, q16);
+ ASSERT_EQUAL_128(0x870100ff79788701, 0x7879ff0001877879, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xffff00000001807f, q18);
+ ASSERT_EQUAL_128(0x7f807f81ffff0000, 0xffff00000001807f, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
+ ASSERT_EQUAL_128(0x7fff8000ffffffff, 0x0000000080007fff, q21);
+ ASSERT_EQUAL_128(0x7fffffff80000001, 0x800000007fffffff, q22);
+ ASSERT_EQUAL_128(0x7fffffff80000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
+
+ TEARDOWN();
+}
+
+TEST(neon_usra) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v1.V2D());
+ __ Mov(v19.V2D(), v1.V2D());
+ __ Mov(v20.V2D(), v2.V2D());
+ __ Mov(v21.V2D(), v2.V2D());
+ __ Mov(v22.V2D(), v3.V2D());
+ __ Mov(v23.V2D(), v4.V2D());
+ __ Mov(v24.V2D(), v3.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+
+ __ Usra(v16.V8B(), v0.V8B(), 4);
+ __ Usra(v17.V16B(), v0.V16B(), 4);
+
+ __ Usra(v18.V4H(), v1.V4H(), 8);
+ __ Usra(v19.V8H(), v1.V8H(), 8);
+
+ __ Usra(v20.V2S(), v2.V2S(), 16);
+ __ Usra(v21.V4S(), v2.V4S(), 16);
+
+ __ Usra(v22.V2D(), v3.V2D(), 32);
+ __ Usra(v23.V2D(), v4.V2D(), 32);
+
+ __ Usra(d24, d3, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x88890e0001868889, q16);
+ ASSERT_EQUAL_128(0x8601000e89888601, 0x88890e0001868889, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00fe00000001807e, q18);
+ ASSERT_EQUAL_128(0x8080808100fe0000, 0x00fe00000001807e, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007ffe, q20);
+ ASSERT_EQUAL_128(0x800080000000fffe, 0x0000000080007ffe, q21);
+ ASSERT_EQUAL_128(0x8000000080000001, 0x800000007ffffffe, q22);
+ ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007ffe, q24);
+
+ TEARDOWN();
+}
+
+TEST(neon_ursra) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0x7f0100ff81807f01, 0x8081ff00017f8081);
+ __ Movi(v1.V2D(), 0x80008001ffff0000, 0xffff000000017fff);
+ __ Movi(v2.V2D(), 0x80000000ffffffff, 0x000000007fffffff);
+ __ Movi(v3.V2D(), 0x8000000000000001, 0x7fffffffffffffff);
+ __ Movi(v4.V2D(), 0x8000000000000000, 0x0000000000000000);
+
+ __ Mov(v16.V2D(), v0.V2D());
+ __ Mov(v17.V2D(), v0.V2D());
+ __ Mov(v18.V2D(), v1.V2D());
+ __ Mov(v19.V2D(), v1.V2D());
+ __ Mov(v20.V2D(), v2.V2D());
+ __ Mov(v21.V2D(), v2.V2D());
+ __ Mov(v22.V2D(), v3.V2D());
+ __ Mov(v23.V2D(), v4.V2D());
+ __ Mov(v24.V2D(), v3.V2D());
+ __ Mov(v25.V2D(), v4.V2D());
+
+ __ Ursra(v16.V8B(), v0.V8B(), 4);
+ __ Ursra(v17.V16B(), v0.V16B(), 4);
+
+ __ Ursra(v18.V4H(), v1.V4H(), 8);
+ __ Ursra(v19.V8H(), v1.V8H(), 8);
+
+ __ Ursra(v20.V2S(), v2.V2S(), 16);
+ __ Ursra(v21.V4S(), v2.V4S(), 16);
+
+ __ Ursra(v22.V2D(), v3.V2D(), 32);
+ __ Ursra(v23.V2D(), v4.V2D(), 32);
+
+ __ Ursra(d24, d3, 48);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x88890f0001878889, q16);
+ ASSERT_EQUAL_128(0x8701000f89888701, 0x88890f0001878889, q17);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00ff00000001807f, q18);
+ ASSERT_EQUAL_128(0x8080808100ff0000, 0x00ff00000001807f, q19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0000000080007fff, q20);
+ ASSERT_EQUAL_128(0x800080000000ffff, 0x0000000080007fff, q21);
+ ASSERT_EQUAL_128(0x8000000080000001, 0x800000007fffffff, q22);
+ ASSERT_EQUAL_128(0x8000000080000000, 0x0000000000000000, q23);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x8000000000007fff, q24);
+ TEARDOWN();
+}
+
+
+TEST(neon_uqshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Uqshl(b16, b0, b2);
+ __ Uqshl(b17, b0, b3);
+ __ Uqshl(b18, b1, b2);
+ __ Uqshl(b19, b1, b3);
+ __ Uqshl(h20, h0, h2);
+ __ Uqshl(h21, h0, h3);
+ __ Uqshl(h22, h1, h2);
+ __ Uqshl(h23, h1, h3);
+ __ Uqshl(s24, s0, s2);
+ __ Uqshl(s25, s0, s3);
+ __ Uqshl(s26, s1, s2);
+ __ Uqshl(s27, s1, s3);
+ __ Uqshl(d28, d0, d2);
+ __ Uqshl(d29, d0, d3);
+ __ Uqshl(d30, d1, d2);
+ __ Uqshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xff, q16);
+ ASSERT_EQUAL_128(0, 0x78, q17);
+ ASSERT_EQUAL_128(0, 0xfe, q18);
+ ASSERT_EQUAL_128(0, 0x3f, q19);
+ ASSERT_EQUAL_128(0, 0xffff, q20);
+ ASSERT_EQUAL_128(0, 0x7878, q21);
+ ASSERT_EQUAL_128(0, 0xfefe, q22);
+ ASSERT_EQUAL_128(0, 0x3fbf, q23);
+ ASSERT_EQUAL_128(0, 0xffffffff, q24);
+ ASSERT_EQUAL_128(0, 0x78007878, q25);
+ ASSERT_EQUAL_128(0, 0xfffefefe, q26);
+ ASSERT_EQUAL_128(0, 0x3fffbfbf, q27);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
+ ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
+ ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
+ ASSERT_EQUAL_128(0, 0x3fffffffbfffbfbf, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Sqshl(b16, b0, b2);
+ __ Sqshl(b17, b0, b3);
+ __ Sqshl(b18, b1, b2);
+ __ Sqshl(b19, b1, b3);
+ __ Sqshl(h20, h0, h2);
+ __ Sqshl(h21, h0, h3);
+ __ Sqshl(h22, h1, h2);
+ __ Sqshl(h23, h1, h3);
+ __ Sqshl(s24, s0, s2);
+ __ Sqshl(s25, s0, s3);
+ __ Sqshl(s26, s1, s2);
+ __ Sqshl(s27, s1, s3);
+ __ Sqshl(d28, d0, d2);
+ __ Sqshl(d29, d0, d3);
+ __ Sqshl(d30, d1, d2);
+ __ Sqshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x80, q16);
+ ASSERT_EQUAL_128(0, 0xdf, q17);
+ ASSERT_EQUAL_128(0, 0x7f, q18);
+ ASSERT_EQUAL_128(0, 0x20, q19);
+ ASSERT_EQUAL_128(0, 0x8000, q20);
+ ASSERT_EQUAL_128(0, 0xdfdf, q21);
+ ASSERT_EQUAL_128(0, 0x7fff, q22);
+ ASSERT_EQUAL_128(0, 0x2020, q23);
+ ASSERT_EQUAL_128(0, 0x80000000, q24);
+ ASSERT_EQUAL_128(0, 0xdfffdfdf, q25);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q26);
+ ASSERT_EQUAL_128(0, 0x20002020, q27);
+ ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
+ ASSERT_EQUAL_128(0, 0xdfffffffdfffdfdf, q29);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_urshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Urshl(d28, d0, d2);
+ __ Urshl(d29, d0, d3);
+ __ Urshl(d30, d1, d2);
+ __ Urshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xe0000001e001e1e0, q28);
+ ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
+ ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
+ ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_srshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Srshl(d28, d0, d2);
+ __ Srshl(d29, d0, d3);
+ __ Srshl(d30, d1, d2);
+ __ Srshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x7fffffff7fff7f7e, q28);
+ ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
+ ASSERT_EQUAL_128(0, 0x8000000080008080, q30);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_uqrshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Uqrshl(b16, b0, b2);
+ __ Uqrshl(b17, b0, b3);
+ __ Uqrshl(b18, b1, b2);
+ __ Uqrshl(b19, b1, b3);
+ __ Uqrshl(h20, h0, h2);
+ __ Uqrshl(h21, h0, h3);
+ __ Uqrshl(h22, h1, h2);
+ __ Uqrshl(h23, h1, h3);
+ __ Uqrshl(s24, s0, s2);
+ __ Uqrshl(s25, s0, s3);
+ __ Uqrshl(s26, s1, s2);
+ __ Uqrshl(s27, s1, s3);
+ __ Uqrshl(d28, d0, d2);
+ __ Uqrshl(d29, d0, d3);
+ __ Uqrshl(d30, d1, d2);
+ __ Uqrshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xff, q16);
+ ASSERT_EQUAL_128(0, 0x78, q17);
+ ASSERT_EQUAL_128(0, 0xfe, q18);
+ ASSERT_EQUAL_128(0, 0x40, q19);
+ ASSERT_EQUAL_128(0, 0xffff, q20);
+ ASSERT_EQUAL_128(0, 0x7878, q21);
+ ASSERT_EQUAL_128(0, 0xfefe, q22);
+ ASSERT_EQUAL_128(0, 0x3fc0, q23);
+ ASSERT_EQUAL_128(0, 0xffffffff, q24);
+ ASSERT_EQUAL_128(0, 0x78007878, q25);
+ ASSERT_EQUAL_128(0, 0xfffefefe, q26);
+ ASSERT_EQUAL_128(0, 0x3fffbfc0, q27);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q28);
+ ASSERT_EQUAL_128(0, 0x7800000078007878, q29);
+ ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q30);
+ ASSERT_EQUAL_128(0, 0x3fffffffbfffbfc0, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqrshl_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xbfffffffbfffbfbf);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x4000000040004040);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x0000000000000001);
+ __ Movi(v3.V2D(), 0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff);
+
+ __ Sqrshl(b16, b0, b2);
+ __ Sqrshl(b17, b0, b3);
+ __ Sqrshl(b18, b1, b2);
+ __ Sqrshl(b19, b1, b3);
+ __ Sqrshl(h20, h0, h2);
+ __ Sqrshl(h21, h0, h3);
+ __ Sqrshl(h22, h1, h2);
+ __ Sqrshl(h23, h1, h3);
+ __ Sqrshl(s24, s0, s2);
+ __ Sqrshl(s25, s0, s3);
+ __ Sqrshl(s26, s1, s2);
+ __ Sqrshl(s27, s1, s3);
+ __ Sqrshl(d28, d0, d2);
+ __ Sqrshl(d29, d0, d3);
+ __ Sqrshl(d30, d1, d2);
+ __ Sqrshl(d31, d1, d3);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x80, q16);
+ ASSERT_EQUAL_128(0, 0xe0, q17);
+ ASSERT_EQUAL_128(0, 0x7f, q18);
+ ASSERT_EQUAL_128(0, 0x20, q19);
+ ASSERT_EQUAL_128(0, 0x8000, q20);
+ ASSERT_EQUAL_128(0, 0xdfe0, q21);
+ ASSERT_EQUAL_128(0, 0x7fff, q22);
+ ASSERT_EQUAL_128(0, 0x2020, q23);
+ ASSERT_EQUAL_128(0, 0x80000000, q24);
+ ASSERT_EQUAL_128(0, 0xdfffdfe0, q25);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q26);
+ ASSERT_EQUAL_128(0, 0x20002020, q27);
+ ASSERT_EQUAL_128(0, 0x8000000000000000, q28);
+ ASSERT_EQUAL_128(0, 0xdfffffffdfffdfe0, q29);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q30);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_uqadd_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
+
+ __ Uqadd(b16, b0, b0);
+ __ Uqadd(b17, b1, b1);
+ __ Uqadd(b18, b2, b2);
+ __ Uqadd(h19, h0, h0);
+ __ Uqadd(h20, h1, h1);
+ __ Uqadd(h21, h2, h2);
+ __ Uqadd(s22, s0, s0);
+ __ Uqadd(s23, s1, s1);
+ __ Uqadd(s24, s2, s2);
+ __ Uqadd(d25, d0, d0);
+ __ Uqadd(d26, d1, d1);
+ __ Uqadd(d27, d2, d2);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0xff, q16);
+ ASSERT_EQUAL_128(0, 0xfe, q17);
+ ASSERT_EQUAL_128(0, 0x20, q18);
+ ASSERT_EQUAL_128(0, 0xffff, q19);
+ ASSERT_EQUAL_128(0, 0xfefe, q20);
+ ASSERT_EQUAL_128(0, 0x2020, q21);
+ ASSERT_EQUAL_128(0, 0xffffffff, q22);
+ ASSERT_EQUAL_128(0, 0xfffefefe, q23);
+ ASSERT_EQUAL_128(0, 0x20002020, q24);
+ ASSERT_EQUAL_128(0, 0xffffffffffffffff, q25);
+ ASSERT_EQUAL_128(0, 0xfffffffefffefefe, q26);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqadd_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0x8000000180018181);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+ __ Movi(v2.V2D(), 0xaaaaaaaaaaaaaaaa, 0x1000000010001010);
+
+ __ Sqadd(b16, b0, b0);
+ __ Sqadd(b17, b1, b1);
+ __ Sqadd(b18, b2, b2);
+ __ Sqadd(h19, h0, h0);
+ __ Sqadd(h20, h1, h1);
+ __ Sqadd(h21, h2, h2);
+ __ Sqadd(s22, s0, s0);
+ __ Sqadd(s23, s1, s1);
+ __ Sqadd(s24, s2, s2);
+ __ Sqadd(d25, d0, d0);
+ __ Sqadd(d26, d1, d1);
+ __ Sqadd(d27, d2, d2);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0x80, q16);
+ ASSERT_EQUAL_128(0, 0x7f, q17);
+ ASSERT_EQUAL_128(0, 0x20, q18);
+ ASSERT_EQUAL_128(0, 0x8000, q19);
+ ASSERT_EQUAL_128(0, 0x7fff, q20);
+ ASSERT_EQUAL_128(0, 0x2020, q21);
+ ASSERT_EQUAL_128(0, 0x80000000, q22);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q23);
+ ASSERT_EQUAL_128(0, 0x20002020, q24);
+ ASSERT_EQUAL_128(0, 0x8000000000000000, q25);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q26);
+ ASSERT_EQUAL_128(0, 0x2000000020002020, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_uqsub_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7fffffff7fff7f7f);
+
+ __ Uqsub(b16, b0, b0);
+ __ Uqsub(b17, b0, b1);
+ __ Uqsub(b18, b1, b0);
+ __ Uqsub(h19, h0, h0);
+ __ Uqsub(h20, h0, h1);
+ __ Uqsub(h21, h1, h0);
+ __ Uqsub(s22, s0, s0);
+ __ Uqsub(s23, s0, s1);
+ __ Uqsub(s24, s1, s0);
+ __ Uqsub(d25, d0, d0);
+ __ Uqsub(d26, d0, d1);
+ __ Uqsub(d27, d1, d0);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0, q16);
+ ASSERT_EQUAL_128(0, 0x71, q17);
+ ASSERT_EQUAL_128(0, 0, q18);
+
+ ASSERT_EQUAL_128(0, 0, q19);
+ ASSERT_EQUAL_128(0, 0x7171, q20);
+ ASSERT_EQUAL_128(0, 0, q21);
+
+ ASSERT_EQUAL_128(0, 0, q22);
+ ASSERT_EQUAL_128(0, 0x70017171, q23);
+ ASSERT_EQUAL_128(0, 0, q24);
+
+ ASSERT_EQUAL_128(0, 0, q25);
+ ASSERT_EQUAL_128(0, 0x7000000170017171, q26);
+ ASSERT_EQUAL_128(0, 0, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_sqsub_scalar) {
+ SETUP();
+
+ START();
+
+ __ Movi(v0.V2D(), 0xaaaaaaaaaaaaaaaa, 0xf0000000f000f0f0);
+ __ Movi(v1.V2D(), 0x5555555555555555, 0x7eeeeeee7eee7e7e);
+
+ __ Sqsub(b16, b0, b0);
+ __ Sqsub(b17, b0, b1);
+ __ Sqsub(b18, b1, b0);
+ __ Sqsub(h19, h0, h0);
+ __ Sqsub(h20, h0, h1);
+ __ Sqsub(h21, h1, h0);
+ __ Sqsub(s22, s0, s0);
+ __ Sqsub(s23, s0, s1);
+ __ Sqsub(s24, s1, s0);
+ __ Sqsub(d25, d0, d0);
+ __ Sqsub(d26, d0, d1);
+ __ Sqsub(d27, d1, d0);
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0, 0, q16);
+ ASSERT_EQUAL_128(0, 0x80, q17);
+ ASSERT_EQUAL_128(0, 0x7f, q18);
+
+ ASSERT_EQUAL_128(0, 0, q19);
+ ASSERT_EQUAL_128(0, 0x8000, q20);
+ ASSERT_EQUAL_128(0, 0x7fff, q21);
+
+ ASSERT_EQUAL_128(0, 0, q22);
+ ASSERT_EQUAL_128(0, 0x80000000, q23);
+ ASSERT_EQUAL_128(0, 0x7fffffff, q24);
+
+ ASSERT_EQUAL_128(0, 0, q25);
+ ASSERT_EQUAL_128(0, 0x8000000000000000, q26);
+ ASSERT_EQUAL_128(0, 0x7fffffffffffffff, q27);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fmla_fmls) {
+ SETUP();
+
+ START();
+ __ Movi(v0.V2D(), 0x3f80000040000000, 0x4100000000000000);
+ __ Movi(v1.V2D(), 0x400000003f800000, 0x000000003f800000);
+ __ Movi(v2.V2D(), 0x3f800000ffffffff, 0x7f800000ff800000);
+ __ Mov(v16.V16B(), v0.V16B());
+ __ Mov(v17.V16B(), v0.V16B());
+ __ Mov(v18.V16B(), v0.V16B());
+ __ Mov(v19.V16B(), v0.V16B());
+ __ Mov(v20.V16B(), v0.V16B());
+ __ Mov(v21.V16B(), v0.V16B());
+
+ __ Fmla(v16.V2S(), v1.V2S(), v2.V2S());
+ __ Fmla(v17.V4S(), v1.V4S(), v2.V4S());
+ __ Fmla(v18.V2D(), v1.V2D(), v2.V2D());
+ __ Fmls(v19.V2S(), v1.V2S(), v2.V2S());
+ __ Fmls(v20.V4S(), v1.V4S(), v2.V4S());
+ __ Fmls(v21.V2D(), v1.V2D(), v2.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7fc00000ff800000, q16);
+ ASSERT_EQUAL_128(0x40400000ffffffff, 0x7fc00000ff800000, q17);
+ ASSERT_EQUAL_128(0x3f9800015f8003f7, 0x41000000000000fe, q18);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x7fc000007f800000, q19);
+ ASSERT_EQUAL_128(0xbf800000ffffffff, 0x7fc000007f800000, q20);
+ ASSERT_EQUAL_128(0xbf8000023f0007ee, 0x40fffffffffffe04, q21);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fmulx_scalar) {
+ SETUP();
+
+ START();
+ __ Fmov(s0, 2.0);
+ __ Fmov(s1, 0.5);
+ __ Fmov(s2, 0.0);
+ __ Fmov(s3, -0.0);
+ __ Fmov(s4, kFP32PositiveInfinity);
+ __ Fmov(s5, kFP32NegativeInfinity);
+ __ Fmulx(s16, s0, s1);
+ __ Fmulx(s17, s2, s4);
+ __ Fmulx(s18, s2, s5);
+ __ Fmulx(s19, s3, s4);
+ __ Fmulx(s20, s3, s5);
+
+ __ Fmov(d21, 2.0);
+ __ Fmov(d22, 0.5);
+ __ Fmov(d23, 0.0);
+ __ Fmov(d24, -0.0);
+ __ Fmov(d25, kFP64PositiveInfinity);
+ __ Fmov(d26, kFP64NegativeInfinity);
+ __ Fmulx(d27, d21, d22);
+ __ Fmulx(d28, d23, d25);
+ __ Fmulx(d29, d23, d26);
+ __ Fmulx(d30, d24, d25);
+ __ Fmulx(d31, d24, d26);
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(1.0, s16);
+ ASSERT_EQUAL_FP32(2.0, s17);
+ ASSERT_EQUAL_FP32(-2.0, s18);
+ ASSERT_EQUAL_FP32(-2.0, s19);
+ ASSERT_EQUAL_FP32(2.0, s20);
+ ASSERT_EQUAL_FP64(1.0, d27);
+ ASSERT_EQUAL_FP64(2.0, d28);
+ ASSERT_EQUAL_FP64(-2.0, d29);
+ ASSERT_EQUAL_FP64(-2.0, d30);
+ ASSERT_EQUAL_FP64(2.0, d31);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32b) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32b(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x138);
+ __ Crc32b(w11, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x38);
+ __ Crc32b(w12, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32b(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32b(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32b(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x5f058808, x11);
+ ASSERT_EQUAL_64(0x5f058808, x12);
+ ASSERT_EQUAL_64(0xedb88320, x13);
+ ASSERT_EQUAL_64(0x00ffffff, x14);
+ ASSERT_EQUAL_64(0x77073196, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32h) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32h(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x10038);
+ __ Crc32h(w11, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x38);
+ __ Crc32h(w12, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32h(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32h(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32h(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x0e848dba, x11);
+ ASSERT_EQUAL_64(0x0e848dba, x12);
+ ASSERT_EQUAL_64(0x3b83984b, x13);
+ ASSERT_EQUAL_64(0x2d021072, x14);
+ ASSERT_EQUAL_64(0x04ac2124, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32w) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32w(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x80000031);
+ __ Crc32w(w11, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32w(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32w(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32w(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x1d937b81, x11);
+ ASSERT_EQUAL_64(0xed59b63b, x13);
+ ASSERT_EQUAL_64(0x00be2612, x14);
+ ASSERT_EQUAL_64(0xa036e530, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32x) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(x1, 0);
+ __ Crc32x(w10, w0, x1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(x1, UINT64_C(0x0000000800000031));
+ __ Crc32x(w11, w0, x1);
+
+ __ Mov(w0, 0);
+ __ Mov(x1, 128);
+ __ Crc32x(w13, w0, x1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(x1, 255);
+ __ Crc32x(w14, w0, x1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(x1, UINT64_C(0x1000100000000000));
+ __ Crc32x(w15, w0, x1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x40797b92, x11);
+ ASSERT_EQUAL_64(0x533b85da, x13);
+ ASSERT_EQUAL_64(0xbc962670, x14);
+ ASSERT_EQUAL_64(0x0667602f, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32cb) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32cb(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x138);
+ __ Crc32cb(w11, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x38);
+ __ Crc32cb(w12, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32cb(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32cb(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32cb(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x4851927d, x11);
+ ASSERT_EQUAL_64(0x4851927d, x12);
+ ASSERT_EQUAL_64(0x82f63b78, x13);
+ ASSERT_EQUAL_64(0x00ffffff, x14);
+ ASSERT_EQUAL_64(0xf26b8203, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32ch) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32ch(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x10038);
+ __ Crc32ch(w11, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x38);
+ __ Crc32ch(w12, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32ch(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32ch(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32ch(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0xcef8494c, x11);
+ ASSERT_EQUAL_64(0xcef8494c, x12);
+ ASSERT_EQUAL_64(0xfbc3faf9, x13);
+ ASSERT_EQUAL_64(0xad7dacae, x14);
+ ASSERT_EQUAL_64(0x03fc5f19, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32cw) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 0);
+ __ Crc32cw(w10, w0, w1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(w1, 0x80000031);
+ __ Crc32cw(w11, w0, w1);
+
+ __ Mov(w0, 0);
+ __ Mov(w1, 128);
+ __ Crc32cw(w13, w0, w1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(w1, 255);
+ __ Crc32cw(w14, w0, w1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(w1, 0x10001000);
+ __ Crc32cw(w15, w0, w1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0xbcb79ece, x11);
+ ASSERT_EQUAL_64(0x52a0c93f, x13);
+ ASSERT_EQUAL_64(0x9f9b5c7a, x14);
+ ASSERT_EQUAL_64(0xae1b882a, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(crc32cx) {
+ SETUP();
+ START();
+
+ __ Mov(w0, 0);
+ __ Mov(x1, 0);
+ __ Crc32cx(w10, w0, x1);
+
+ __ Mov(w0, 0x1);
+ __ Mov(x1, UINT64_C(0x0000000800000031));
+ __ Crc32cx(w11, w0, x1);
+
+ __ Mov(w0, 0);
+ __ Mov(x1, 128);
+ __ Crc32cx(w13, w0, x1);
+
+ __ Mov(w0, UINT32_MAX);
+ __ Mov(x1, 255);
+ __ Crc32cx(w14, w0, x1);
+
+ __ Mov(w0, 0x00010001);
+ __ Mov(x1, UINT64_C(0x1000100000000000));
+ __ Crc32cx(w15, w0, x1);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x0, x10);
+ ASSERT_EQUAL_64(0x7f320fcb, x11);
+ ASSERT_EQUAL_64(0x34019664, x13);
+ ASSERT_EQUAL_64(0x6cc27dd0, x14);
+ ASSERT_EQUAL_64(0xc6f0acdb, x15);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fabd_scalar) {
+ SETUP();
+
+ START();
+ __ Fmov(s0, 2.0);
+ __ Fmov(s1, 0.5);
+ __ Fmov(s2, 0.0);
+ __ Fmov(s3, -0.0);
+ __ Fmov(s4, kFP32PositiveInfinity);
+ __ Fmov(s5, kFP32NegativeInfinity);
+ __ Fabd(s16, s1, s0);
+ __ Fabd(s17, s2, s3);
+ __ Fabd(s18, s2, s5);
+ __ Fabd(s19, s3, s4);
+ __ Fabd(s20, s3, s5);
+
+ __ Fmov(d21, 2.0);
+ __ Fmov(d22, 0.5);
+ __ Fmov(d23, 0.0);
+ __ Fmov(d24, -0.0);
+ __ Fmov(d25, kFP64PositiveInfinity);
+ __ Fmov(d26, kFP64NegativeInfinity);
+ __ Fabd(d27, d21, d22);
+ __ Fabd(d28, d23, d24);
+ __ Fabd(d29, d23, d26);
+ __ Fabd(d30, d24, d25);
+ __ Fabd(d31, d24, d26);
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(1.5, s16);
+ ASSERT_EQUAL_FP32(0.0, s17);
+ ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s18);
+ ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s19);
+ ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s20);
+ ASSERT_EQUAL_FP64(1.5, d27);
+ ASSERT_EQUAL_FP64(0.0, d28);
+ ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d29);
+ ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d30);
+ ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d31);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_faddp_scalar) {
+ SETUP();
+
+ START();
+ __ Movi(d0, 0x3f80000040000000);
+ __ Movi(d1, 0xff8000007f800000);
+ __ Movi(d2, 0x0000000080000000);
+ __ Faddp(s0, v0.V2S());
+ __ Faddp(s1, v1.V2S());
+ __ Faddp(s2, v2.V2S());
+
+ __ Movi(v3.V2D(), 0xc000000000000000, 0x4000000000000000);
+ __ Movi(v4.V2D(), 0xfff8000000000000, 0x7ff8000000000000);
+ __ Movi(v5.V2D(), 0x0000000000000000, 0x8000000000000000);
+ __ Faddp(d3, v3.V2D());
+ __ Faddp(d4, v4.V2D());
+ __ Faddp(d5, v5.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(3.0, s0);
+ ASSERT_EQUAL_FP32(kFP32DefaultNaN, s1);
+ ASSERT_EQUAL_FP32(0.0, s2);
+ ASSERT_EQUAL_FP64(0.0, d3);
+ ASSERT_EQUAL_FP64(kFP64DefaultNaN, d4);
+ ASSERT_EQUAL_FP64(0.0, d5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fmaxp_scalar) {
+ SETUP();
+
+ START();
+ __ Movi(d0, 0x3f80000040000000);
+ __ Movi(d1, 0xff8000007f800000);
+ __ Movi(d2, 0x7fc00000ff800000);
+ __ Fmaxp(s0, v0.V2S());
+ __ Fmaxp(s1, v1.V2S());
+ __ Fmaxp(s2, v2.V2S());
+
+ __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
+ __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
+ __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
+ __ Fmaxp(d3, v3.V2D());
+ __ Fmaxp(d4, v4.V2D());
+ __ Fmaxp(d5, v5.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(2.0, s0);
+ ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
+ ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
+ ASSERT_EQUAL_FP64(2.0, d3);
+ ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
+ ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fmaxnmp_scalar) {
+ SETUP();
+
+ START();
+ __ Movi(d0, 0x3f80000040000000);
+ __ Movi(d1, 0xff8000007f800000);
+ __ Movi(d2, 0x7fc00000ff800000);
+ __ Fmaxnmp(s0, v0.V2S());
+ __ Fmaxnmp(s1, v1.V2S());
+ __ Fmaxnmp(s2, v2.V2S());
+
+ __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
+ __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
+ __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
+ __ Fmaxnmp(d3, v3.V2D());
+ __ Fmaxnmp(d4, v4.V2D());
+ __ Fmaxnmp(d5, v5.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(2.0, s0);
+ ASSERT_EQUAL_FP32(kFP32PositiveInfinity, s1);
+ ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
+ ASSERT_EQUAL_FP64(2.0, d3);
+ ASSERT_EQUAL_FP64(kFP64PositiveInfinity, d4);
+ ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fminp_scalar) {
+ SETUP();
+
+ START();
+ __ Movi(d0, 0x3f80000040000000);
+ __ Movi(d1, 0xff8000007f800000);
+ __ Movi(d2, 0x7fc00000ff800000);
+ __ Fminp(s0, v0.V2S());
+ __ Fminp(s1, v1.V2S());
+ __ Fminp(s2, v2.V2S());
+
+ __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
+ __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
+ __ Movi(v5.V2D(), 0x7ff0000000000000, 0x7ff8000000000000);
+ __ Fminp(d3, v3.V2D());
+ __ Fminp(d4, v4.V2D());
+ __ Fminp(d5, v5.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(1.0, s0);
+ ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
+ ASSERT_EQUAL_FP32(kFP32DefaultNaN, s2);
+ ASSERT_EQUAL_FP64(1.0, d3);
+ ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
+ ASSERT_EQUAL_FP64(kFP64DefaultNaN, d5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_fminnmp_scalar) {
+ SETUP();
+
+ START();
+ __ Movi(d0, 0x3f80000040000000);
+ __ Movi(d1, 0xff8000007f800000);
+ __ Movi(d2, 0x7fc00000ff800000);
+ __ Fminnmp(s0, v0.V2S());
+ __ Fminnmp(s1, v1.V2S());
+ __ Fminnmp(s2, v2.V2S());
+
+ __ Movi(v3.V2D(), 0x3ff0000000000000, 0x4000000000000000);
+ __ Movi(v4.V2D(), 0xfff0000000000000, 0x7ff0000000000000);
+ __ Movi(v5.V2D(), 0x7ff8000000000000, 0xfff0000000000000);
+ __ Fminnmp(d3, v3.V2D());
+ __ Fminnmp(d4, v4.V2D());
+ __ Fminnmp(d5, v5.V2D());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_FP32(1.0, s0);
+ ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s1);
+ ASSERT_EQUAL_FP32(kFP32NegativeInfinity, s2);
+ ASSERT_EQUAL_FP64(1.0, d3);
+ ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d4);
+ ASSERT_EQUAL_FP64(kFP64NegativeInfinity, d5);
+
+ TEARDOWN();
+}
+
+
+TEST(neon_tbl) {
+ SETUP();
+
+ START();
+ __ Movi(v30.V2D(), 0xbf561e188b1280e9, 0xbd542b8cbd24e8e8);
+ __ Movi(v31.V2D(), 0xb5e9883d2c88a46d, 0x12276d5b614c915e);
+ __ Movi(v0.V2D(), 0xc45b7782bc5ecd72, 0x5dd4fe5a4bc6bf5e);
+ __ Movi(v1.V2D(), 0x1e3254094bd1746a, 0xf099ecf50e861c80);
+
+ __ Movi(v4.V2D(), 0xf80c030100031f16, 0x00070504031201ff);
+ __ Movi(v5.V2D(), 0x1f01001afc14202a, 0x2a081e1b0c02020c);
+ __ Movi(v6.V2D(), 0x353f1a13022a2360, 0x2c464a00203a0a33);
+ __ Movi(v7.V2D(), 0x64801a1c054cf30d, 0x793a2c052e213739);
+
+ __ Movi(v8.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
+ __ Movi(v9.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
+ __ Movi(v10.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
+ __ Movi(v11.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
+ __ Movi(v12.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
+ __ Movi(v13.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
+ __ Movi(v14.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
+ __ Movi(v15.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
+
+ __ Tbl(v8.V16B(), v1.V16B(), v4.V16B());
+ __ Tbl(v9.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
+ __ Tbl(v10.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
+ __ Tbl(v11.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
+ __ Tbl(v12.V8B(), v1.V16B(), v4.V8B());
+ __ Tbl(v13.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
+ __ Tbl(v14.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
+ __ Tbl(v15.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
+
+ __ Movi(v16.V2D(), 0xb7f60ad7d7d88f13, 0x13eefc240496e842);
+ __ Movi(v17.V2D(), 0x1be199c7c69b47ec, 0x8e4b9919f6eed443);
+ __ Movi(v18.V2D(), 0x9bd2e1654c69e48f, 0x2143d089e426c6d2);
+ __ Movi(v19.V2D(), 0xc31dbdc4a0393065, 0x1ecc2077caaf64d8);
+ __ Movi(v20.V2D(), 0x29b24463967bc6eb, 0xdaf59970df01c93b);
+ __ Movi(v21.V2D(), 0x3e20a4a4cb6813f4, 0x20a5832713dae669);
+ __ Movi(v22.V2D(), 0xc5ff9a94041b1fdf, 0x2f46cde38cba2682);
+ __ Movi(v23.V2D(), 0xd8cc5b0e61f387e6, 0xe69d6d314971e8fd);
+
+ __ Tbx(v16.V16B(), v1.V16B(), v4.V16B());
+ __ Tbx(v17.V16B(), v0.V16B(), v1.V16B(), v5.V16B());
+ __ Tbx(v18.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V16B());
+ __ Tbx(v19.V16B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V16B());
+ __ Tbx(v20.V8B(), v1.V16B(), v4.V8B());
+ __ Tbx(v21.V8B(), v0.V16B(), v1.V16B(), v5.V8B());
+ __ Tbx(v22.V8B(), v31.V16B(), v0.V16B(), v1.V16B(), v6.V8B());
+ __ Tbx(v23.V8B(), v30.V16B(), v31.V16B(), v0.V16B(), v1.V16B(), v7.V8B());
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_128(0x00090e1c800e0000, 0x80f0ecf50e001c00, v8);
+ ASSERT_EQUAL_128(0x1ebf5ed100f50000, 0x0072324b82c6c682, v9);
+ ASSERT_EQUAL_128(0x00005e4b4cd10e00, 0x0900005e80008800, v10);
+ ASSERT_EQUAL_128(0x0000883d2b00001e, 0x00d1822b5bbff074, v11);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e001c00, v12);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0072324b82c6c682, v13);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0900005e80008800, v14);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x00d1822b5bbff074, v15);
+
+ ASSERT_EQUAL_128(0xb7090e1c800e8f13, 0x80f0ecf50e961c42, v16);
+ ASSERT_EQUAL_128(0x1ebf5ed1c6f547ec, 0x8e72324b82c6c682, v17);
+ ASSERT_EQUAL_128(0x9bd25e4b4cd10e8f, 0x0943d05e802688d2, v18);
+ ASSERT_EQUAL_128(0xc31d883d2b39301e, 0x1ed1822b5bbff074, v19);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x80f0ecf50e011c3b, v20);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x2072324b82c6c682, v21);
+ ASSERT_EQUAL_128(0x0000000000000000, 0x0946cd5e80ba8882, v22);
+ ASSERT_EQUAL_128(0x0000000000000000, 0xe6d1822b5bbff074, v23);
+
+ TEARDOWN();
+}
+
+
+TEST(regress_cmp_shift_imm) {
+ SETUP();
+
+ START();
+
+ __ Mov(x0, 0x3d720c8d);
+ __ Cmp(x0, Operand(0x3d720c8d));
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_NZCV(ZCFlag);
+
+ TEARDOWN();
+}
+
+
+TEST(compute_address) {
+ SETUP();
+
+ START();
+ int64_t base_address = INT64_C(0x123000000abc);
+ int64_t reg_offset = INT64_C(0x1087654321);
+ Register base = x0;
+ Register offset = x1;
+
+ __ Mov(base, base_address);
+ __ Mov(offset, reg_offset);
+
+
+ __ ComputeAddress(x2, MemOperand(base, 0));
+ __ ComputeAddress(x3, MemOperand(base, 8));
+ __ ComputeAddress(x4, MemOperand(base, -100));
+
+ __ ComputeAddress(x5, MemOperand(base, offset));
+ __ ComputeAddress(x6, MemOperand(base, offset, LSL, 2));
+ __ ComputeAddress(x7, MemOperand(base, offset, LSL, 4));
+ __ ComputeAddress(x8, MemOperand(base, offset, LSL, 8));
+
+ __ ComputeAddress(x9, MemOperand(base, offset, SXTW));
+ __ ComputeAddress(x10, MemOperand(base, offset, UXTW, 1));
+ __ ComputeAddress(x11, MemOperand(base, offset, SXTW, 2));
+ __ ComputeAddress(x12, MemOperand(base, offset, UXTW, 3));
+
+ END();
+
+ RUN();
+
+ ASSERT_EQUAL_64(base_address, base);
+
+ ASSERT_EQUAL_64(INT64_C(0x123000000abc), x2);
+ ASSERT_EQUAL_64(INT64_C(0x123000000ac4), x3);
+ ASSERT_EQUAL_64(INT64_C(0x123000000a58), x4);
+
+ ASSERT_EQUAL_64(INT64_C(0x124087654ddd), x5);
+ ASSERT_EQUAL_64(INT64_C(0x12721d951740), x6);
+ ASSERT_EQUAL_64(INT64_C(0x133876543ccc), x7);
+ ASSERT_EQUAL_64(INT64_C(0x22b765432bbc), x8);
+
+ ASSERT_EQUAL_64(INT64_C(0x122f87654ddd), x9);
+ ASSERT_EQUAL_64(INT64_C(0x12310eca90fe), x10);
+ ASSERT_EQUAL_64(INT64_C(0x122e1d951740), x11);
+ ASSERT_EQUAL_64(INT64_C(0x12343b2a23c4), x12);
+
+ TEARDOWN();
+}
+
+
+TEST(far_branch_backward) {
+ // Test that the MacroAssembler correctly resolves backward branches to labels
+ // that are outside the immediate range of branch instructions.
+ // Take into account that backward branches can reach one instruction further
+ // than forward branches.
+ const int overflow_size = kInstructionSize +
+ std::max(Instruction::ImmBranchForwardRange(TestBranchType),
+ std::max(Instruction::ImmBranchForwardRange(CompareBranchType),
+ Instruction::ImmBranchForwardRange(CondBranchType)));
+
+ SETUP();
+ START();
+
+ Label done, fail;
+ Label test_tbz, test_cbz, test_bcond;
+ Label success_tbz, success_cbz, success_bcond;
+
+ __ Mov(x0, 0);
+ __ Mov(x1, 1);
+ __ Mov(x10, 0);
+
+ __ B(&test_tbz);
+ __ Bind(&success_tbz);
+ __ Orr(x0, x0, 1 << 0);
+ __ B(&test_cbz);
+ __ Bind(&success_cbz);
+ __ Orr(x0, x0, 1 << 1);
+ __ B(&test_bcond);
+ __ Bind(&success_bcond);
+ __ Orr(x0, x0, 1 << 2);
+
+ __ B(&done);
+
+ // Generate enough code to overflow the immediate range of the three types of
+ // branches below.
+ for (unsigned i = 0; i < overflow_size / kInstructionSize; ++i) {
+ if (i % 100 == 0) {
+ // If we do land in this code, we do not want to execute so many nops
+ // before reaching the end of test (especially if tracing is activated).
+ __ B(&fail);
+ } else {
+ __ Nop();
+ }
+ }
+ __ B(&fail);
+
+ __ Bind(&test_tbz);
+ __ Tbz(x10, 7, &success_tbz);
+ __ Bind(&test_cbz);
+ __ Cbz(x10, &success_cbz);
+ __ Bind(&test_bcond);
+ __ Cmp(x10, 0);
+ __ B(eq, &success_bcond);
+
+ // For each out-of-range branch instructions, at least two instructions should
+ // have been generated.
+ VIXL_CHECK(masm.SizeOfCodeGeneratedSince(&test_tbz) >= 7 * kInstructionSize);
+
+ __ Bind(&fail);
+ __ Mov(x1, 0);
+ __ Bind(&done);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x7, x0);
+ ASSERT_EQUAL_64(0x1, x1);
+
+ TEARDOWN();
+}
+
+
+TEST(single_veneer) {
+ SETUP();
+ START();
+
+ const int max_range = Instruction::ImmBranchForwardRange(TestBranchType);
+
+ Label success, fail, done;
+
+ __ Mov(x0, 0);
+ __ Mov(x1, 1);
+ __ Mov(x10, 0);
+
+ __ Tbz(x10, 7, &success);
+
+ // Generate enough code to overflow the immediate range of the `tbz`.
+ for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
+ if (i % 100 == 0) {
+ // If we do land in this code, we do not want to execute so many nops
+ // before reaching the end of test (especially if tracing is activated).
+ __ B(&fail);
+ } else {
+ __ Nop();
+ }
+ }
+ __ B(&fail);
+
+ __ Bind(&success);
+ __ Mov(x0, 1);
+
+ __ B(&done);
+ __ Bind(&fail);
+ __ Mov(x1, 0);
+ __ Bind(&done);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(1, x0);
+ ASSERT_EQUAL_64(1, x1);
+
+ TEARDOWN();
+}
+
+
+TEST(simple_veneers) {
+ // Test that the MacroAssembler correctly emits veneers for forward branches
+ // to labels that are outside the immediate range of branch instructions.
+ const int max_range =
+ std::max(Instruction::ImmBranchForwardRange(TestBranchType),
+ std::max(Instruction::ImmBranchForwardRange(CompareBranchType),
+ Instruction::ImmBranchForwardRange(CondBranchType)));
+
+ SETUP();
+ START();
+
+ Label done, fail;
+ Label test_tbz, test_cbz, test_bcond;
+ Label success_tbz, success_cbz, success_bcond;
+
+ __ Mov(x0, 0);
+ __ Mov(x1, 1);
+ __ Mov(x10, 0);
+
+ __ Bind(&test_tbz);
+ __ Tbz(x10, 7, &success_tbz);
+ __ Bind(&test_cbz);
+ __ Cbz(x10, &success_cbz);
+ __ Bind(&test_bcond);
+ __ Cmp(x10, 0);
+ __ B(eq, &success_bcond);
+
+ // Generate enough code to overflow the immediate range of the three types of
+ // branches below.
+ for (unsigned i = 0; i < max_range / kInstructionSize + 1; ++i) {
+ if (i % 100 == 0) {
+ // If we do land in this code, we do not want to execute so many nops
+ // before reaching the end of test (especially if tracing is activated).
+ __ B(&fail);
+ } else {
+ __ Nop();
+ }
+ }
+ __ B(&fail);
+
+ __ Bind(&success_tbz);
+ __ Orr(x0, x0, 1 << 0);
+ __ B(&test_cbz);
+ __ Bind(&success_cbz);
+ __ Orr(x0, x0, 1 << 1);
+ __ B(&test_bcond);
+ __ Bind(&success_bcond);
+ __ Orr(x0, x0, 1 << 2);
+
+ __ B(&done);
+ __ Bind(&fail);
+ __ Mov(x1, 0);
+ __ Bind(&done);
+
+ END();
+ RUN();
+
+ ASSERT_EQUAL_64(0x7, x0);
+ ASSERT_EQUAL_64(0x1, x1);
+
+ TEARDOWN();
+}
+
+
+TEST(veneers_stress) {
+ SETUP();
+ START();
+
+ // This is a code generation test stressing the emission of veneers. The code
+ // generated is not executed.
+
+ Label target;
+ const unsigned max_range = Instruction::ImmBranchForwardRange(CondBranchType);
+ const unsigned iterations =
+ (max_range + max_range / 4) / (4 * kInstructionSize);
+ for (unsigned i = 0; i < iterations; i++) {
+ __ B(&target);
+ __ B(eq, &target);
+ __ Cbz(x0, &target);
+ __ Tbz(x0, 0, &target);
+ }
+ __ Bind(&target);
+
+ END();
+ TEARDOWN();
+}
+
+
+TEST(veneers_two_out_of_range) {
+ SETUP();
+ START();
+
+ // This is a code generation test. The code generated is not executed.
+ // Ensure that the MacroAssembler considers unresolved branches to chose when
+ // a veneer pool should be emitted. We generate two branches that go out of
+ // range at the same offset. When the MacroAssembler decides to emit the
+ // veneer pool, the emission of a first veneer should not cause the other
+ // branch to go out of range.
+
+ int range_cbz = Instruction::ImmBranchForwardRange(CompareBranchType);
+ int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
+ int max_target = masm.CursorOffset() + range_cbz;
+
+ Label done;
+
+ // We use different labels to prevent the MacroAssembler from sharing veneers.
+ Label target_cbz, target_tbz;
+
+ __ Cbz(x0, &target_cbz);
+ while (masm.CursorOffset() < max_target - range_tbz) {
+ __ Nop();
+ }
+ __ Tbz(x0, 0, &target_tbz);
+ while (masm.CursorOffset() < max_target) {
+ __ Nop();
+ }
+
+ // This additional nop makes the branches go out of range.
+ __ Nop();
+
+ __ Bind(&target_cbz);
+ __ Bind(&target_tbz);
+
+ END();
+ TEARDOWN();
+}
+
+
+TEST(veneers_hanging) {
+ SETUP();
+ START();
+
+ // This is a code generation test. The code generated is not executed.
+ // Ensure that the MacroAssembler considers unresolved branches to chose when
+ // a veneer pool should be emitted. This is similar to the
+ // 'veneers_two_out_of_range' test. We try to trigger the following situation:
+ // b.eq label
+ // b.eq label
+ // ...
+ // nop
+ // ...
+ // cbz x0, label
+ // cbz x0, label
+ // ...
+ // tbz x0, 0 label
+ // nop
+ // ...
+ // nop <- From here the `b.eq` and `cbz` instructions run out of range,
+ // so a literal pool is required.
+ // veneer
+ // veneer
+ // veneer <- The `tbz` runs out of range somewhere in the middle of the
+ // veneer veneer pool.
+ // veneer
+
+ const int range_bcond = Instruction::ImmBranchForwardRange(CondBranchType);
+ const int range_cbz = Instruction::ImmBranchForwardRange(CompareBranchType);
+ const int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
+ const int max_target = masm.CursorOffset() + range_bcond;
+
+ Label done;
+ const int n_bcond = 100;
+ const int n_cbz = 100;
+ const int n_tbz = 1;
+ const int kNTotalBranches = n_bcond + n_cbz + n_tbz;
+
+ // We use different labels to prevent the MacroAssembler from sharing veneers.
+ Label labels[kNTotalBranches];
+ for (int i = 0; i < kNTotalBranches; i++) {
+ new(&labels[i]) Label();
+ }
+
+ for (int i = 0; i < n_bcond; i++) {
+ __ B(eq, &labels[i]);
+ }
+
+ while (masm.CursorOffset() < max_target - range_cbz) {
+ __ Nop();
+ }
+
+ for (int i = 0; i < n_cbz; i++) {
+ __ Cbz(x0, &labels[n_bcond + i]);
+ }
+
+ // Ensure the 'tbz' will go out of range after some of the previously
+ // generated branches.
+ int margin = (n_bcond / 2) * kInstructionSize;
+ while (masm.CursorOffset() < max_target - range_tbz + margin) {
+ __ Nop();
+ }
+
+ __ Tbz(x0, 0, &labels[n_bcond + n_cbz]);
+
+ while (masm.CursorOffset() < max_target) {
+ __ Nop();
+ }
+
+ // This additional nop makes the 'b.eq' and 'cbz' instructions go out of range
+ // and forces the emission of a veneer pool. The 'tbz' is not yet out of
+ // range, but will go out of range while veneers are emitted for the other
+ // branches.
+ // The MacroAssembler should ensure that veneers are correctly emitted for all
+ // the branches, including the 'tbz'. Checks will fail if the target of a
+ // branch is out of range.
+ __ Nop();
+
+ for (int i = 0; i < kNTotalBranches; i++) {
+ __ Bind(&labels[i]);
+ }
+
+ END();
+ TEARDOWN();
+}
+
+
+TEST(collision_literal_veneer_pools) {
+ SETUP();
+ START();
+
+ // This is a code generation test. The code generated is not executed.
+
+ // Make sure the literal pool is empty;
+ masm.EmitLiteralPool(LiteralPool::kBranchRequired);
+ ASSERT_LITERAL_POOL_SIZE(0);
+
+ // We chose the offsets below to (try to) trigger the following situation:
+ // buffer offset
+ // 0: tbz x0, 0, target_tbz ----------------------------------.
+ // 4: nop |
+ // ... |
+ // nop |
+ // literal gen: ldr s0, [pc + ...] ; load from `pool start + 0` |
+ // ldr s0, [pc + ...] ; load from `pool start + 4` |
+ // ... |
+ // ldr s0, [pc + ...] |
+ // pool start: floating-point literal (0.1) |
+ // floating-point literal (1.1) |
+ // ... |
+ // floating-point literal (<n>.1) <-----tbz-max-range--'
+ // floating-point literal (<n+1>.1)
+ // ...
+
+ const int range_tbz = Instruction::ImmBranchForwardRange(TestBranchType);
+ const int max_target = masm.CursorOffset() + range_tbz;
+
+ const size_t target_literal_pool_size = 100 * kInstructionSize;
+ const int offset_start_literal_gen =
+ target_literal_pool_size + target_literal_pool_size / 2;
+
+
+ Label target_tbz;
+
+ __ Tbz(x0, 0, &target_tbz);
+ VIXL_CHECK(masm.NumberOfPotentialVeneers() == 1);
+ while (masm.CursorOffset() < max_target - offset_start_literal_gen) {
+ __ Nop();
+ }
+ VIXL_CHECK(masm.NumberOfPotentialVeneers() == 1);
+
+ for (int i = 0; i < 100; i++) {
+ // Use a different value to force one literal pool entry per iteration.
+ __ Ldr(s0, i + 0.1);
+ }
+ VIXL_CHECK(masm.LiteralPoolSize() >= target_literal_pool_size);
+
+ // Force emission of a literal pool.
+ masm.EmitLiteralPool(LiteralPool::kBranchRequired);
+ ASSERT_LITERAL_POOL_SIZE(0);
+
+ // The branch should not have gone out of range during the emission of the
+ // literal pool.
+ __ Bind(&target_tbz);
+
+ VIXL_CHECK(masm.NumberOfPotentialVeneers() == 0);
+
+ END();
+ TEARDOWN();
+}
} // namespace vixl