Regression test for SVE loads/stores with sp base address (#65)
Test the change in d3f755c30ca201ce716c75f09235525ce47de52b using an example
instruction from each load/store class. The test relies on accesses to
addresses near zero faulting, so it fails if register 31 is misinterpreted as
xzr instead of sp.
diff --git a/test/aarch64/test-assembler-sve-aarch64.cc b/test/aarch64/test-assembler-sve-aarch64.cc
index 21a7bae..b7c7758 100644
--- a/test/aarch64/test-assembler-sve-aarch64.cc
+++ b/test/aarch64/test-assembler-sve-aarch64.cc
@@ -19732,6 +19732,245 @@
}
}
+// Regression test for SVE loads and stores that use sp as the base register.
+// Every encoding below has 31 in the base register field. No results are
+// checked; the test only needs each access to complete without faulting.
+TEST_SVE(sve_load_store_sp_base_regression_test) {
+ SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE);
+ START();
+
+ // x0 and z0 supply zero scalar and vector offsets, and p0 is the governing
+ // predicate used by the predicated forms below.
+ __ Mov(x0, 0);
+ __ Mov(z0.VnB(), 0);
+ __ Ptrue(p0.VnB());
+
+ // Push 128 pairs of zeroed X registers (2048 bytes) so that the accesses
+ // below read and write stack memory claimed by this test.
+ Label loop;
+ __ Mov(x1, 128);
+ __ Bind(&loop);
+ __ Push(xzr, xzr);
+ __ Sub(x1, x1, 1);
+ __ Cbnz(x1, &loop);
+
+ {
+ // The scope size must equal the number of dci() calls below (193).
+ ExactAssemblyScope scope(&masm, 193 * kInstructionSize);
+
+ __ dci(0xa420a3e0); // ld1b {z0.h}, p0/z, [sp]
+ __ dci(0xa440a3e0); // ld1b {z0.s}, p0/z, [sp]
+ __ dci(0xa460a3e0); // ld1b {z0.d}, p0/z, [sp]
+ __ dci(0xa400a3e0); // ld1b {z0.b}, p0/z, [sp]
+ __ dci(0xa42043e0); // ld1b {z0.h}, p0/z, [sp, x0]
+ __ dci(0xa44043e0); // ld1b {z0.s}, p0/z, [sp, x0]
+ __ dci(0xa46043e0); // ld1b {z0.d}, p0/z, [sp, x0]
+ __ dci(0xa40043e0); // ld1b {z0.b}, p0/z, [sp, x0]
+ __ dci(0xc440c3e0); // ld1b {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa5e0a3e0); // ld1d {z0.d}, p0/z, [sp]
+ __ dci(0xa5e043e0); // ld1d {z0.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xc5e0c3e0); // ld1d {z0.d}, p0/z, [sp, z0.d, lsl #3]
+ __ dci(0xc5c0c3e0); // ld1d {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa4a0a3e0); // ld1h {z0.h}, p0/z, [sp]
+ __ dci(0xa4c0a3e0); // ld1h {z0.s}, p0/z, [sp]
+ __ dci(0xa4e0a3e0); // ld1h {z0.d}, p0/z, [sp]
+ __ dci(0xa4a043e0); // ld1h {z0.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa4c043e0); // ld1h {z0.s}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa4e043e0); // ld1h {z0.d}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xc4e0c3e0); // ld1h {z0.d}, p0/z, [sp, z0.d, lsl #1]
+ __ dci(0xc4c0c3e0); // ld1h {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0x8440a3e0); // ld1rb {z0.h}, p0/z, [sp]
+ __ dci(0x8440c3e0); // ld1rb {z0.s}, p0/z, [sp]
+ __ dci(0x8440e3e0); // ld1rb {z0.d}, p0/z, [sp]
+ __ dci(0x844083e0); // ld1rb {z0.b}, p0/z, [sp]
+ __ dci(0x85c0e3e0); // ld1rd {z0.d}, p0/z, [sp]
+ __ dci(0x84c0a3e0); // ld1rh {z0.h}, p0/z, [sp]
+ __ dci(0x84c0c3e0); // ld1rh {z0.s}, p0/z, [sp]
+ __ dci(0x84c0e3e0); // ld1rh {z0.d}, p0/z, [sp]
+ __ dci(0xa40023e0); // ld1rqb {z0.b}, p0/z, [sp]
+ __ dci(0xa40003e0); // ld1rqb {z0.b}, p0/z, [sp, x0]
+ __ dci(0xa58023e0); // ld1rqd {z0.d}, p0/z, [sp]
+ __ dci(0xa58003e0); // ld1rqd {z0.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xa48023e0); // ld1rqh {z0.h}, p0/z, [sp]
+ __ dci(0xa48003e0); // ld1rqh {z0.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa50023e0); // ld1rqw {z0.s}, p0/z, [sp]
+ __ dci(0xa50003e0); // ld1rqw {z0.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0x85c0c3e0); // ld1rsb {z0.h}, p0/z, [sp]
+ __ dci(0x85c0a3e0); // ld1rsb {z0.s}, p0/z, [sp]
+ __ dci(0x85c083e0); // ld1rsb {z0.d}, p0/z, [sp]
+ __ dci(0x8540a3e0); // ld1rsh {z0.s}, p0/z, [sp]
+ __ dci(0x854083e0); // ld1rsh {z0.d}, p0/z, [sp]
+ __ dci(0x84c083e0); // ld1rsw {z0.d}, p0/z, [sp]
+ __ dci(0x8540c3e0); // ld1rw {z0.s}, p0/z, [sp]
+ __ dci(0x8540e3e0); // ld1rw {z0.d}, p0/z, [sp]
+ __ dci(0xa5c0a3e0); // ld1sb {z0.h}, p0/z, [sp]
+ __ dci(0xa5a0a3e0); // ld1sb {z0.s}, p0/z, [sp]
+ __ dci(0xa580a3e0); // ld1sb {z0.d}, p0/z, [sp]
+ __ dci(0xa5c043e0); // ld1sb {z0.h}, p0/z, [sp, x0]
+ __ dci(0xa5a043e0); // ld1sb {z0.s}, p0/z, [sp, x0]
+ __ dci(0xa58043e0); // ld1sb {z0.d}, p0/z, [sp, x0]
+ __ dci(0xc44083e0); // ld1sb {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa520a3e0); // ld1sh {z0.s}, p0/z, [sp]
+ __ dci(0xa500a3e0); // ld1sh {z0.d}, p0/z, [sp]
+ __ dci(0xa52043e0); // ld1sh {z0.s}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa50043e0); // ld1sh {z0.d}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xc4e083e0); // ld1sh {z0.d}, p0/z, [sp, z0.d, lsl #1]
+ __ dci(0xc4c083e0); // ld1sh {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa480a3e0); // ld1sw {z0.d}, p0/z, [sp]
+ __ dci(0xa48043e0); // ld1sw {z0.d}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xc56083e0); // ld1sw {z0.d}, p0/z, [sp, z0.d, lsl #2]
+ __ dci(0xc54083e0); // ld1sw {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa540a3e0); // ld1w {z0.s}, p0/z, [sp]
+ __ dci(0xa560a3e0); // ld1w {z0.d}, p0/z, [sp]
+ __ dci(0xa54043e0); // ld1w {z0.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xa56043e0); // ld1w {z0.d}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xc560c3e0); // ld1w {z0.d}, p0/z, [sp, z0.d, lsl #2]
+ __ dci(0xc540c3e0); // ld1w {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa420e3e0); // ld2b {z0.b, z1.b}, p0/z, [sp]
+ __ dci(0xa420c3e0); // ld2b {z0.b, z1.b}, p0/z, [sp, x0]
+ __ dci(0xa5a0e3e0); // ld2d {z0.d, z1.d}, p0/z, [sp]
+ __ dci(0xa5a0c3e0); // ld2d {z0.d, z1.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xa4a0e3e0); // ld2h {z0.h, z1.h}, p0/z, [sp]
+ __ dci(0xa4a0c3e0); // ld2h {z0.h, z1.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa520e3e0); // ld2w {z0.s, z1.s}, p0/z, [sp]
+ __ dci(0xa520c3e0); // ld2w {z0.s, z1.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xa440e3e0); // ld3b {z0.b, z1.b, z2.b}, p0/z, [sp]
+ __ dci(0xa440c3e0); // ld3b {z0.b, z1.b, z2.b}, p0/z, [sp, x0]
+ __ dci(0xa5c0e3e0); // ld3d {z0.d, z1.d, z2.d}, p0/z, [sp]
+ __ dci(0xa5c0c3e0); // ld3d {z0.d, z1.d, z2.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xa4c0e3e0); // ld3h {z0.h, z1.h, z2.h}, p0/z, [sp]
+ __ dci(0xa4c0c3e0); // ld3h {z0.h, z1.h, z2.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa540e3e0); // ld3w {z0.s, z1.s, z2.s}, p0/z, [sp]
+ __ dci(0xa540c3e0); // ld3w {z0.s, z1.s, z2.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xa460e3e0); // ld4b {z0.b, z1.b, z2.b, z3.b}, p0/z, [sp]
+ __ dci(0xa460c3e0); // ld4b {z0.b, z1.b, z2.b, z3.b}, p0/z, [sp, x0]
+ __ dci(0xa5e0e3e0); // ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [sp]
+ __ dci(
+ 0xa5e0c3e0); // ld4d {z0.d, z1.d, z2.d, z3.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xa4e0e3e0); // ld4h {z0.h, z1.h, z2.h, z3.h}, p0/z, [sp]
+ __ dci(
+ 0xa4e0c3e0); // ld4h {z0.h, z1.h, z2.h, z3.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa560e3e0); // ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [sp]
+ __ dci(
+ 0xa560c3e0); // ld4w {z0.s, z1.s, z2.s, z3.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xa42063e0); // ldff1b {z0.h}, p0/z, [sp, x0]
+ __ dci(0xa44063e0); // ldff1b {z0.s}, p0/z, [sp, x0]
+ __ dci(0xa46063e0); // ldff1b {z0.d}, p0/z, [sp, x0]
+ __ dci(0xa40063e0); // ldff1b {z0.b}, p0/z, [sp, x0]
+ __ dci(0xc440e3e0); // ldff1b {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa5e063e0); // ldff1d {z0.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xc5e0e3e0); // ldff1d {z0.d}, p0/z, [sp, z0.d, lsl #3]
+ __ dci(0xc5c0e3e0); // ldff1d {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa4a063e0); // ldff1h {z0.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa4c063e0); // ldff1h {z0.s}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa4e063e0); // ldff1h {z0.d}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xc4e0e3e0); // ldff1h {z0.d}, p0/z, [sp, z0.d, lsl #1]
+ __ dci(0xc4c0e3e0); // ldff1h {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa5c063e0); // ldff1sb {z0.h}, p0/z, [sp, x0]
+ __ dci(0xa5a063e0); // ldff1sb {z0.s}, p0/z, [sp, x0]
+ __ dci(0xa58063e0); // ldff1sb {z0.d}, p0/z, [sp, x0]
+ __ dci(0xc440a3e0); // ldff1sb {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa52063e0); // ldff1sh {z0.s}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa50063e0); // ldff1sh {z0.d}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xc4e0a3e0); // ldff1sh {z0.d}, p0/z, [sp, z0.d, lsl #1]
+ __ dci(0xc4c0a3e0); // ldff1sh {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa48063e0); // ldff1sw {z0.d}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xc560a3e0); // ldff1sw {z0.d}, p0/z, [sp, z0.d, lsl #2]
+ __ dci(0xc540a3e0); // ldff1sw {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa54063e0); // ldff1w {z0.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xa56063e0); // ldff1w {z0.d}, p0/z, [sp, x0, lsl #2]
+ __ dci(0xc560e3e0); // ldff1w {z0.d}, p0/z, [sp, z0.d, lsl #2]
+ __ dci(0xc540e3e0); // ldff1w {z0.d}, p0/z, [sp, z0.d]
+ __ dci(0xa430a3e0); // ldnf1b {z0.h}, p0/z, [sp]
+ __ dci(0xa450a3e0); // ldnf1b {z0.s}, p0/z, [sp]
+ __ dci(0xa470a3e0); // ldnf1b {z0.d}, p0/z, [sp]
+ __ dci(0xa410a3e0); // ldnf1b {z0.b}, p0/z, [sp]
+ __ dci(0xa5f0a3e0); // ldnf1d {z0.d}, p0/z, [sp]
+ __ dci(0xa4b0a3e0); // ldnf1h {z0.h}, p0/z, [sp]
+ __ dci(0xa4d0a3e0); // ldnf1h {z0.s}, p0/z, [sp]
+ __ dci(0xa4f0a3e0); // ldnf1h {z0.d}, p0/z, [sp]
+ __ dci(0xa5d0a3e0); // ldnf1sb {z0.h}, p0/z, [sp]
+ __ dci(0xa5b0a3e0); // ldnf1sb {z0.s}, p0/z, [sp]
+ __ dci(0xa590a3e0); // ldnf1sb {z0.d}, p0/z, [sp]
+ __ dci(0xa530a3e0); // ldnf1sh {z0.s}, p0/z, [sp]
+ __ dci(0xa510a3e0); // ldnf1sh {z0.d}, p0/z, [sp]
+ __ dci(0xa490a3e0); // ldnf1sw {z0.d}, p0/z, [sp]
+ __ dci(0xa550a3e0); // ldnf1w {z0.s}, p0/z, [sp]
+ __ dci(0xa570a3e0); // ldnf1w {z0.d}, p0/z, [sp]
+ __ dci(0xa400e3e0); // ldnt1b {z0.b}, p0/z, [sp]
+ __ dci(0xa400c3e0); // ldnt1b {z0.b}, p0/z, [sp, x0]
+ __ dci(0xa580e3e0); // ldnt1d {z0.d}, p0/z, [sp]
+ __ dci(0xa580c3e0); // ldnt1d {z0.d}, p0/z, [sp, x0, lsl #3]
+ __ dci(0xa480e3e0); // ldnt1h {z0.h}, p0/z, [sp]
+ __ dci(0xa480c3e0); // ldnt1h {z0.h}, p0/z, [sp, x0, lsl #1]
+ __ dci(0xa500e3e0); // ldnt1w {z0.s}, p0/z, [sp]
+ __ dci(0xa500c3e0); // ldnt1w {z0.s}, p0/z, [sp, x0, lsl #2]
+ __ dci(0x858043e0); // ldr z0, [sp]
+ __ dci(0xe400e3e0); // st1b {z0.b}, p0, [sp]
+ __ dci(0xe40043e0); // st1b {z0.b}, p0, [sp, x0]
+ __ dci(0xe400a3e0); // st1b {z0.d}, p0, [sp, z0.d]
+ __ dci(0xe5e0e3e0); // st1d {z0.d}, p0, [sp]
+ __ dci(0xe5e043e0); // st1d {z0.d}, p0, [sp, x0, lsl #3]
+ __ dci(0xe5a0a3e0); // st1d {z0.d}, p0, [sp, z0.d, lsl #3]
+ __ dci(0xe580a3e0); // st1d {z0.d}, p0, [sp, z0.d]
+ __ dci(0xe4e0e3e0); // st1h {z0.d}, p0, [sp]
+ __ dci(0xe4e043e0); // st1h {z0.d}, p0, [sp, x0, lsl #1]
+ __ dci(0xe4a0a3e0); // st1h {z0.d}, p0, [sp, z0.d, lsl #1]
+ __ dci(0xe480a3e0); // st1h {z0.d}, p0, [sp, z0.d]
+ __ dci(0xe560e3e0); // st1w {z0.d}, p0, [sp]
+ __ dci(0xe56043e0); // st1w {z0.d}, p0, [sp, x0, lsl #2]
+ __ dci(0xe430e3e0); // st2b {z0.b, z1.b}, p0, [sp]
+ __ dci(0xe42063e0); // st2b {z0.b, z1.b}, p0, [sp, x0]
+ __ dci(0xe5b0e3e0); // st2d {z0.d, z1.d}, p0, [sp]
+ __ dci(0xe5a063e0); // st2d {z0.d, z1.d}, p0, [sp, x0, lsl #3]
+ __ dci(0xe4b0e3e0); // st2h {z0.h, z1.h}, p0, [sp]
+ __ dci(0xe4a063e0); // st2h {z0.h, z1.h}, p0, [sp, x0, lsl #1]
+ __ dci(0xe530e3e0); // st2w {z0.s, z1.s}, p0, [sp]
+ __ dci(0xe52063e0); // st2w {z0.s, z1.s}, p0, [sp, x0, lsl #2]
+ __ dci(0xe450e3e0); // st3b {z0.b, z1.b, z2.b}, p0, [sp]
+ __ dci(0xe44063e0); // st3b {z0.b, z1.b, z2.b}, p0, [sp, x0]
+ __ dci(0xe5d0e3e0); // st3d {z0.d, z1.d, z2.d}, p0, [sp]
+ __ dci(0xe5c063e0); // st3d {z0.d, z1.d, z2.d}, p0, [sp, x0, lsl #3]
+ __ dci(0xe4d0e3e0); // st3h {z0.h, z1.h, z2.h}, p0, [sp]
+ __ dci(0xe4c063e0); // st3h {z0.h, z1.h, z2.h}, p0, [sp, x0, lsl #1]
+ __ dci(0xe550e3e0); // st3w {z0.s, z1.s, z2.s}, p0, [sp]
+ __ dci(0xe54063e0); // st3w {z0.s, z1.s, z2.s}, p0, [sp, x0, lsl #2]
+ __ dci(0xe470e3e0); // st4b {z0.b, z1.b, z2.b, z3.b}, p0, [sp]
+ __ dci(0xe46063e0); // st4b {z0.b, z1.b, z2.b, z3.b}, p0, [sp, x0]
+ __ dci(0xe5f0e3e0); // st4d {z0.d, z1.d, z2.d, z3.d}, p0, [sp]
+ __ dci(0xe5e063e0); // st4d {z0.d, z1.d, z2.d, z3.d}, p0, [sp, x0, lsl #3]
+ __ dci(0xe4f0e3e0); // st4h {z0.h, z1.h, z2.h, z3.h}, p0, [sp]
+ __ dci(0xe4e063e0); // st4h {z0.h, z1.h, z2.h, z3.h}, p0, [sp, x0, lsl #1]
+ __ dci(0xe570e3e0); // st4w {z0.s, z1.s, z2.s, z3.s}, p0, [sp]
+ __ dci(0xe56063e0); // st4w {z0.s, z1.s, z2.s, z3.s}, p0, [sp, x0, lsl #2]
+ __ dci(0xe410e3e0); // stnt1b {z0.b}, p0, [sp]
+ __ dci(0xe40063e0); // stnt1b {z0.b}, p0, [sp, x0]
+ __ dci(0xe590e3e0); // stnt1d {z0.d}, p0, [sp]
+ __ dci(0xe58063e0); // stnt1d {z0.d}, p0, [sp, x0, lsl #3]
+ __ dci(0xe490e3e0); // stnt1h {z0.h}, p0, [sp]
+ __ dci(0xe48063e0); // stnt1h {z0.h}, p0, [sp, x0, lsl #1]
+ __ dci(0xe510e3e0); // stnt1w {z0.s}, p0, [sp]
+ __ dci(0xe50063e0); // stnt1w {z0.s}, p0, [sp, x0, lsl #2]
+ __ dci(0x858003e0); // ldr p0, [sp]
+ __ dci(0xe58003e0); // str p0, [sp]
+ __ dci(0xe58043e0); // str z0, [sp]
+ }
+
+ // Undo the 128 pushes above so that sp is back at its START() value; the
+ // epilogue emitted by END() presumably relies on a balanced stack.
+ __ Drop(128 * 2 * kXRegSizeInBytes);
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ // No checks are made here. The test is designed to ensure that the base
+ // register is interpreted correctly as sp, not xzr. If it is interpreted
+ // as xzr, the memory access to addresses near zero will fault, and the
+ // test will fail.
+ }
+}
+
// Manually constructed simulator test to avoid creating a VL128 variant.
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
diff --git a/tools/code_coverage.log b/tools/code_coverage.log
index d7c551d..621a9bc 100644
--- a/tools/code_coverage.log
+++ b/tools/code_coverage.log
@@ -14,4 +14,5 @@
1660224011 82.79% 97.51% 95.50%
1663161852 82.79% 97.51% 95.50%
1666104118 82.79% 97.51% 95.50%
+1669202345 82.79% 97.51% 95.51%
1673432155 82.79% 97.51% 95.51%