Add support for DC ZVA (#127)
The DC ZVA instruction allows zeroing areas of memory, typically at block sizes
of 64 bytes per instance. Add support for this in all components, with tests.
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 3ab0faa..8e7cee5 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -7177,6 +7177,7 @@
return CPUHas(CPUFeatures::kRNG);
case FPCR:
case NZCV:
+ case DCZID_EL0:
break;
}
return true;
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index 0846952..279587c 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -501,7 +501,8 @@
NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value,
FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value,
RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number.
- RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number.
+ RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value, // Reseeded random number.
+ DCZID_EL0 = SystemRegisterEncoder<3, 3, 0, 0, 7>::value
};
template<int op1, int crn, int crm, int op2>
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 5bcee74..930dfd6 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -7016,6 +7016,9 @@
case RNDRRS:
AppendToOutput("rndrrs");
break;
+ case DCZID_EL0:
+ AppendToOutput("dczid_el0");
+ break;
default:
AppendToOutput("S%d_%d_c%d_c%d_%d",
instr->GetSysOp0(),
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 2130c46..0c842ab 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -6926,7 +6926,7 @@
}
-void Simulator::SysOp_W(int op, int64_t val) {
+bool Simulator::SysOp_W(int op, int64_t val) {
switch (op) {
case IVAU:
case CVAC:
@@ -6948,12 +6948,27 @@
volatile uint8_t y = *MemRead<uint8_t>(val);
MetaDataDepot::MetaDataMTE::SetActive(mte_enabled);
USE(y);
- // TODO: Implement ZVA, GVA, GZVA.
break;
}
+ case ZVA: {
+ if ((dczid_ & 0x10) != 0) { // DZP (bit 4) set => DC ZVA prohibited.
+ return false;
+ }
+ int blocksize = (1 << (dczid_ & 0xf)) * kWRegSizeInBytes;
+ VIXL_ASSERT(IsMultiple(blocksize, sizeof(uint64_t)));
+ uintptr_t addr = AlignDown(val, blocksize);
+ for (int i = 0; i < blocksize; i += sizeof(uint64_t)) {
+ MemWrite<uint64_t>(addr + i, 0);
+ LogWriteU64(0, addr + i);
+ }
+ break;
+ }
+ // TODO: Implement GVA, GZVA.
default:
VIXL_UNIMPLEMENTED();
+ return false;
}
+ return true;
}
void Simulator::PACHelper(int dst,
@@ -7036,6 +7051,9 @@
LogSystemRegister(NZCV);
break;
}
+ case DCZID_EL0:
+ WriteXRegister(instr->GetRt(), dczid_);
+ break;
default:
VIXL_UNIMPLEMENTED();
}
@@ -7153,7 +7171,9 @@
} else if (sysop == GCSPUSHM) {
GCSPush(ReadXRegister(instr->GetRt()));
} else {
- SysOp_W(sysop, rt);
+ if (!SysOp_W(sysop, rt)) {
+ VisitUnallocated(instr);
+ }
}
break;
}
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index cbb1c4c..5d6d975 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -2575,6 +2575,14 @@
void PrintPWrite(int rt_code, uintptr_t address) {
PrintPAccess(rt_code, "->", address);
}
+ void PrintWriteU64(uint64_t x, uintptr_t address) {
+ fprintf(stream_,
+ "# 0x%016" PRIx64 " -> %s0x%016" PRIxPTR "%s\n",
+ x,
+ clr_memory_address,
+ address,
+ clr_normal);
+ }
// Like Print* (above), but respect GetTraceParameters().
void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) {
@@ -2609,6 +2617,9 @@
void LogPWrite(int rt_code, uintptr_t address) {
if (ShouldTraceWrites()) PrintPWrite(rt_code, address);
}
+ void LogWriteU64(uint64_t x, uintptr_t address) {
+ if (ShouldTraceWrites()) PrintWriteU64(x, address);
+ }
void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value);
}
@@ -5006,7 +5017,7 @@
uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);
- void SysOp_W(int op, int64_t val);
+ bool SysOp_W(int op, int64_t val);
template <typename T>
T FPRecipSqrtEstimate(T op);
@@ -5456,6 +5467,9 @@
// A configurable size of SVE vector registers.
unsigned vector_length_;
+ // DCZID_EL0 value: DZP prohibit bit (bit 4, 0 => DC ZVA enabled) and
+ // block size as log2 of the number of words.
+ unsigned dczid_ = (0 << 4) | 4; // 2^4 words => 64-byte block size.
+
// Representation of memory attributes such as MTE tagging and BTI page
// protection in addition to branch interceptions.
MetaDataDepot meta_data_;
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc
index 9da3f12..a86b32e 100644
--- a/test/aarch64/test-assembler-aarch64.cc
+++ b/test/aarch64/test-assembler-aarch64.cc
@@ -15321,6 +15321,61 @@
}
#endif // VIXL_NEGATIVE_TESTING
+TEST(dc_zva) {
+ SETUP_WITH_FEATURES(CPUFeatures::kNEON);
+
+ const int zva_blocksize = 64; // Assumed blocksize.
+ uint8_t buf[2 * zva_blocksize];
+ uintptr_t buf_addr = reinterpret_cast<uintptr_t>(buf);
+ uintptr_t aligned_addr = AlignUp(buf_addr, zva_blocksize);
+
+ START();
+ // Skip this test if the ZVA blocksize is not 64 bytes.
+ // Set up initial register values to allow the test to pass when skipped.
+ Label skip;
+ __ Movi(q0.V16B(), 0);
+ __ Movi(q1.V16B(), 0);
+ __ Movi(q2.V16B(), 0);
+ __ Movi(q3.V16B(), 0);
+
+ __ Mrs(x1, DCZID_EL0);
+ __ Cmp(x1, 4); // 4 => DC ZVA enabled with 64-byte blocks.
+ __ B(ne, &skip);
+
+ // Fill aligned region with a pattern.
+ __ Mov(x0, aligned_addr);
+ __ Movi(q0.V16B(), 0x55);
+ __ Movi(q1.V16B(), 0xaa);
+ __ Movi(q2.V16B(), 0x55);
+ __ Movi(q3.V16B(), 0xaa);
+ __ St4(q0.V16B(), q1.V16B(), q2.V16B(), q3.V16B(), MemOperand(x0));
+
+ // Misalign the address to check DC ZVA re-aligns.
+ __ Add(x0, x0, 42);
+
+ // Clear the aligned region.
+ __ Dc(ZVA, x0);
+
+ // Reload the aligned region to check contents.
+ __ Mov(x0, aligned_addr);
+ __ Ld1(q0.V16B(), q1.V16B(), q2.V16B(), q3.V16B(), MemOperand(x0));
+
+ __ Bind(&skip);
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+ if (core.xreg(1) == 4) {
+ ASSERT_EQUAL_128(0, 0, q0);
+ ASSERT_EQUAL_128(0, 0, q1);
+ ASSERT_EQUAL_128(0, 0, q2);
+ ASSERT_EQUAL_128(0, 0, q3);
+ } else {
+ printf("SKIPPED: DC ZVA blocksize is not 64 bytes.\n");
+ }
+ }
+}
+
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Test the pseudo-instructions that control CPUFeatures dynamically in the
// Simulator. These are used by the test infrastructure itself, but in a fairly
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 14a354b..40abef1 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -2611,6 +2611,7 @@
COMPARE(mrs(x15, FPCR), "mrs x15, fpcr");
COMPARE(mrs(x20, RNDR), "mrs x20, rndr");
COMPARE(mrs(x5, RNDRRS), "mrs x5, rndrrs");
+ COMPARE(mrs(x9, DCZID_EL0), "mrs x9, dczid_el0");
// Test mrs that use system registers we haven't named.
COMPARE(dci(MRS | (0x5555 << 5)), "mrs x0, S3_2_c10_c10_5");