author     Alexander Gilday <alexander.gilday@arm.com>  2018-10-29 13:41:41 +0000
committer  Jacob Bramley <jacob.bramley@arm.com>        2018-11-12 13:38:00 +0000
commit     311edf27319bc91cf89db268c32b589f68b2ea69
tree       adeda6f329f035c973ae4e5a0d26b75fc7eebff0
parent     84ee144ecd16f0ee52c24a6273430f37c92322ad
Add support for STLR/LDAPR unscaled offset variants.
Add support for the unscaled offset versions of STLR and LDAPR, as well
as the signed versions of LDAPR. These instructions are STLUR[BH],
LDAPUR[BH], and LDAPURS[BHW].
Change-Id: I74a9551de9a0fc5284b06eae00fd0b5267a7f324
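
For orientation, the macro-assembler changes below make the new forms transparent to callers: a zero immediate offset on Ldapr/Stlr keeps the existing LDAPR/STLR encodings, while a non-zero unscaled offset (any 9-bit signed value, -256 to 255) selects the new LDAPUR/STLUR encodings. A minimal usage sketch against the entry points added in this patch (register choices are illustrative, not from the patch):

    #include "aarch64/macro-assembler-aarch64.h"

    using namespace vixl::aarch64;

    // Sketch: how the new instructions are reached through the MacroAssembler.
    void EmitRcpcImmExamples(MacroAssembler* masm) {
      // Offset 0 keeps the classic acquire-release / Armv8.3 RCpc encodings.
      masm->Ldapr(w0, MemOperand(x1));        // ldapr w0, [x1]
      masm->Stlr(w0, MemOperand(x1));         // stlr w0, [x1]
      // A non-zero unscaled offset selects the Armv8.4 variants.
      masm->Ldapr(w2, MemOperand(x1, 13));    // ldapur w2, [x1, #13]
      masm->Stlr(x3, MemOperand(x1, -8));     // stlur x3, [x1, #-8]
      // The signed RCpc loads exist only in the unscaled-offset form.
      masm->Ldapursb(x4, MemOperand(x1, 1));  // ldapursb x4, [x1, #1]
      masm->Ldapursw(x5, MemOperand(x1, 4));  // ldapursw x5, [x1, #4]
    }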
-rw-r--r--  examples/aarch64/non-const-visitor.h         |   1
-rw-r--r--  src/aarch64/assembler-aarch64.cc             |  86
-rw-r--r--  src/aarch64/assembler-aarch64.h              |  29
-rw-r--r--  src/aarch64/constants-aarch64.h              |  20
-rw-r--r--  src/aarch64/cpu-features-auditor-aarch64.cc  |  24
-rw-r--r--  src/aarch64/decoder-aarch64.cc               |  12
-rw-r--r--  src/aarch64/decoder-aarch64.h                |   1
-rw-r--r--  src/aarch64/disasm-aarch64.cc                |  59
-rw-r--r--  src/aarch64/instrument-aarch64.cc            |  27
-rw-r--r--  src/aarch64/macro-assembler-aarch64.h        |  60
-rw-r--r--  src/aarch64/simulator-aarch64.cc             |  99
-rw-r--r--  src/aarch64/simulator-aarch64.h              |   4
-rw-r--r--  test/aarch64/test-assembler-aarch64.cc       | 195
-rw-r--r--  test/aarch64/test-disasm-aarch64.cc          |  48
14 files changed, 648 insertions(+), 17 deletions(-)
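
The encodings added in constants-aarch64.h below follow a regular pattern: bits 31:30 select the access size, bits 23:22 form an opc field (00 store-release, 01 plain load-acquire, 10 signed load to X, 11 signed load to W), and the 9-bit immediate sits in bits 20:12. A hedged, standalone sketch of classifying an instruction word with the new masks (constant values are copied from the enum in the diff; everything else is illustrative):

    #include <cstdint>
    #include <cstdio>

    // Values copied from the LoadStoreRCpcUnscaledOffsetOp enum below.
    const uint32_t kFixed = 0x19000000;  // LoadStoreRCpcUnscaledOffsetFixed
    const uint32_t kFMask = 0x3F200C00;  // LoadStoreRCpcUnscaledOffsetFMask
    const uint32_t kMask = 0xFFE00C00;   // LoadStoreRCpcUnscaledOffsetMask
    const uint32_t kStlurX = kFixed | 0xC0000000;  // STLUR_x

    int main() {
      // Hand-assemble "stlur x3, [x1, #-8]": imm9 (-8 -> 0x1F8) goes in
      // bits 20:12, Rn in bits 9:5, Rt in bits 4:0.
      uint32_t instr = kStlurX | (0x1F8 << 12) | (1 << 5) | 3;
      bool in_class = (instr & kFMask) == kFixed;    // decoder-style check
      bool is_stlur_x = (instr & kMask) == kStlurX;  // visitor-style check
      printf("0x%08x: class=%d stlur_x=%d\n", instr, in_class, is_stlur_x);
      return 0;
    }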
diff --git a/examples/aarch64/non-const-visitor.h b/examples/aarch64/non-const-visitor.h
index 1a218bcb..3a142ca0 100644
--- a/examples/aarch64/non-const-visitor.h
+++ b/examples/aarch64/non-const-visitor.h
@@ -78,6 +78,7 @@ class SwitchAddSubRegisterSources : public DecoderVisitor {
   V(LoadStorePairOffset)           \
   V(LoadStorePairPreIndex)         \
   V(LoadStorePairNonTemporal)      \
+  V(LoadStoreRCpcUnscaledOffset)   \
   V(LoadLiteral)                   \
   V(LoadStoreUnscaledOffset)       \
   V(LoadStorePostIndex)            \
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 04bd2267..1c3ea65b 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -1522,12 +1522,30 @@ void Assembler::stlrb(const Register& rt, const MemOperand& dst) {
   Emit(STLRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister()));
 }
 
+void Assembler::stlurb(const Register& rt, const MemOperand& dst) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset()));
+
+  Instr base = RnSP(dst.GetBaseRegister());
+  int64_t offset = dst.GetOffset();
+  Emit(STLURB | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::stlrh(const Register& rt, const MemOperand& dst) {
   VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0));
   Emit(STLRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister()));
 }
 
+void Assembler::stlurh(const Register& rt, const MemOperand& dst) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset()));
+
+  Instr base = RnSP(dst.GetBaseRegister());
+  int64_t offset = dst.GetOffset();
+  Emit(STLURH | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::stlr(const Register& rt, const MemOperand& dst) {
   VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0));
@@ -1535,6 +1553,16 @@ void Assembler::stlr(const Register& rt, const MemOperand& dst) {
   Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister()));
 }
 
+void Assembler::stlur(const Register& rt, const MemOperand& dst) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset()));
+
+  Instr base = RnSP(dst.GetBaseRegister());
+  int64_t offset = dst.GetOffset();
+  Instr op = rt.Is64Bits() ? STLUR_x : STLUR_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::ldarb(const Register& rt, const MemOperand& src) {
   VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
@@ -1746,6 +1774,25 @@ void Assembler::ldaprb(const Register& rt, const MemOperand& src) {
   Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
 }
 
+void Assembler::ldapurb(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURB | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursb(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Instr op = rt.Is64Bits() ? LDAPURSB_x : LDAPURSB_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::ldaprh(const Register& rt, const MemOperand& src) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
   VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
@@ -1753,6 +1800,25 @@ void Assembler::ldaprh(const Register& rt, const MemOperand& src) {
   Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
 }
 
+void Assembler::ldapurh(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURH | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursh(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  LoadStoreRCpcUnscaledOffsetOp op = rt.Is64Bits() ? LDAPURSH_x : LDAPURSH_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::ldapr(const Register& rt, const MemOperand& src) {
   VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
   VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
@@ -1760,6 +1826,26 @@ void Assembler::ldapr(const Register& rt, const MemOperand& src) {
   Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
 }
 
+void Assembler::ldapur(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  LoadStoreRCpcUnscaledOffsetOp op = rt.Is64Bits() ? LDAPUR_x : LDAPUR_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursw(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(rt.Is64Bits());
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
 void Assembler::prfm(PrefetchOperation op,
                      const MemOperand& address,
                      LoadStoreScalingOption option) {
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 309b49ac..ecdba12c 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -1487,6 +1487,35 @@ class Assembler : public vixl::internal::AssemblerBase {
              const Register& rt2,
              const MemOperand& src);
 
+  // Store-release byte (with unscaled offset) [Armv8.4].
+  void stlurb(const Register& rt, const MemOperand& dst);
+
+  // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4].
+  void ldapurb(const Register& rt, const MemOperand& src);
+
+  // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4].
+  void ldapursb(const Register& rt, const MemOperand& src);
+
+  // Store-release half-word (with unscaled offset) [Armv8.4].
+  void stlurh(const Register& rt, const MemOperand& dst);
+
+  // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4].
+  void ldapurh(const Register& rt, const MemOperand& src);
+
+  // Load-acquire RCpc Register signed half-word (with unscaled offset)
+  // [Armv8.4].
+  void ldapursh(const Register& rt, const MemOperand& src);
+
+  // Store-release word or double-word (with unscaled offset) [Armv8.4].
+  void stlur(const Register& rt, const MemOperand& dst);
+
+  // Load-acquire RCpc Register word or double-word (with unscaled offset)
+  // [Armv8.4].
+  void ldapur(const Register& rt, const MemOperand& src);
+
+  // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4].
+  void ldapursw(const Register& xt, const MemOperand& src);
+
   // Atomic add on byte in memory [Armv8.1]
   void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index 16f2715e..bbee5d82 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -1134,6 +1134,26 @@ enum LoadStoreExclusive {
   CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz
 };
 
+// Load/store RCpc unscaled offset.
+enum LoadStoreRCpcUnscaledOffsetOp {
+  LoadStoreRCpcUnscaledOffsetFixed = 0x19000000,
+  LoadStoreRCpcUnscaledOffsetFMask = 0x3F200C00,
+  LoadStoreRCpcUnscaledOffsetMask = 0xFFE00C00,
+  STLURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00000000,
+  LDAPURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00400000,
+  LDAPURSB_x = LoadStoreRCpcUnscaledOffsetFixed | 0x00800000,
+  LDAPURSB_w = LoadStoreRCpcUnscaledOffsetFixed | 0x00C00000,
+  STLURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40000000,
+  LDAPURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40400000,
+  LDAPURSH_x = LoadStoreRCpcUnscaledOffsetFixed | 0x40800000,
+  LDAPURSH_w = LoadStoreRCpcUnscaledOffsetFixed | 0x40C00000,
+  STLUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80000000,
+  LDAPUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80400000,
+  LDAPURSW = LoadStoreRCpcUnscaledOffsetFixed | 0x80800000,
+  STLUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0000000,
+  LDAPUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0400000
+};
+
 #define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V) \
   V(LDADD, 0x00000000),                  \
   V(LDCLR, 0x00001000),                  \
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index d3639d35..b579507e 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -273,6 +273,30 @@ void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) {
   }
 }
 
+void CPUFeaturesAuditor::VisitLoadStoreRCpcUnscaledOffset(
+    const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
+    case LDAPURB:
+    case LDAPURSB_w:
+    case LDAPURSB_x:
+    case LDAPURH:
+    case LDAPURSH_w:
+    case LDAPURSH_x:
+    case LDAPUR_w:
+    case LDAPURSW:
+    case LDAPUR_x:
+      scope.Record(CPUFeatures::kRCpc);
+      VIXL_FALLTHROUGH();
+    case STLURB:
+    case STLURH:
+    case STLUR_w:
+    case STLUR_x:
+      scope.Record(CPUFeatures::kRCpcImm);
+      return;
+  }
+}
+
 void CPUFeaturesAuditor::VisitLoadStorePAC(const Instruction* instr) {
   RecordInstructionFeaturesScope scope(this);
   USE(instr);
diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc
index ea35973f..a01890ce 100644
--- a/src/aarch64/decoder-aarch64.cc
+++ b/src/aarch64/decoder-aarch64.cc
@@ -422,7 +422,17 @@ void Decoder::DecodeLoadStore(const Instruction* instr) {
     }
   } else {
     if (instr->ExtractBit(29) == 0) {
-      VisitUnallocated(instr);
+      if ((instr->ExtractBit(26) == 0) && (instr->ExtractBit(21) == 0) &&
+          (instr->ExtractBits(11, 10) == 0x0) &&
+          ((instr->ExtractBits(31, 30) < 0x2) ||
+           ((instr->ExtractBits(31, 30) == 0x2) &&
+            (instr->ExtractBits(23, 22) != 0x3)) ||
+           ((instr->ExtractBits(31, 30) == 0x3) &&
+            (instr->ExtractBits(23, 22) < 0x2)))) {
+        VisitLoadStoreRCpcUnscaledOffset(instr);
+      } else {
+        VisitUnallocated(instr);
+      }
     } else {
       if ((instr->Mask(0x84C00000) == 0x80C00000) ||
           (instr->Mask(0x44800000) == 0x44800000) ||
diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h
index d6fb4a3c..94928bed 100644
--- a/src/aarch64/decoder-aarch64.h
+++ b/src/aarch64/decoder-aarch64.h
@@ -75,6 +75,7 @@
   V(LoadStorePairPreIndex)         \
   V(LoadStorePostIndex)            \
   V(LoadStorePreIndex)             \
+  V(LoadStoreRCpcUnscaledOffset)   \
   V(LoadStoreRegisterOffset)       \
   V(LoadStoreUnscaledOffset)       \
   V(LoadStoreUnsignedOffset)       \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 21610432..28fec6bc 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -1092,6 +1092,65 @@ void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) {
 }
 
 
+void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) {
+  const char *mnemonic;
+  const char *form = "'Wt, ['Xns'ILS]";
+  const char *form_x = "'Xt, ['Xns'ILS]";
+
+  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
+    case STLURB:
+      mnemonic = "stlurb";
+      break;
+    case LDAPURB:
+      mnemonic = "ldapurb";
+      break;
+    case LDAPURSB_w:
+      mnemonic = "ldapursb";
+      break;
+    case LDAPURSB_x:
+      mnemonic = "ldapursb";
+      form = form_x;
+      break;
+    case STLURH:
+      mnemonic = "stlurh";
+      break;
+    case LDAPURH:
+      mnemonic = "ldapurh";
+      break;
+    case LDAPURSH_w:
+      mnemonic = "ldapursh";
+      break;
+    case LDAPURSH_x:
+      mnemonic = "ldapursh";
+      form = form_x;
+      break;
+    case STLUR_w:
+      mnemonic = "stlur";
+      break;
+    case LDAPUR_w:
+      mnemonic = "ldapur";
+      break;
+    case LDAPURSW:
+      mnemonic = "ldapursw";
+      form = form_x;
+      break;
+    case STLUR_x:
+      mnemonic = "stlur";
+      form = form_x;
+      break;
+    case LDAPUR_x:
+      mnemonic = "ldapur";
+      form = form_x;
+      break;
+    default:
+      mnemonic = "unimplemented";
+      form = "(LoadStoreRCpcUnscaledOffset)";
+  }
+
+  Format(instr, mnemonic, form);
+}
+
+
 void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(LoadStoreRegisterOffset)";
diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc
index 78edcacf..02a1083e 100644
--- a/src/aarch64/instrument-aarch64.cc
+++ b/src/aarch64/instrument-aarch64.cc
@@ -495,6 +495,33 @@ void Instrument::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   InstrumentLoadStore(instr);
 }
 
+void Instrument::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
+  Update();
+  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
+    case STLURB:
+    case STLURH:
+    case STLUR_w:
+    case STLUR_x: {
+      static Counter* counter = GetCounter("Store Integer");
+      counter->Increment();
+      break;
+    }
+    case LDAPURB:
+    case LDAPURSB_w:
+    case LDAPURSB_x:
+    case LDAPURH:
+    case LDAPURSH_w:
+    case LDAPURSH_x:
+    case LDAPUR_w:
+    case LDAPURSW:
+    case LDAPUR_x: {
+      static Counter* counter = GetCounter("Load Integer");
+      counter->Increment();
+      break;
+    }
+  }
+}
+
 void Instrument::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   Update();
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 8051bd1b..bdd85494 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -1781,19 +1781,52 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
   void Ldaprb(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    ldaprb(rt, src);
+    VIXL_ASSERT(src.IsImmediateOffset());
+    if (src.GetOffset() == 0) {
+      ldaprb(rt, src);
+    } else {
+      ldapurb(rt, src);
+    }
+  }
+
+  void Ldapursb(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldapursb(rt, src);
   }
 
   void Ldaprh(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    ldaprh(rt, src);
+    VIXL_ASSERT(src.IsImmediateOffset());
+    if (src.GetOffset() == 0) {
+      ldaprh(rt, src);
+    } else {
+      ldapurh(rt, src);
+    }
+  }
+
+  void Ldapursh(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldapursh(rt, src);
   }
 
   void Ldapr(const Register& rt, const MemOperand& src) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    ldapr(rt, src);
+    VIXL_ASSERT(src.IsImmediateOffset());
+    if (src.GetOffset() == 0) {
+      ldapr(rt, src);
+    } else {
+      ldapur(rt, src);
+    }
+  }
+
+  void Ldapursw(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldapursw(rt, src);
   }
 
   void Ldnp(const CPURegister& rt,
@@ -2249,17 +2282,32 @@ class MacroAssembler : public Assembler, public MacroAssemblerInterface {
   void Stlr(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    stlr(rt, dst);
+    VIXL_ASSERT(dst.IsImmediateOffset());
+    if (dst.GetOffset() == 0) {
+      stlr(rt, dst);
+    } else {
+      stlur(rt, dst);
+    }
   }
 
   void Stlrb(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    stlrb(rt, dst);
+    VIXL_ASSERT(dst.IsImmediateOffset());
+    if (dst.GetOffset() == 0) {
+      stlrb(rt, dst);
+    } else {
+      stlurb(rt, dst);
+    }
   }
 
   void Stlrh(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
-    stlrh(rt, dst);
+    VIXL_ASSERT(dst.IsImmediateOffset());
+    if (dst.GetOffset() == 0) {
+      stlrh(rt, dst);
+    } else {
+      stlurh(rt, dst);
+    }
   }
 
   void Stllr(const Register& rt, const MemOperand& dst) {
     VIXL_ASSERT(allow_macro_instructions_);
     SingleEmissionCheckScope guard(this);
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 1c1d52e7..b682d07c 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -1449,6 +1449,105 @@ void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
 }
 
 
+template <typename T1, typename T2>
+void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
+  unsigned rt = instr->GetRt();
+  unsigned rn = instr->GetRn();
+
+  unsigned element_size = sizeof(T2);
+  uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
+  int offset = instr->GetImmLS();
+  address += offset;
+
+  // Verify that the address is available to the host.
+  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+  // Check the alignment of `address`.
+  if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
+    VIXL_ALIGNMENT_EXCEPTION();
+  }
+
+  WriteRegister<T1>(rt, static_cast<T1>(Memory::Read<T2>(address)));
+
+  // Approximate load-acquire by issuing a full barrier after the load.
+  __sync_synchronize();
+
+  LogRead(address, rt, GetPrintRegisterFormat(element_size));
+}
+
+
+template <typename T>
+void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
+  unsigned rt = instr->GetRt();
+  unsigned rn = instr->GetRn();
+
+  unsigned element_size = sizeof(T);
+  uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
+  int offset = instr->GetImmLS();
+  address += offset;
+
+  // Verify that the address is available to the host.
+  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+  // Check the alignment of `address`.
+  if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
+    VIXL_ALIGNMENT_EXCEPTION();
+  }
+
+  // Approximate store-release by issuing a full barrier before the store.
+  __sync_synchronize();
+
+  Memory::Write<T>(address, ReadRegister<T>(rt));
+
+  LogWrite(address, rt, GetPrintRegisterFormat(element_size));
+}
+
+
+void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
+  switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
+    case LDAPURB:
+      LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
+      break;
+    case LDAPURH:
+      LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
+      break;
+    case LDAPUR_w:
+      LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
+      break;
+    case LDAPUR_x:
+      LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
+      break;
+    case LDAPURSB_w:
+      LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
+      break;
+    case LDAPURSB_x:
+      LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
+      break;
+    case LDAPURSH_w:
+      LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
+      break;
+    case LDAPURSH_x:
+      LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
+      break;
+    case LDAPURSW:
+      LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
+      break;
+    case STLURB:
+      StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
+      break;
+    case STLURH:
+      StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
+      break;
+    case STLUR_w:
+      StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
+      break;
+    case STLUR_x:
+      StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
+      break;
+  }
+}
+
+
 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
   unsigned dst = instr->GetRt();
   unsigned addr_reg = instr->GetRn();
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 534a3298..84c81d8e 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1870,6 +1870,10 @@ class Simulator : public DecoderVisitor {
   void AtomicMemorySwapHelper(const Instruction* instr);
   template <typename T>
   void LoadAcquireRCpcHelper(const Instruction* instr);
+  template <typename T1, typename T2>
+  void LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr);
+  template <typename T>
+  void StoreReleaseUnscaledOffsetHelper(const Instruction* instr);
   uintptr_t AddressModeHelper(unsigned addr_reg,
                               int64_t offset,
                               AddrMode addrmode);
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc
index 8f0a87fb..fb53213c 100644
--- a/test/aarch64/test-assembler-aarch64.cc
+++ b/test/aarch64/test-assembler-aarch64.cc
@@ -19570,6 +19570,145 @@ TEST(ldaprb_ldaprh_ldapr) {
 }
 
 
+TEST(ldapurb_ldapurh_ldapur) {
+  uint64_t data[]
+      __attribute__((aligned(kXRegSizeInBytes * 2))) = {0x0123456789abcdef,
+                                                        0xfedcba9876543210};
+
+  uintptr_t data_base = reinterpret_cast<uintptr_t>(data);
+
+  SETUP_WITH_FEATURES(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm);
+  START();
+
+  __ Mov(x20, data_base);
+  __ Mov(x21, data_base + 2 * sizeof(data[0]));
+
+  __ Ldaprb(w0, MemOperand(x20));
+  __ Ldaprh(w1, MemOperand(x20));
+  __ Ldapr(w2, MemOperand(x20));
+  __ Ldapr(x3, MemOperand(x20));
+  __ Ldaprb(w4, MemOperand(x20, 12));
+  __ Ldaprh(w5, MemOperand(x20, 8));
+  __ Ldapr(w6, MemOperand(x20, 10));
+  __ Ldapr(x7, MemOperand(x20, 7));
+  __ Ldaprb(w8, MemOperand(x21, -1));
+  __ Ldaprh(w9, MemOperand(x21, -3));
+  __ Ldapr(w10, MemOperand(x21, -9));
+  __ Ldapr(x11, MemOperand(x21, -12));
+
+  END();
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  RUN();
+  ASSERT_EQUAL_64(0xef, x0);
+  ASSERT_EQUAL_64(0xcdef, x1);
+  ASSERT_EQUAL_64(0x89abcdef, x2);
+  ASSERT_EQUAL_64(0x0123456789abcdef, x3);
+  ASSERT_EQUAL_64(0x98, x4);
+  ASSERT_EQUAL_64(0x3210, x5);
+  ASSERT_EQUAL_64(0xba987654, x6);
+  ASSERT_EQUAL_64(0xdcba987654321001, x7);
+  ASSERT_EQUAL_64(0xfe, x8);
+  ASSERT_EQUAL_64(0xdcba, x9);
+  ASSERT_EQUAL_64(0x54321001, x10);
+  ASSERT_EQUAL_64(0x7654321001234567, x11);
+#endif
+
+  TEARDOWN();
+}
+
+
+TEST(ldapursb_ldapursh_ldapursw) {
+  uint64_t data[]
+      __attribute__((aligned(kXRegSizeInBytes * 2))) = {0x0123456789abcdef,
+                                                        0xfedcba9876543210};
+
+  uintptr_t data_base = reinterpret_cast<uintptr_t>(data);
+
+  SETUP_WITH_FEATURES(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm);
+  START();
+
+  __ Mov(x20, data_base);
+  __ Mov(x21, data_base + 2 * sizeof(data[0]));
+
+  __ Ldapursb(w0, MemOperand(x20));
+  __ Ldapursb(x1, MemOperand(x20));
+  __ Ldapursh(w2, MemOperand(x20));
+  __ Ldapursh(x3, MemOperand(x20));
+  __ Ldapursw(x4, MemOperand(x20));
+  __ Ldapursb(w5, MemOperand(x20, 12));
+  __ Ldapursb(x6, MemOperand(x20, 12));
+  __ Ldapursh(w7, MemOperand(x20, 13));
+  __ Ldapursh(x8, MemOperand(x20, 13));
+  __ Ldapursw(x9, MemOperand(x20, 10));
+  __ Ldapursb(w10, MemOperand(x21, -1));
+  __ Ldapursb(x11, MemOperand(x21, -1));
+  __ Ldapursh(w12, MemOperand(x21, -4));
+  __ Ldapursh(x13, MemOperand(x21, -4));
+  __ Ldapursw(x14, MemOperand(x21, -5));
+
+  __ Ldapursb(x15, MemOperand(x20, 8));
+  __ Ldapursh(x16, MemOperand(x20, 8));
+  __ Ldapursw(x17, MemOperand(x20, 8));
+
+  END();
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  RUN();
+  ASSERT_EQUAL_64(0xffffffef, x0);
+  ASSERT_EQUAL_64(0xffffffffffffffef, x1);
+  ASSERT_EQUAL_64(0xffffcdef, x2);
+  ASSERT_EQUAL_64(0xffffffffffffcdef, x3);
+  ASSERT_EQUAL_64(0xffffffff89abcdef, x4);
+  ASSERT_EQUAL_64(0xffffff98, x5);
+  ASSERT_EQUAL_64(0xffffffffffffff98, x6);
+  ASSERT_EQUAL_64(0xffffdcba, x7);
+  ASSERT_EQUAL_64(0xffffffffffffdcba, x8);
+  ASSERT_EQUAL_64(0xffffffffba987654, x9);
+  ASSERT_EQUAL_64(0xfffffffe, x10);
+  ASSERT_EQUAL_64(0xfffffffffffffffe, x11);
+  ASSERT_EQUAL_64(0xffffba98, x12);
+  ASSERT_EQUAL_64(0xffffffffffffba98, x13);
+  ASSERT_EQUAL_64(0xffffffffdcba9876, x14);
+
+  ASSERT_EQUAL_64(0x0000000000000010, x15);
+  ASSERT_EQUAL_64(0x0000000000003210, x16);
+  ASSERT_EQUAL_64(0x0000000076543210, x17);
+#endif
+
+  TEARDOWN();
+}
+
+
+TEST(stlurb_stlurh_stlur) {
+  uint64_t data[] __attribute__((aligned(kXRegSizeInBytes * 2))) = {0x0, 0x0};
+
+  uintptr_t data_base = reinterpret_cast<uintptr_t>(data);
+
+  SETUP_WITH_FEATURES(CPUFeatures::kRCpcImm);
+  START();
+
+  __ Mov(x0, 0x0011223344556677);
+  __ Mov(x20, data_base);
+  __ Mov(x21, data_base + 2 * sizeof(data[0]));
+
+  __ Stlrb(w0, MemOperand(x20));
+  __ Stlrh(w0, MemOperand(x20, 1));
+  __ Stlr(w0, MemOperand(x20, 3));
+  __ Stlr(x0, MemOperand(x21, -8));
+
+  END();
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+  RUN();
+  ASSERT_EQUAL_64(0x0044556677667777, data[0]);
+  ASSERT_EQUAL_64(0x0011223344556677, data[1]);
+#endif
+
+  TEARDOWN();
+}
+
+
 #define SIMPLE_ATOMIC_OPS(V, DEF) \
   V(DEF, add)                     \
   V(DEF, clr)                     \
@@ -19597,18 +19736,22 @@ TEST(unaligned_single_copy_atomicity) {
   uint64_t* data0_aligned = AlignUp(data0, kAtomicAccessGranule);
   uint64_t* dst_aligned = AlignUp(dst, kAtomicAccessGranule);
 
-  SETUP_WITH_FEATURES(CPUFeatures::kAtomics,
-                      CPUFeatures::kLORegions,
-                      CPUFeatures::kRCpc,
-                      CPUFeatures::kUSCAT);
+  CPUFeatures features(CPUFeatures::kAtomics,
+                       CPUFeatures::kLORegions,
+                       CPUFeatures::kRCpc,
+                       CPUFeatures::kRCpcImm);
+  features.Combine(CPUFeatures::kUSCAT);
+  SETUP_WITH_FEATURES(features);
   START();
 
   __ Mov(x0, 0x0123456789abcdef);
   __ Mov(x1, 0x456789abcdef0123);
   __ Mov(x2, 0x89abcdef01234567);
   __ Mov(x3, 0xcdef0123456789ab);
-  __ Mov(x20, reinterpret_cast<uintptr_t>(data0_aligned));
-  __ Mov(x21, reinterpret_cast<uintptr_t>(dst_aligned));
+  __ Mov(x18, reinterpret_cast<uintptr_t>(data0_aligned));
+  __ Mov(x19, reinterpret_cast<uintptr_t>(dst_aligned));
+  __ Mov(x20, x18);
+  __ Mov(x21, x19);
 
   for (unsigned i = 0; i < kAtomicAccessGranule; i++) {
     __ Stxrb(w0, w1, MemOperand(x20));
@@ -19629,6 +19772,11 @@ TEST(unaligned_single_copy_atomicity) {
     __ Swpab(w0, w1, MemOperand(x20));
     __ Swpalb(w0, w1, MemOperand(x20));
     __ Ldaprb(w0, MemOperand(x20));
+    // Use offset instead of Add to test Stlurb and Ldapurb.
+    __ Stlrb(w0, MemOperand(x19, i));
+    __ Ldaprb(w0, MemOperand(x19, i));
+    __ Ldapursb(w0, MemOperand(x20));
+    __ Ldapursb(x0, MemOperand(x20));
 
 #define ATOMIC_LOAD_B(NAME) __ Ld##NAME##b(w0, w1, MemOperand(x20));
 #define ATOMIC_STORE_B(NAME) __ St##NAME##b(w0, MemOperand(x20));
@@ -19656,6 +19804,11 @@ TEST(unaligned_single_copy_atomicity) {
     __ Swpah(w0, w1, MemOperand(x20));
    __ Swpalh(w0, w1, MemOperand(x20));
     __ Ldaprh(w0, MemOperand(x20));
+    // Use offset instead of Add to test Stlurh and Ldapurh.
+    __ Stlrh(w0, MemOperand(x19, i));
+    __ Ldaprh(w0, MemOperand(x19, i));
+    __ Ldapursh(w0, MemOperand(x20));
+    __ Ldapursh(x0, MemOperand(x20));
 
#define ATOMIC_LOAD_H(NAME) __ Ld##NAME##h(w0, w1, MemOperand(x20));
 #define ATOMIC_STORE_H(NAME) __ St##NAME##h(w0, MemOperand(x20));
@@ -19684,6 +19837,10 @@ TEST(unaligned_single_copy_atomicity) {
     __ Swpa(w0, w1, MemOperand(x20));
     __ Swpal(w0, w1, MemOperand(x20));
     __ Ldapr(w0, MemOperand(x20));
+    // Use offset instead of Add to test Stlur and Ldapur.
+    __ Stlr(w0, MemOperand(x19, i));
+    __ Ldapr(w0, MemOperand(x19, i));
+    __ Ldapursw(x0, MemOperand(x20));
 
 #define ATOMIC_LOAD_W(NAME) __ Ld##NAME(w0, w1, MemOperand(x20));
 #define ATOMIC_STORE_W(NAME) __ St##NAME(w0, MemOperand(x20));
@@ -19723,6 +19880,9 @@ TEST(unaligned_single_copy_atomicity) {
     __ Swpa(x0, x1, MemOperand(x20));
     __ Swpal(x0, x1, MemOperand(x20));
     __ Ldapr(x0, MemOperand(x20));
+    // Use offset instead of Add to test Stlur and Ldapur.
+    __ Stlr(x0, MemOperand(x19, i));
+    __ Ldapr(x0, MemOperand(x19, i));
 
 #define ATOMIC_LOAD_X(NAME) __ Ld##NAME(x0, x1, MemOperand(x20));
 #define ATOMIC_STORE_X(NAME) __ St##NAME(x0, MemOperand(x20));
@@ -19761,10 +19921,12 @@ TEST(unaligned_single_copy_atomicity) {
 #if defined(VIXL_NEGATIVE_TESTING) && defined(VIXL_INCLUDE_SIMULATOR_AARCH64)
 #define CHECK_ALIGN_FAIL(i, expr)                          \
   {                                                        \
-    SETUP_WITH_FEATURES(CPUFeatures::kAtomics,             \
-                        CPUFeatures::kLORegions,           \
-                        CPUFeatures::kRCpc,                \
-                        CPUFeatures::kUSCAT);              \
+    CPUFeatures features(CPUFeatures::kAtomics,            \
+                         CPUFeatures::kLORegions,          \
+                         CPUFeatures::kRCpc,               \
+                         CPUFeatures::kRCpcImm);           \
+    features.Combine(CPUFeatures::kUSCAT);                 \
+    SETUP_WITH_FEATURES(features);                         \
     START();                                               \
     __ Mov(x0, 0x0123456789abcdef);                        \
     __ Mov(x1, 0x456789abcdef0123);                        \
@@ -19810,6 +19972,11 @@ TEST(unaligned_single_copy_atomicity_negative_test) {
     CHECK_ALIGN_FAIL(i, __ Swpah(w0, w1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Swpalh(w0, w1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Ldaprh(w0, MemOperand(x20)));
+    // Use offset instead of Add to test Stlurh and Ldapurh.
+    CHECK_ALIGN_FAIL(0, __ Stlrh(w0, MemOperand(x20, i)));
+    CHECK_ALIGN_FAIL(0, __ Ldaprh(w0, MemOperand(x20, i)));
+    CHECK_ALIGN_FAIL(i, __ Ldapursh(w0, MemOperand(x20)));
+    CHECK_ALIGN_FAIL(i, __ Ldapursh(x0, MemOperand(x20)));
 
 #define ATOMIC_LOAD_H(NAME) \
   CHECK_ALIGN_FAIL(i, __ Ld##NAME##h(w0, w1, MemOperand(x20)));
@@ -19840,6 +20007,10 @@ TEST(unaligned_single_copy_atomicity_negative_test) {
     CHECK_ALIGN_FAIL(i, __ Swpa(w0, w1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Swpal(w0, w1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Ldapr(w0, MemOperand(x20)));
+    // Use offset instead of Add to test Stlur and Ldapur.
+    CHECK_ALIGN_FAIL(0, __ Stlr(w0, MemOperand(x20, i)));
+    CHECK_ALIGN_FAIL(0, __ Ldapr(w0, MemOperand(x20, i)));
+    CHECK_ALIGN_FAIL(i, __ Ldapursw(x0, MemOperand(x20)));
 
 #define ATOMIC_LOAD_W(NAME) \
   CHECK_ALIGN_FAIL(i, __ Ld##NAME(w0, w1, MemOperand(x20)));
@@ -19881,6 +20052,9 @@ TEST(unaligned_single_copy_atomicity_negative_test) {
     CHECK_ALIGN_FAIL(i, __ Swpa(x0, x1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Swpal(x0, x1, MemOperand(x20)));
     CHECK_ALIGN_FAIL(i, __ Ldapr(x0, MemOperand(x20)));
+    // Use offset instead of Add to test Stlur and Ldapur.
+    CHECK_ALIGN_FAIL(0, __ Stlr(x0, MemOperand(x20, i)));
+    CHECK_ALIGN_FAIL(0, __ Ldapr(x0, MemOperand(x20, i)));
 
 #define ATOMIC_LOAD_X(NAME) \
   CHECK_ALIGN_FAIL(i, __ Ld##NAME(x0, x1, MemOperand(x20)));
@@ -19935,6 +20109,7 @@ TEST(unaligned_single_copy_atomicity_negative_test_2) {
 }
 #endif  // VIXL_NEGATIVE_TESTING && VIXL_INCLUDE_SIMULATOR_AARCH64
 
+
 TEST(load_store_tagged_immediate_offset) {
   uint64_t tags[] = {0x00, 0x1, 0x55, 0xff};
   int tag_count = sizeof(tags) / sizeof(tags[0]);
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 5fb98d2c..447426de 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -2186,6 +2186,54 @@ TEST(atomic_memory) {
   CLEANUP();
 }
 
+TEST(load_store_rcpc_unscaled_offset) {
+  SETUP();
+
+  COMPARE(ldapurb(w0, MemOperand(x1)), "ldapurb w0, [x1]");
+  COMPARE(ldapurb(w2, MemOperand(x3, 13)), "ldapurb w2, [x3, #13]");
+  COMPARE(ldapursb(w4, MemOperand(x5, 129)), "ldapursb w4, [x5, #129]");
+  COMPARE(ldapursb(x6, MemOperand(sp, 64)), "ldapursb x6, [sp, #64]");
+  COMPARE(ldapurh(w7, MemOperand(x8)), "ldapurh w7, [x8]");
+  COMPARE(ldapurh(w9, MemOperand(x10, 13)), "ldapurh w9, [x10, #13]");
+  COMPARE(ldapursh(w11, MemOperand(x12, 129)), "ldapursh w11, [x12, #129]");
+  COMPARE(ldapursh(x13, MemOperand(sp, 64)), "ldapursh x13, [sp, #64]");
+  COMPARE(ldapur(w14, MemOperand(x15)), "ldapur w14, [x15]");
+  COMPARE(ldapur(w16, MemOperand(x17, 13)), "ldapur w16, [x17, #13]");
+  COMPARE(ldapursw(x18, MemOperand(sp, 64)), "ldapursw x18, [sp, #64]");
+  COMPARE(ldapur(x19, MemOperand(x20)), "ldapur x19, [x20]");
+  COMPARE(ldapur(x21, MemOperand(sp, 64)), "ldapur x21, [sp, #64]");
+
+  COMPARE(stlurb(w22, MemOperand(x23)), "stlurb w22, [x23]");
+  COMPARE(stlurb(w24, MemOperand(sp, 64)), "stlurb w24, [sp, #64]");
+  COMPARE(stlurh(w25, MemOperand(x26)), "stlurh w25, [x26]");
+  COMPARE(stlurh(w27, MemOperand(sp, 64)), "stlurh w27, [sp, #64]");
+  COMPARE(stlur(w28, MemOperand(x29)), "stlur w28, [x29]");
+  COMPARE(stlur(w0, MemOperand(sp, 64)), "stlur w0, [sp, #64]");
+  COMPARE(stlur(x1, MemOperand(x2)), "stlur x1, [x2]");
+  COMPARE(stlur(x3, MemOperand(sp, 64)), "stlur x3, [sp, #64]");
+
+
+  COMPARE_MACRO(Ldaprb(w0, MemOperand(x1)), "ldaprb w0, [x1]");
+  COMPARE_MACRO(Ldaprb(w2, MemOperand(x3, 13)), "ldapurb w2, [x3, #13]");
+  COMPARE_MACRO(Ldaprh(w4, MemOperand(x5)), "ldaprh w4, [x5]");
+  COMPARE_MACRO(Ldaprh(w6, MemOperand(x7, 13)), "ldapurh w6, [x7, #13]");
+  COMPARE_MACRO(Ldapr(w8, MemOperand(x9)), "ldapr w8, [x9]");
+  COMPARE_MACRO(Ldapr(w10, MemOperand(x11, 13)), "ldapur w10, [x11, #13]");
+  COMPARE_MACRO(Ldapr(x12, MemOperand(x13)), "ldapr x12, [x13]");
+  COMPARE_MACRO(Ldapr(x14, MemOperand(sp, 64)), "ldapur x14, [sp, #64]");
+
+  COMPARE_MACRO(Stlrb(w15, MemOperand(x16)), "stlrb w15, [x16]");
+  COMPARE_MACRO(Stlrb(w17, MemOperand(sp, 64)), "stlurb w17, [sp, #64]");
+  COMPARE_MACRO(Stlrh(w18, MemOperand(x19)), "stlrh w18, [x19]");
+  COMPARE_MACRO(Stlrh(w20, MemOperand(sp, 64)), "stlurh w20, [sp, #64]");
+  COMPARE_MACRO(Stlr(w21, MemOperand(x22)), "stlr w21, [x22]");
+  COMPARE_MACRO(Stlr(w23, MemOperand(sp, 64)), "stlur w23, [sp, #64]");
+  COMPARE_MACRO(Stlr(x24, MemOperand(x25)), "stlr x24, [x25]");
+  COMPARE_MACRO(Stlr(x26, MemOperand(sp, 64)), "stlur x26, [sp, #64]");
+
+  CLEANUP();
+}
+
 TEST(load_store_pair_nontemp) {
   SETUP();
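
As the simulator helpers above suggest (they approximate acquire/release semantics with `__sync_synchronize` full barriers), these instructions give RCpc acquire loads and release stores at arbitrary byte offsets. A hedged correspondence in portable C++ terms; actual code generation depends on the compiler and target flags:

    #include <atomic>
    #include <cstdint>

    // With RCpc available, a compiler may lower an acquire load to
    // ldapr/ldapur (RCpc ordering) rather than the stronger ldar (RCsc).
    uint64_t AcquireLoad(const std::atomic<uint64_t>* p) {
      return p->load(std::memory_order_acquire);  // e.g. ldapur x0, [x0]
    }

    // A release store corresponds to stlr, or stlur when a non-zero
    // offset addressing form is useful to the code generator.
    void ReleaseStore(std::atomic<uint64_t>* p, uint64_t v) {
      p->store(v, std::memory_order_release);  // e.g. stlur x1, [x0]
    }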