Add support for features up to Armv8.3.
This is a squashed merge of several separate patches that, combined, add support
for most of the architectural features introduced up to and including Armv8.3.
Change-Id: Ia67f25fb9b82d5968120b0b144bd232e1898dc90
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index 4617fba..937809b 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -195,6 +195,66 @@
}
+void Assembler::braaz(const Register& xn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BRAAZ | Rn(xn) | Rd_mask);
+}
+
+void Assembler::brabz(const Register& xn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BRABZ | Rn(xn) | Rd_mask);
+}
+
+void Assembler::blraaz(const Register& xn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BLRAAZ | Rn(xn) | Rd_mask);
+}
+
+void Assembler::blrabz(const Register& xn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits());
+ Emit(BLRABZ | Rn(xn) | Rd_mask);
+}
+
+void Assembler::retaa() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(RETAA | Rn_mask | Rd_mask);
+}
+
+void Assembler::retab() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(RETAB | Rn_mask | Rd_mask);
+}
+
+// The Arm ARM names the register Xm but encodes it in the Xd bitfield.
+void Assembler::braa(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits());
+ Emit(BRAA | Rn(xn) | RdSP(xm));
+}
+
+void Assembler::brab(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits());
+ Emit(BRAB | Rn(xn) | RdSP(xm));
+}
+
+void Assembler::blraa(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits());
+ Emit(BLRAA | Rn(xn) | RdSP(xm));
+}
+
+void Assembler::blrab(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits());
+ Emit(BLRAB | Rn(xn) | RdSP(xm));
+}
+
+
void Assembler::b(int64_t imm26) { Emit(B | ImmUncondBranch(imm26)); }
@@ -959,6 +1019,60 @@
DataProcessing1Source(rd, rn, CLS);
}
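+// Each row of PAUTH_VARIATIONS generates the key A and key B forms together
+// with their zero-modifier variants; for example, the paci row defines pacia,
+// paciza, pacib and pacizb.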
+#define PAUTH_VARIATIONS(V) \
+ V(paci, PACI) \
+ V(pacd, PACD) \
+ V(auti, AUTI) \
+ V(autd, AUTD)
+
+#define DEFINE_ASM_FUNCS(PRE, OP) \
+ void Assembler::PRE##a(const Register& xd, const Register& xn) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \
+ Emit(SF(xd) | OP##A | Rd(xd) | RnSP(xn)); \
+ } \
+ \
+ void Assembler::PRE##za(const Register& xd) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
+ VIXL_ASSERT(xd.Is64Bits()); \
+ Emit(SF(xd) | OP##ZA | Rd(xd)); \
+ } \
+ \
+ void Assembler::PRE##b(const Register& xd, const Register& xn) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \
+ Emit(SF(xd) | OP##B | Rd(xd) | RnSP(xn)); \
+ } \
+ \
+ void Assembler::PRE##zb(const Register& xd) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \
+ VIXL_ASSERT(xd.Is64Bits()); \
+ Emit(SF(xd) | OP##ZB | Rd(xd)); \
+ }
+
+PAUTH_VARIATIONS(DEFINE_ASM_FUNCS)
+#undef DEFINE_ASM_FUNCS
+
+void Assembler::pacga(const Register& xd,
+ const Register& xn,
+ const Register& xm) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric));
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits());
+ Emit(SF(xd) | PACGA | Rd(xd) | Rn(xn) | RmSP(xm));
+}
+
+void Assembler::xpaci(const Register& xd) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xd.Is64Bits());
+ Emit(SF(xd) | XPACI | Rd(xd));
+}
+
+void Assembler::xpacd(const Register& xd) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ VIXL_ASSERT(xd.Is64Bits());
+ Emit(SF(xd) | XPACD | Rd(xd));
+}
+
void Assembler::ldp(const CPURegister& rt,
const CPURegister& rt2,
@@ -1524,6 +1638,96 @@
COMPARE_AND_SWAP_PAIR_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+// For a full list of the methods with comments, see the assembler header file.
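+// For example, the add row expands to ldadd, ldadda, ldaddl and ldaddal (plus
+// their byte and halfword forms) via DEFINE_ASM_LOAD_FUNC, and to stadd,
+// staddl, staddb, staddlb, staddh and staddlh via DEFINE_ASM_STORE_FUNC.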
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \
+ V(DEF, add, LDADD) \
+ V(DEF, clr, LDCLR) \
+ V(DEF, eor, LDEOR) \
+ V(DEF, set, LDSET) \
+ V(DEF, smax, LDSMAX) \
+ V(DEF, smin, LDSMIN) \
+ V(DEF, umax, LDUMAX) \
+ V(DEF, umin, LDUMIN)
+
+#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME, OP##_x, OP##_w) \
+ V(NAME##l, OP##L_x, OP##L_w) \
+ V(NAME##b, OP##B, OP##B) \
+ V(NAME##lb, OP##LB, OP##LB) \
+ V(NAME##h, OP##H, OP##H) \
+ V(NAME##lh, OP##LH, OP##LH)
+
+#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \
+ ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \
+ V(NAME##a, OP##A_x, OP##A_w) \
+ V(NAME##al, OP##AL_x, OP##AL_w) \
+ V(NAME##ab, OP##AB, OP##AB) \
+ V(NAME##alb, OP##ALB, OP##ALB) \
+ V(NAME##ah, OP##AH, OP##AH) \
+ V(NAME##alh, OP##ALH, OP##ALH)
+// clang-format on
+
+#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \
+ void Assembler::ld##FN(const Register& rs, \
+ const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.GetBaseRegister())); \
+ }
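+// The st<op> forms alias the corresponding ld<op> with the zero register as the
+// destination, so they are defined in terms of the load functions above.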
+#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W) \
+ void Assembler::st##FN(const Register& rs, const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ ld##FN(rs, AppropriateZeroRegFor(rs), src); \
+ }
+
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES,
+ DEFINE_ASM_LOAD_FUNC)
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES,
+ DEFINE_ASM_STORE_FUNC)
+
+#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W) \
+ void Assembler::FN(const Register& rs, \
+ const Register& rt, \
+ const MemOperand& src) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \
+ AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W; \
+ Emit(op | Rs(rs) | Rt(rt) | RnSP(src.GetBaseRegister())); \
+ }
+
+ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP)
+
+#undef DEFINE_ASM_LOAD_FUNC
+#undef DEFINE_ASM_STORE_FUNC
+#undef DEFINE_ASM_SWP_FUNC
+
+
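+// LDAPRB, LDAPRH and LDAPR use the atomic memory operation encoding space, with
+// the Rs field fixed to 0b11111 (xzr).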
+void Assembler::ldaprb(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+ AtomicMemoryOp op = LDAPRB;
+ Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
+
+void Assembler::ldaprh(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+ AtomicMemoryOp op = LDAPRH;
+ Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
+
+void Assembler::ldapr(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+ VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+ AtomicMemoryOp op = rt.Is64Bits() ? LDAPR_x : LDAPR_w;
+ Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
void Assembler::prfm(PrefetchOperation op,
const MemOperand& address,
@@ -2219,6 +2423,71 @@
}
}
+void Assembler::xpaclri() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(XPACLRI);
+}
+
+void Assembler::pacia1716() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIA1716);
+}
+
+void Assembler::pacib1716() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIB1716);
+}
+
+void Assembler::autia1716() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIA1716);
+}
+
+void Assembler::autib1716() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIB1716);
+}
+
+void Assembler::paciaz() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIAZ);
+}
+
+void Assembler::pacibz() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIBZ);
+}
+
+void Assembler::autiaz() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIAZ);
+}
+
+void Assembler::autibz() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIBZ);
+}
+
+void Assembler::paciasp() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIASP);
+}
+
+void Assembler::pacibsp() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(PACIBSP);
+}
+
+void Assembler::autiasp() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIASP);
+}
+
+void Assembler::autibsp() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth));
+ Emit(AUTIBSP);
+}
+
void Assembler::mvn(const Register& rd, const Operand& operand) {
orn(rd, AppropriateZeroRegFor(rd), operand);
@@ -2254,6 +2523,11 @@
Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
}
+void Assembler::esb() {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kRAS));
+ hint(ESB);
+}
+
void Assembler::csdb() { hint(CSDB); }
void Assembler::fmov(const VRegister& vd, double imm) {
@@ -2288,18 +2562,17 @@
}
-void Assembler::fmov(const VRegister& vd, F16 imm) {
+void Assembler::fmov(const VRegister& vd, Float16 imm) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- uint16_t rawbits = imm.ToRawbits();
if (vd.IsScalar()) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
VIXL_ASSERT(vd.Is1H());
- Emit(FMOV_h_imm | Rd(vd) | ImmFP16(rawbits));
+ Emit(FMOV_h_imm | Rd(vd) | ImmFP16(imm));
} else {
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf));
VIXL_ASSERT(vd.Is4H() | vd.Is8H());
Instr q = vd.Is8H() ? NEON_Q : 0;
- uint32_t encoded_imm = FP16ToImm8(rawbits);
+ uint32_t encoded_imm = FP16ToImm8(imm);
Emit(q | NEONModifiedImmediate_FMOV | ImmNEONabcdefgh(encoded_imm) |
NEONCmode(0xf) | Rd(vd));
}
@@ -2378,7 +2651,17 @@
const VRegister& vm,
const VRegister& va) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMADD_s : FMADD_d);
+ FPDataProcessing3SourceOp op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ op = FMADD_h;
+ } else if (vd.Is1S()) {
+ op = FMADD_s;
+ } else {
+ VIXL_ASSERT(vd.Is1D());
+ op = FMADD_d;
+ }
+ FPDataProcessing3Source(vd, vn, vm, va, op);
}
@@ -2387,7 +2670,17 @@
const VRegister& vm,
const VRegister& va) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FMSUB_s : FMSUB_d);
+ FPDataProcessing3SourceOp op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ op = FMSUB_h;
+ } else if (vd.Is1S()) {
+ op = FMSUB_s;
+ } else {
+ VIXL_ASSERT(vd.Is1D());
+ op = FMSUB_d;
+ }
+ FPDataProcessing3Source(vd, vn, vm, va, op);
}
@@ -2396,7 +2689,17 @@
const VRegister& vm,
const VRegister& va) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMADD_s : FNMADD_d);
+ FPDataProcessing3SourceOp op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ op = FNMADD_h;
+ } else if (vd.Is1S()) {
+ op = FNMADD_s;
+ } else {
+ VIXL_ASSERT(vd.Is1D());
+ op = FNMADD_d;
+ }
+ FPDataProcessing3Source(vd, vn, vm, va, op);
}
@@ -2405,7 +2708,17 @@
const VRegister& vm,
const VRegister& va) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- FPDataProcessing3Source(vd, vn, vm, va, vd.Is1S() ? FNMSUB_s : FNMSUB_d);
+ FPDataProcessing3SourceOp op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ op = FNMSUB_h;
+ } else if (vd.Is1S()) {
+ op = FNMSUB_s;
+ } else {
+ VIXL_ASSERT(vd.Is1D());
+ op = FNMSUB_d;
+ }
+ FPDataProcessing3Source(vd, vn, vm, va, op);
}
@@ -2414,7 +2727,16 @@
const VRegister& vm) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm));
- Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
+ Instr op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ op = FNMUL_h;
+ } else if (vd.Is1S()) {
+ op = FNMUL_s;
+ } else {
+ VIXL_ASSERT(vd.Is1D());
+ op = FNMUL_d;
+ }
Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
}
@@ -2427,7 +2749,7 @@
// value of +0.0, we don't need to check for -0.0 because the sign of 0.0
// doesn't affect the result of the comparison.
VIXL_ASSERT(value == 0.0);
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D());
Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero;
Emit(FPType(vn) | op | Rn(vn));
}
@@ -2436,7 +2758,7 @@
void Assembler::FPCompareMacro(const VRegister& vn,
const VRegister& vm,
FPTrapFlags trap) {
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D());
VIXL_ASSERT(vn.IsSameSizeAndType(vm));
Instr op = (trap == EnableTrap) ? FCMPE : FCMP;
Emit(FPType(vn) | op | Rm(vm) | Rn(vn));
@@ -2445,24 +2767,28 @@
void Assembler::fcmp(const VRegister& vn, const VRegister& vm) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCompareMacro(vn, vm, DisableTrap);
}
void Assembler::fcmpe(const VRegister& vn, const VRegister& vm) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCompareMacro(vn, vm, EnableTrap);
}
void Assembler::fcmp(const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCompareMacro(vn, value, DisableTrap);
}
void Assembler::fcmpe(const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCompareMacro(vn, value, EnableTrap);
}
@@ -2472,7 +2798,7 @@
StatusFlags nzcv,
Condition cond,
FPTrapFlags trap) {
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D());
VIXL_ASSERT(vn.IsSameSizeAndType(vm));
Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP;
Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv));
@@ -2483,6 +2809,7 @@
StatusFlags nzcv,
Condition cond) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap);
}
@@ -2492,6 +2819,7 @@
StatusFlags nzcv,
Condition cond) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap);
}
@@ -2501,7 +2829,8 @@
const VRegister& vm,
Condition cond) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
VIXL_ASSERT(AreSameFormat(vd, vn, vm));
Emit(FPType(vd) | FCSEL | Rm(vm) | Cond(cond) | Rn(vn) | Rd(vd));
}
@@ -2510,6 +2839,7 @@
void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
FPDataProcessing1SourceOp op;
+ // The half-precision variants belong to base FP, and do not require kFPHalf.
if (vd.Is1D()) {
VIXL_ASSERT(vn.Is1S() || vn.Is1H());
op = vn.Is1S() ? FCVT_ds : FCVT_dh;
@@ -2528,6 +2858,7 @@
void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
VIXL_ASSERT((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
+ // The half-precision variants belong to base FP, and do not require kFPHalf.
Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
}
@@ -2536,6 +2867,7 @@
void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
VIXL_ASSERT((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
+ // The half-precision variants belong to base FP, and do not require kFPHalf.
Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
}
@@ -2544,6 +2876,7 @@
void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
VIXL_ASSERT((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
+ // The half-precision variants belong to base FP, and do not require kFPHalf.
Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
}
@@ -2552,6 +2885,7 @@
void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
VIXL_ASSERT((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
+ // The half-precision variants belong to base FP, and do not require kFPHalf.
Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
}
@@ -2577,6 +2911,12 @@
Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
}
+void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kJSCVT));
+ VIXL_ASSERT(rd.IsW() && vn.Is1D());
+ Emit(FJCVTZS | Rn(vn) | Rd(rd));
+}
+
void Assembler::NEONFPConvertToInt(const Register& rd,
const VRegister& vn,
@@ -2596,6 +2936,20 @@
}
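+// Emit a NEON FP16 convert-to-integer instruction, setting the Q and scalar
+// bits in 'op' according to the format of vn.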
+void Assembler::NEONFP16ConvertToInt(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(vn.IsLaneSizeH());
+ if (vn.IsScalar()) {
+ op |= NEON_Q | NEONScalar;
+ } else if (vn.Is8H()) {
+ op |= NEON_Q;
+ }
+ Emit(op | Rn(vn) | Rd(vd));
+}
+
+
#define NEON_FP2REGMISC_FCVT_LIST(V) \
V(fcvtnu, NEON_FCVTNU, FCVTNU) \
V(fcvtns, NEON_FCVTNS, FCVTNS) \
@@ -2609,12 +2963,17 @@
#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
void Assembler::FN(const Register& rd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
+ if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
NEONFPConvertToInt(rd, vn, SCA_OP); \
} \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
- /* This form is a NEON scalar FP instruction. */ \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
- NEONFPConvertToInt(vd, vn, VEC_OP); \
+ if (vd.IsLaneSizeH()) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ NEONFP16ConvertToInt(vd, vn, VEC_OP##_H); \
+ } else { \
+ NEONFPConvertToInt(vd, vn, VEC_OP); \
+ } \
}
NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
#undef DEFINE_ASM_FUNCS
@@ -2622,7 +2981,8 @@
void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D());
VIXL_ASSERT((fbits >= 0) && (fbits <= rd.GetSizeInBits()));
if (fbits == 0) {
Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
@@ -2636,11 +2996,17 @@
void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
// This form is a NEON scalar FP instruction.
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
+ if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
- NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
+ if (vd.IsLaneSizeH()) {
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCVTZS_H);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
+ }
} else {
- VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() ||
+ vd.Is1H() || vd.Is4H() || vd.Is8H());
NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
}
}
@@ -2648,7 +3014,8 @@
void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- VIXL_ASSERT(vn.Is1S() || vn.Is1D());
+ if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D());
VIXL_ASSERT((fbits >= 0) && (fbits <= rd.GetSizeInBits()));
if (fbits == 0) {
Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
@@ -2662,11 +3029,17 @@
void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
// This form is a NEON scalar FP instruction.
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
+ if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
- NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
+ if (vd.IsLaneSizeH()) {
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCVTZU_H);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
+ }
} else {
- VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() ||
+ vd.Is1H() || vd.Is4H() || vd.Is8H());
NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
}
}
@@ -2674,11 +3047,17 @@
void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
// This form is a NEON scalar FP instruction.
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
+ if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
- NEONFP2RegMisc(vd, vn, NEON_UCVTF);
+ if (vd.IsLaneSizeH()) {
+ NEONFP2RegMiscFP16(vd, vn, NEON_UCVTF_H);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_UCVTF);
+ }
} else {
- VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() ||
+ vd.Is1H() || vd.Is4H() || vd.Is8H());
NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
}
}
@@ -2686,11 +3065,17 @@
void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
// This form is a NEON scalar FP instruction.
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
+ if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
- NEONFP2RegMisc(vd, vn, NEON_SCVTF);
+ if (vd.IsLaneSizeH()) {
+ NEONFP2RegMiscFP16(vd, vn, NEON_SCVTF_H);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_SCVTF);
+ }
} else {
- VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
+ VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() ||
+ vd.Is1H() || vd.Is4H() || vd.Is8H());
NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
}
}
@@ -2698,7 +3083,8 @@
void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
@@ -2711,7 +3097,8 @@
void Assembler::ucvtf(const VRegister& vd, const Register& rn, int fbits) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP));
- VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf));
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
VIXL_ASSERT(fbits >= 0);
if (fbits == 0) {
Emit(SF(rn) | FPType(vd) | UCVTF | Rn(rn) | Rd(vd));
@@ -2750,44 +3137,85 @@
}
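+// Emit a NEON three-same half-precision instruction; the Q bit is set for the
+// 8H (128-bit) form.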
+void Assembler::NEON3SameFP16(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn, vm));
+ VIXL_ASSERT(vd.GetLaneSizeInBytes() == kHRegSizeInBytes);
+ if (vd.Is8H()) op |= NEON_Q;
+ Emit(op | Rm(vm) | Rn(vn) | Rd(vd));
+}
+
+
// clang-format off
-#define NEON_FP2REGMISC_LIST(V) \
- V(fabs, NEON_FABS, FABS) \
- V(fneg, NEON_FNEG, FNEG) \
- V(fsqrt, NEON_FSQRT, FSQRT) \
- V(frintn, NEON_FRINTN, FRINTN) \
- V(frinta, NEON_FRINTA, FRINTA) \
- V(frintp, NEON_FRINTP, FRINTP) \
- V(frintm, NEON_FRINTM, FRINTM) \
- V(frintx, NEON_FRINTX, FRINTX) \
- V(frintz, NEON_FRINTZ, FRINTZ) \
- V(frinti, NEON_FRINTI, FRINTI) \
- V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
- V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar )
+#define NEON_FP2REGMISC_LIST(V) \
+ V(fabs, NEON_FABS, FABS, FABS_h) \
+ V(fneg, NEON_FNEG, FNEG, FNEG_h) \
+ V(fsqrt, NEON_FSQRT, FSQRT, FSQRT_h) \
+ V(frintn, NEON_FRINTN, FRINTN, FRINTN_h) \
+ V(frinta, NEON_FRINTA, FRINTA, FRINTA_h) \
+ V(frintp, NEON_FRINTP, FRINTP, FRINTP_h) \
+ V(frintm, NEON_FRINTM, FRINTM, FRINTM_h) \
+ V(frintx, NEON_FRINTX, FRINTX, FRINTX_h) \
+ V(frintz, NEON_FRINTZ, FRINTZ, FRINTZ_h) \
+ V(frinti, NEON_FRINTI, FRINTI, FRINTI_h) \
+ V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar, NEON_FRSQRTE_H_scalar) \
+ V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, NEON_FRECPE_H_scalar)
// clang-format on
-
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
- void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
- Instr op; \
- if (vd.IsScalar()) { \
- if ((SCA_OP & NEONScalar2RegMiscFMask) == NEONScalar2RegMiscFixed) { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
- } \
- VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
- op = SCA_OP; \
- } else { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
- VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
- op = VEC_OP; \
- } \
- NEONFP2RegMisc(vd, vn, op); \
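+// The half-precision scalar forms are either FP data-processing instructions
+// (for example fabs) or NEON scalar instructions (for example frecpe), so the
+// required CPU features are checked per opcode below.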
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+ void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
+ Instr op; \
+ if (vd.IsScalar()) { \
+ if (vd.Is1H()) { \
+ if ((SCA_OP_H & NEONScalar2RegMiscFP16FMask) == \
+ NEONScalar2RegMiscFP16Fixed) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf)); \
+ } else { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
+ } \
+ op = SCA_OP_H; \
+ } else { \
+ if ((SCA_OP & NEONScalar2RegMiscFMask) == NEONScalar2RegMiscFixed) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
+ } \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
+ op = SCA_OP; \
+ } \
+ } else { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
+ VIXL_ASSERT(vd.Is4H() || vd.Is8H() || vd.Is2S() || vd.Is2D() || \
+ vd.Is4S()); \
+ if (vd.IsLaneSizeH()) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ op = VEC_OP##_H; \
+ if (vd.Is8H()) { \
+ op |= NEON_Q; \
+ } \
+ } else { \
+ op = VEC_OP; \
+ } \
+ } \
+ if (vd.IsLaneSizeH()) { \
+ NEONFP2RegMiscFP16(vd, vn, op); \
+ } else { \
+ NEONFP2RegMisc(vd, vn, op); \
+ } \
}
NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
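+// Emit a NEON FP16 two-register miscellaneous instruction; 'op' must already
+// include any required Q and scalar bits.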
+void Assembler::NEONFP2RegMiscFP16(const VRegister& vd,
+ const VRegister& vn,
+ Instr op) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ Emit(op | Rn(vn) | Rd(vd));
+}
+
+
void Assembler::NEONFP2RegMisc(const VRegister& vd,
const VRegister& vn,
Instr op) {
@@ -2891,33 +3319,81 @@
}
+void Assembler::NEONFP2RegMiscFP16(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscFP16Op vop,
+ double value) {
+ VIXL_ASSERT(AreSameFormat(vd, vn));
+ VIXL_ASSERT(value == 0.0);
+ USE(value);
+
+ Instr op = vop;
+ if (vd.IsScalar()) {
+ VIXL_ASSERT(vd.Is1H());
+ op |= NEON_Q | NEONScalar;
+ } else {
+ VIXL_ASSERT(vd.Is4H() || vd.Is8H());
+ if (vd.Is8H()) {
+ op |= NEON_Q;
+ }
+ }
+
+ Emit(op | Rn(vn) | Rd(vd));
+}
+
+
void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
+ if (vd.IsLaneSizeH()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCMEQ_H_zero, value);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
+ }
}
void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
+ if (vd.IsLaneSizeH()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCMGE_H_zero, value);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
+ }
}
void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
+ if (vd.IsLaneSizeH()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCMGT_H_zero, value);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
+ }
}
void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
+ if (vd.IsLaneSizeH()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCMLE_H_zero, value);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
+ }
}
void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
+ if (vd.IsLaneSizeH()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ NEONFP2RegMiscFP16(vd, vn, NEON_FCMLT_H_zero, value);
+ } else {
+ NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
+ }
}
@@ -2925,8 +3401,15 @@
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
VIXL_ASSERT(vd.IsScalar());
VIXL_ASSERT(AreSameFormat(vd, vn));
- VIXL_ASSERT(vd.Is1S() || vd.Is1D());
- Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
+ Instr op;
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ op = NEON_FRECPX_H_scalar;
+ } else {
+ VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ op = NEON_FRECPX_scalar;
+ }
+ Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
}
@@ -2999,52 +3482,75 @@
#undef DEFINE_ASM_FUNC
// clang-format off
-#define NEON_FP3SAME_OP_LIST(V) \
- V(fadd, NEON_FADD, FADD) \
- V(fsub, NEON_FSUB, FSUB) \
- V(fmul, NEON_FMUL, FMUL) \
- V(fdiv, NEON_FDIV, FDIV) \
- V(fmax, NEON_FMAX, FMAX) \
- V(fmaxnm, NEON_FMAXNM, FMAXNM) \
- V(fmin, NEON_FMIN, FMIN) \
- V(fminnm, NEON_FMINNM, FMINNM) \
- V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
- V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
- V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
- V(fabd, NEON_FABD, NEON_FABD_scalar) \
- V(fmla, NEON_FMLA, 0) \
- V(fmls, NEON_FMLS, 0) \
- V(facge, NEON_FACGE, NEON_FACGE_scalar) \
- V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
- V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
- V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
- V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
- V(faddp, NEON_FADDP, 0) \
- V(fmaxp, NEON_FMAXP, 0) \
- V(fminp, NEON_FMINP, 0) \
- V(fmaxnmp, NEON_FMAXNMP, 0) \
- V(fminnmp, NEON_FMINNMP, 0)
+#define NEON_FP3SAME_OP_LIST(V) \
+ V(fmulx, NEON_FMULX, NEON_FMULX_scalar, NEON_FMULX_H_scalar) \
+ V(frecps, NEON_FRECPS, NEON_FRECPS_scalar, NEON_FRECPS_H_scalar) \
+ V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar, NEON_FRSQRTS_H_scalar) \
+ V(fabd, NEON_FABD, NEON_FABD_scalar, NEON_FABD_H_scalar) \
+ V(fmla, NEON_FMLA, 0, 0) \
+ V(fmls, NEON_FMLS, 0, 0) \
+ V(facge, NEON_FACGE, NEON_FACGE_scalar, NEON_FACGE_H_scalar) \
+ V(facgt, NEON_FACGT, NEON_FACGT_scalar, NEON_FACGT_H_scalar) \
+ V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar, NEON_FCMEQ_H_scalar) \
+ V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar, NEON_FCMGE_H_scalar) \
+ V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar, NEON_FCMGT_H_scalar) \
+ V(faddp, NEON_FADDP, 0, 0) \
+ V(fmaxp, NEON_FMAXP, 0, 0) \
+ V(fminp, NEON_FMINP, 0, 0) \
+ V(fmaxnmp, NEON_FMAXNMP, 0, 0) \
+ V(fadd, NEON_FADD, FADD, 0) \
+ V(fsub, NEON_FSUB, FSUB, 0) \
+ V(fmul, NEON_FMUL, FMUL, 0) \
+ V(fdiv, NEON_FDIV, FDIV, 0) \
+ V(fmax, NEON_FMAX, FMAX, 0) \
+ V(fmin, NEON_FMIN, FMIN, 0) \
+ V(fmaxnm, NEON_FMAXNM, FMAXNM, 0) \
+ V(fminnm, NEON_FMINNM, FMINNM, 0) \
+ V(fminnmp, NEON_FMINNMP, 0, 0)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
- void Assembler::FN(const VRegister& vd, \
- const VRegister& vn, \
- const VRegister& vm) { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
- Instr op; \
- if ((SCA_OP != 0) && vd.IsScalar()) { \
- if ((SCA_OP & NEONScalar3SameFMask) == NEONScalar3SameFixed) { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
- } \
- VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \
- op = SCA_OP; \
- } else { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
- VIXL_ASSERT(vd.IsVector()); \
- VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
- op = VEC_OP; \
- } \
- NEONFP3Same(vd, vn, vm, op); \
+// TODO: This macro is complicated because it classifies the instructions in the
+// macro list above, and treats each case differently. It could be somewhat
+// simpler if we were to split the macro, at the cost of some duplication.
+#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \
+ void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \
+ Instr op; \
+ bool is_fp16 = false; \
+ if ((SCA_OP != 0) && vd.IsScalar()) { \
+ if ((SCA_OP_H != 0) && vd.Is1H()) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf)); \
+ is_fp16 = true; \
+ op = SCA_OP_H; \
+ } else { \
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); \
+ if ((SCA_OP & NEONScalar3SameFMask) == NEONScalar3SameFixed) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
+ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ } else if (vd.Is1H()) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \
+ } \
+ op = SCA_OP; \
+ } \
+ } else { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
+ VIXL_ASSERT(vd.IsVector()); \
+ if (vd.Is4H() || vd.Is8H()) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ is_fp16 = true; \
+ op = VEC_OP##_H; \
+ } else { \
+ VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
+ op = VEC_OP; \
+ } \
+ } \
+ if (is_fp16) { \
+ NEON3SameFP16(vd, vn, vm, op); \
+ } else { \
+ NEONFP3Same(vd, vn, vm, op); \
+ } \
}
NEON_FP3SAME_OP_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
@@ -3119,36 +3625,66 @@
void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
- Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) ||
+ (vd.Is1H() && vn.Is2H()));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ Emit(NEON_FADDP_h_scalar | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
+ }
}
void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
- Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) ||
+ (vd.Is1H() && vn.Is2H()));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ Emit(NEON_FMAXP_h_scalar | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
+ }
}
void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
- Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) ||
+ (vd.Is1H() && vn.Is2H()));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ Emit(NEON_FMINP_h_scalar | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
+ }
}
void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
- Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) ||
+ (vd.Is1H() && vn.Is2H()));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ Emit(NEON_FMAXNMP_h_scalar | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
+ }
}
void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON));
- VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
- Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
+ VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) ||
+ (vd.Is1H() && vn.Is2H()));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
+ Emit(NEON_FMINNMP_h_scalar | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
+ }
}
@@ -3162,6 +3698,7 @@
VIXL_ASSERT(vd.IsVector() && AreSameFormat(vd, vn));
VIXL_ASSERT((vm.IsH() && (vd.Is8H() || vd.Is4H())) ||
(vm.IsS() && vd.Is4S()));
+ if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
int index_num_bits = vd.Is4S() ? 1 : 2;
Emit(VFormat(vd) | Rm(vm) | NEON_FCMLA_byelement |
ImmNEONHLM(vm_index, index_num_bits) | ImmRotFcmlaSca(rot) | Rn(vn) |
@@ -3176,6 +3713,7 @@
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kFcma));
VIXL_ASSERT(AreSameFormat(vd, vn, vm));
VIXL_ASSERT(vd.IsVector() && !vd.IsLaneSizeB());
+ if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
Emit(VFormat(vd) | Rm(vm) | NEON_FCMLA | ImmRotFcmlaVec(rot) | Rn(vn) |
Rd(vd));
}
@@ -3189,6 +3727,7 @@
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kFcma));
VIXL_ASSERT(AreSameFormat(vd, vn, vm));
VIXL_ASSERT(vd.IsVector() && !vd.IsLaneSizeB());
+ if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf));
Emit(VFormat(vd) | Rm(vm) | NEON_FCADD | ImmRotFcadd(rot) | Rn(vn) | Rd(vd));
}
@@ -3282,21 +3821,38 @@
const VRegister& vn,
const VRegister& vm,
int vm_index,
- NEONByIndexedElementOp vop) {
+ NEONByIndexedElementOp vop,
+ NEONByIndexedElementOp vop_half) {
VIXL_ASSERT(AreSameFormat(vd, vn));
VIXL_ASSERT((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
(vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
- (vd.Is1D() && vm.Is1D()));
- VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));
+ (vd.Is1D() && vm.Is1D()) || (vd.Is4H() && vm.Is1H()) ||
+ (vd.Is8H() && vm.Is1H()) || (vd.Is1H() && vm.Is1H()));
+ VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)) ||
+ (vm.Is1H() && (vm.GetCode() < 16) && (vm_index < 8)));
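+ // Half-precision by-element forms encode Vm in a four-bit field, so only
+ // v0-v15 and lane indices 0-7 are available.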
Instr op = vop;
- int index_num_bits = vm.Is1S() ? 2 : 1;
+ int index_num_bits;
+ if (vm.Is1D()) {
+ index_num_bits = 1;
+ } else if (vm.Is1S()) {
+ index_num_bits = 2;
+ } else {
+ index_num_bits = 3;
+ op = vop_half;
+ }
+
if (vd.IsScalar()) {
op |= NEON_Q | NEONScalar;
}
- Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
- Rn(vn) | Rd(vd));
+ if (!vm.Is1H()) {
+ op |= FPFormat(vd);
+ } else if (vd.Is8H()) {
+ op |= NEON_Q;
+ }
+
+ Emit(op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd));
}
@@ -3423,19 +3979,20 @@
// clang-format off
#define NEON_FPBYELEMENT_LIST(V) \
- V(fmul, NEON_FMUL_byelement) \
- V(fmla, NEON_FMLA_byelement) \
- V(fmls, NEON_FMLS_byelement) \
- V(fmulx, NEON_FMULX_byelement)
+ V(fmul, NEON_FMUL_byelement, NEON_FMUL_H_byelement) \
+ V(fmla, NEON_FMLA_byelement, NEON_FMLA_H_byelement) \
+ V(fmls, NEON_FMLS_byelement, NEON_FMLS_H_byelement) \
+ V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement)
// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP) \
- void Assembler::FN(const VRegister& vd, \
- const VRegister& vn, \
- const VRegister& vm, \
- int vm_index) { \
- VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
- NEONFPByElement(vd, vn, vm, vm_index, OP); \
+#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
+ void Assembler::FN(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
+ if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \
}
NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
@@ -3955,46 +4512,59 @@
void Assembler::NEONAcrossLanes(const VRegister& vd,
const VRegister& vn,
- NEONAcrossLanesOp op) {
+ NEONAcrossLanesOp op,
+ Instr op_half) {
VIXL_ASSERT((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
(vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
(vn.Is4S() && vd.Is1S()));
if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
- Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+ if (vd.Is1H()) {
+ VIXL_ASSERT(op_half != 0);
+ Instr vop = op_half;
+ if (vn.Is8H()) {
+ vop |= NEON_Q;
+ }
+ Emit(vop | Rn(vn) | Rd(vd));
+ } else {
+ Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
+ }
} else {
Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
}
}
+// clang-format off
+#define NEON_ACROSSLANES_LIST(V) \
+ V(addv, NEON_ADDV) \
+ V(smaxv, NEON_SMAXV) \
+ V(sminv, NEON_SMINV) \
+ V(umaxv, NEON_UMAXV) \
+ V(uminv, NEON_UMINV)
+// clang-format on
-#define NEON_ACROSSLANES_LIST(V) \
- V(addv, NEON_ADDV, true) \
- V(smaxv, NEON_SMAXV, true) \
- V(sminv, NEON_SMINV, true) \
- V(umaxv, NEON_UMAXV, true) \
- V(uminv, NEON_UMINV, true)
-
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define DEFINE_ASM_FUNC(FN, OP) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \
- VIXL_ASSERT(AS); \
- NEONAcrossLanes(vd, vn, OP); \
+ NEONAcrossLanes(vd, vn, OP, 0); \
}
NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
+// clang-format off
#define NEON_ACROSSLANES_FP_LIST(V) \
- V(fmaxv, NEON_FMAXV, vd.Is1S()) \
- V(fminv, NEON_FMINV, vd.Is1S()) \
- V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
- V(fminnmv, NEON_FMINNMV, vd.Is1S())
+ V(fmaxv, NEON_FMAXV, NEON_FMAXV_H) \
+ V(fminv, NEON_FMINV, NEON_FMINV_H) \
+ V(fmaxnmv, NEON_FMAXNMV, NEON_FMAXNMV_H) \
+ V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H)
+// clang-format on
-#define DEFINE_ASM_FUNC(FN, OP, AS) \
+#define DEFINE_ASM_FUNC(FN, OP, OP_H) \
void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \
- VIXL_ASSERT(AS); \
- NEONAcrossLanes(vd, vn, OP); \
+ if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \
+ VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \
+ NEONAcrossLanes(vd, vn, OP, OP_H); \
}
NEON_ACROSSLANES_FP_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC
@@ -4400,7 +4970,7 @@
// For all ToImm instructions below, a difference in case
// for the same letter indicates a negated bit.
// If b is 1, then B is 0.
-uint32_t Assembler::FP16ToImm8(float16 imm) {
+uint32_t Assembler::FP16ToImm8(Float16 imm) {
VIXL_ASSERT(IsImmFP16(imm));
// Half: aBbb.cdef.gh00.0000 (16 bits)
uint16_t bits = Float16ToRawbits(imm);
@@ -4415,7 +4985,7 @@
}
-Instr Assembler::ImmFP16(float16 imm) {
+Instr Assembler::ImmFP16(Float16 imm) {
return FP16ToImm8(imm) << ImmFP_offset;
}
@@ -4663,7 +5233,7 @@
const VRegister& vm,
const VRegister& va,
FPDataProcessing3SourceOp op) {
- VIXL_ASSERT(vd.Is1S() || vd.Is1D());
+ VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D());
VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm, va));
Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd) | Ra(va));
}
@@ -4905,7 +5475,7 @@
}
-bool Assembler::IsImmFP16(float16 imm) {
+bool Assembler::IsImmFP16(Float16 imm) {
// Valid values will have the form:
// aBbb.cdef.gh00.0000
uint16_t bits = Float16ToRawbits(imm);
@@ -5379,7 +5949,7 @@
const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};
- for (unsigned i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
+ for (size_t i = 0; i < ArrayLength(regs); i++) {
if (regs[i].IsRegister()) {
number_of_valid_regs++;
unique_regs |= regs[i].GetBit();
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 54256fb..7d95466 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -34,7 +34,6 @@
#include "../invalset-vixl.h"
#include "../utils-vixl.h"
#include "operands-aarch64.h"
-#include "utils-aarch64.h"
namespace vixl {
namespace aarch64 {
@@ -496,6 +495,42 @@
// Branch to register with return hint.
void ret(const Register& xn = lr);
+ // Branch to register, with pointer authentication, using key A and a modifier
+ // of zero [Armv8.3].
+ void braaz(const Register& xn);
+
+ // Branch to register, with pointer authentication, using key B and a modifier
+ // of zero [Armv8.3].
+ void brabz(const Register& xn);
+
+ // Branch with link to register, with pointer authentication, using key A and
+ // a modifier of zero [Armv8.3].
+ void blraaz(const Register& xn);
+
+ // Branch with link to register, with pointer authentication, using key B and
+ // a modifier of zero [Armv8.3].
+ void blrabz(const Register& xn);
+
+ // Return from subroutine, with pointer authentication, using key A [Armv8.3].
+ void retaa();
+
+ // Return from subroutine, with pointer authentication, using key B [Armv8.3].
+ void retab();
+
+ // Branch to register, with pointer authentication, using key A [Armv8.3].
+ void braa(const Register& xn, const Register& xm);
+
+ // Branch to register, with pointer authentication, using key B [Armv8.3].
+ void brab(const Register& xn, const Register& xm);
+
+ // Branch with link to register, with pointer authentication, using key A
+ // [Armv8.3].
+ void blraa(const Register& xn, const Register& xm);
+
+ // Branch with link to register, with pointer authentication, using key B
+ // [Armv8.3].
+ void blrab(const Register& xn, const Register& xm);
+
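+ // A typical pointer-authenticated function signs the link register on entry
+ // with paciasp and returns with retaa, so no separate autiasp is needed
+ // before the return.
+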
// Unconditional branch to label.
void b(Label* label);
@@ -728,6 +763,11 @@
bfm(rd, rn, lsb, lsb + width - 1);
}
+ // Bitfield clear [Armv8.2].
+ void bfc(const Register& rd, unsigned lsb, unsigned width) {
+ bfi(rd, AppropriateZeroRegFor(rd), lsb, width);
+ }
+
// Sbfm aliases.
// Arithmetic shift right.
void asr(const Register& rd, const Register& rn, unsigned shift) {
@@ -971,6 +1011,13 @@
// Reverse bytes in 32-bit words.
void rev32(const Register& xd, const Register& xn);
+ // Reverse bytes in a 64-bit general-purpose register, an alias for rev
+ // [Armv8.2].
+ void rev64(const Register& xd, const Register& xn) {
+ VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits());
+ rev(xd, xn);
+ }
+
// Reverse bytes.
void rev(const Register& rd, const Register& rn);
@@ -980,6 +1027,168 @@
// Count leading sign bits.
void cls(const Register& rd, const Register& rn);
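+
+ // In the two-register pac* and aut* forms below, the first register holds the
+ // pointer to be signed or authenticated (and receives the result); the second
+ // supplies the modifier.
+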
+ // Pointer Authentication Code for Instruction address, using key A [Armv8.3].
+ void pacia(const Register& xd, const Register& xn);
+
+ // Pointer Authentication Code for Instruction address, using key A and a
+ // modifier of zero [Armv8.3].
+ void paciza(const Register& xd);
+
+ // Pointer Authentication Code for Instruction address, using key A, with
+ // address in x17 and modifier in x16 [Armv8.3].
+ void pacia1716();
+
+ // Pointer Authentication Code for Instruction address, using key A, with
+ // address in LR and modifier in SP [Armv8.3].
+ void paciasp();
+
+ // Pointer Authentication Code for Instruction address, using key A, with
+ // address in LR and a modifier of zero [Armv8.3].
+ void paciaz();
+
+ // Pointer Authentication Code for Instruction address, using key B [Armv8.3].
+ void pacib(const Register& xd, const Register& xn);
+
+ // Pointer Authentication Code for Instruction address, using key B and a
+ // modifier of zero [Armv8.3].
+ void pacizb(const Register& xd);
+
+ // Pointer Authentication Code for Instruction address, using key B, with
+ // address in x17 and modifier in x16 [Armv8.3].
+ void pacib1716();
+
+ // Pointer Authentication Code for Instruction address, using key B, with
+ // address in LR and modifier in SP [Armv8.3].
+ void pacibsp();
+
+ // Pointer Authentication Code for Instruction address, using key B, with
+ // address in LR and a modifier of zero [Armv8.3].
+ void pacibz();
+
+ // Pointer Authentication Code for Data address, using key A [Armv8.3].
+ void pacda(const Register& xd, const Register& xn);
+
+ // Pointer Authentication Code for Data address, using key A and a modifier of
+ // zero [Armv8.3].
+ void pacdza(const Register& xd);
+
+ // Pointer Authentication Code for Data address, using key A, with address in
+ // x17 and modifier in x16 [Armv8.3].
+ void pacda1716();
+
+ // Pointer Authentication Code for Data address, using key A, with address in
+ // LR and modifier in SP [Armv8.3].
+ void pacdasp();
+
+ // Pointer Authentication Code for Data address, using key A, with address in
+ // LR and a modifier of zero [Armv8.3].
+ void pacdaz();
+
+ // Pointer Authentication Code for Data address, using key B [Armv8.3].
+ void pacdb(const Register& xd, const Register& xn);
+
+ // Pointer Authentication Code for Data address, using key B and a modifier of
+ // zero [Armv8.3].
+ void pacdzb(const Register& xd);
+
+ // Pointer Authentication Code for Data address, using key B, with address in
+ // x17 and modifier in x16 [Armv8.3].
+ void pacdb1716();
+
+ // Pointer Authentication Code for Data address, using key B, with address in
+ // LR and modifier in SP [Armv8.3].
+ void pacdbsp();
+
+ // Pointer Authentication Code for Data address, using key B, with address in
+ // LR and a modifier of zero [Armv8.3].
+ void pacdbz();
+
+ // Pointer Authentication Code, using Generic key [Armv8.3].
+ void pacga(const Register& xd, const Register& xn, const Register& xm);
+
+ // Authenticate Instruction address, using key A [Armv8.3].
+ void autia(const Register& xd, const Register& xn);
+
+ // Authenticate Instruction address, using key A and a modifier of zero
+ // [Armv8.3].
+ void autiza(const Register& xd);
+
+ // Authenticate Instruction address, using key A, with address in x17 and
+ // modifier in x16 [Armv8.3].
+ void autia1716();
+
+ // Authenticate Instruction address, using key A, with address in LR and
+ // modifier in SP [Armv8.3].
+ void autiasp();
+
+ // Authenticate Instruction address, using key A, with address in LR and a
+ // modifier of zero [Armv8.3].
+ void autiaz();
+
+ // Authenticate Instruction address, using key B [Armv8.3].
+ void autib(const Register& xd, const Register& xn);
+
+ // Authenticate Instruction address, using key B and a modifier of zero
+ // [Armv8.3].
+ void autizb(const Register& xd);
+
+ // Authenticate Instruction address, using key B, with address in x17 and
+ // modifier in x16 [Armv8.3].
+ void autib1716();
+
+ // Authenticate Instruction address, using key B, with address in LR and
+ // modifier in SP [Armv8.3].
+ void autibsp();
+
+ // Authenticate Instruction address, using key B, with address in LR and a
+ // modifier of zero [Armv8.3].
+ void autibz();
+
+ // Authenticate Data address, using key A [Armv8.3].
+ void autda(const Register& xd, const Register& xn);
+
+ // Authenticate Data address, using key A and a modifier of zero [Armv8.3].
+ void autdza(const Register& xd);
+
+ // Authenticate Data address, using key A, with address in x17 and modifier in
+ // x16 [Armv8.3].
+ void autda1716();
+
+ // Authenticate Data address, using key A, with address in LR and modifier in
+ // SP [Armv8.3].
+ void autdasp();
+
+ // Authenticate Data address, using key A, with address in LR and a modifier
+ // of zero [Armv8.3].
+ void autdaz();
+
+ // Authenticate Data address, using key B [Armv8.3].
+ void autdb(const Register& xd, const Register& xn);
+
+ // Authenticate Data address, using key B and a modifier of zero [Armv8.3].
+ void autdzb(const Register& xd);
+
+ // Authenticate Data address, using key B, with address in x17 and modifier in
+ // x16 [Armv8.3].
+ void autdb1716();
+
+ // Authenticate Data address, using key B, with address in LR and modifier in
+ // SP [Armv8.3].
+ void autdbsp();
+
+ // Authenticate Data address, using key B, with address in LR and a modifier
+ // of zero [Armv8.3].
+ void autdbz();
+
+ // Strip Pointer Authentication Code of Data address [Armv8.3].
+ void xpacd(const Register& xd);
+
+ // Strip Pointer Authentication Code of Instruction address [Armv8.3].
+ void xpaci(const Register& xd);
+
+ // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3].
+ void xpaclri();
+
// Memory instructions.
// Load integer or FP register.
void ldr(const CPURegister& rt,
@@ -1260,6 +1469,578 @@
const Register& rt2,
const MemOperand& src);
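+ // For the atomic memory operations below, rs supplies the operand value, rt
+ // is written with the value originally held at the addressed location, and
+ // src gives the base address, which must use an immediate offset of zero.
+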
+ // Atomic add on byte in memory [Armv8.1]
+ void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
+ void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory [Armv8.1]
+ void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
+ void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory [Armv8.1]
+ void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory [Armv8.1]
+ void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
+ void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory [Armv8.1]
+ void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory [Armv8.1]
+ void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory [Armv8.1]
+ void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory [Armv8.1]
+ void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
+ void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory [Armv8.1]
+ void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
+ void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory [Armv8.1]
+ void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
+ void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory [Armv8.1]
+ void ldset(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory [Armv8.1]
+ void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory [Armv8.1]
+ void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory [Armv8.1]
+ void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory [Armv8.1]
+ void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory [Armv8.1]
+ void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory [Armv8.1]
+ void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory [Armv8.1]
+ void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory [Armv8.1]
+ void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
+ void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory [Armv8.1]
+ void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory [Armv8.1]
+ void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
+ // [Armv8.1]
+ void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // [Armv8.1]
+ void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Load-acquire and
+ // Store-release semantics [Armv8.1]
+ void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
+ void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // semantics [Armv8.1]
+ void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics [Armv8.1]
+ void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
+ // and Store-release semantics [Armv8.1]
+ void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Atomic add on byte in memory, without return. [Armv8.1]
+ void staddb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlb(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, without return. [Armv8.1]
+ void staddh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on halfword in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void staddlh(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, without return. [Armv8.1]
+ void stadd(const Register& rs, const MemOperand& src);
+
+ // Atomic add on word or doubleword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void staddl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, without return. [Armv8.1]
+ void stclrb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, without return. [Armv8.1]
+ void stclrh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stclrlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
+ void stclr(const Register& rs, const MemOperand& src);
+
+ // Atomic bit clear on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stclrl(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
+ void steorb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void steorlb(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
+ void steorh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void steorlh(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void steor(const Register& rs, const MemOperand& src);
+
+ // Atomic exclusive OR on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void steorl(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, without return. [Armv8.1]
+ void stsetb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on byte in memory, with Store-release semantics and without
+ // return. [Armv8.1]
+ void stsetlb(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, without return. [Armv8.1]
+ void stseth(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on halfword in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsetlh(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
+ void stset(const Register& rs, const MemOperand& src);
+
+ // Atomic bit set on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsetl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, without return. [Armv8.1]
+ void stsmaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsmaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
+ void stsmaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsmaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmax(const Register& rs, const MemOperand& src);
+
+ // Atomic signed maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsmaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, without return. [Armv8.1]
+ void stsminb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stsminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
+ void stsminh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stsminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stsmin(const Register& rs, const MemOperand& src);
+
+ // Atomic signed minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stsminl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
+ void stumaxb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stumaxlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
+ void stumaxh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stumaxlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumax(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned maximum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stumaxl(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
+ void stuminb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on byte in memory, with Store-release semantics and
+ // without return. [Armv8.1]
+ void stuminlb(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
+ void stuminh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on halfword in memory, with Store-release semantics
+ // and without return. [Armv8.1]
+ void stuminlh(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, without return.
+ // [Armv8.1]
+ void stumin(const Register& rs, const MemOperand& src);
+
+ // Atomic unsigned minimum on word or doubleword in memory, with Store-release
+ // semantics and without return. [Armv8.1]
+ void stuminl(const Register& rs, const MemOperand& src);
+
+ // Swap byte in memory [Armv8.1]
+ void swpb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire semantics [Armv8.1]
+ void swpab(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Store-release semantics [Armv8.1]
+ void swplb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap byte in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory [Armv8.1]
+ void swph(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
+ void swpah(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Store-release semantics [Armv8.1]
+ void swplh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap halfword in memory, with Load-acquire and Store-release semantics
+ // [Armv8.1]
+ void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory [Armv8.1]
+ void swp(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
+ void swpa(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
+ void swpl(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Swap word or doubleword in memory, with Load-acquire and Store-release
+ // semantics [Armv8.1]
+ void swpal(const Register& rs, const Register& rt, const MemOperand& src);
+
+ // Load-Acquire RCpc Register byte [Armv8.3]
+ void ldaprb(const Register& rt, const MemOperand& src);
+
+ // Load-Acquire RCpc Register halfword [Armv8.3]
+ void ldaprh(const Register& rt, const MemOperand& src);
+
+ // Load-Acquire RCpc Register word or doubleword [Armv8.3]
+ void ldapr(const Register& rt, const MemOperand& src);
+
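These entry points map one-to-one onto the LSE (Armv8.1) and RCpc (Armv8.3) encodings. As a minimal usage sketch (not part of this patch; the assembler setup, register choices and include path are assumptions):

    #include "aarch64/assembler-aarch64.h"

    using namespace vixl::aarch64;

    // Assumes `assm` was configured with CPUFeatures::kAtomics and
    // CPUFeatures::kRCpc before emission.
    void EmitLseExamples(Assembler* assm) {
      // w0 <- old value of [x2]; [x2] <- old & ~w1; acquire-release ordering.
      assm->ldclral(w1, w0, MemOperand(x2));
      // Store-only form: [x2] <- [x2] + w1; release ordering, no result.
      assm->staddl(w1, MemOperand(x2));
      // Swap: w0 <- old [x2]; [x2] <- w1; acquire-release ordering.
      assm->swpal(w1, w0, MemOperand(x2));
      // RCpc load-acquire of a byte into w0.
      assm->ldaprb(w0, MemOperand(x2));
    }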
// Prefetch memory.
void prfm(PrefetchOperation op,
const MemOperand& addr,
@@ -1354,6 +2135,9 @@
// Instruction synchronization barrier.
void isb();
+ // Error synchronization barrier.
+ void esb();
+
// Conditional speculation dependency barrier.
void csdb();
@@ -1369,7 +2153,7 @@
void fmov(const VRegister& vd, float imm);
// Move half precision immediate to FP register [Armv8.2].
- void fmov(const VRegister& vd, F16 imm);
+ void fmov(const VRegister& vd, Float16 imm);
// Move FP register to register.
void fmov(const Register& rd, const VRegister& fn);
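With the rename to Float16, a half-precision immediate move is written as below; this is only a sketch, with the register and value chosen arbitrarily and `assm` set up as in the earlier atomics example:

    // Requires kFP and kFPHalf; -0.25 is representable as an FP8 immediate.
    assm->fmov(h1, Float16(-0.25));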
@@ -1515,6 +2299,7 @@
// Common FP Convert functions.
void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
+ void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
// FP convert between precisions.
void fcvt(const VRegister& vd, const VRegister& vn);
@@ -1564,6 +2349,9 @@
// FP convert to signed integer, nearest with ties to even.
void fcvtns(const Register& rd, const VRegister& vn);
+ // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
+ void fjcvtzs(const Register& rd, const VRegister& vn);
+
// FP convert to unsigned integer, nearest with ties to even.
void fcvtnu(const Register& rd, const VRegister& vn);
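fjcvtzs emits FJCVTZS, the Armv8.3 JavaScript conversion: the double in the source register is truncated to a 32-bit signed result modulo 2^32, and the Z flag is set when the conversion was exact and in range. A usage sketch with arbitrarily chosen registers:

    // w0 <- JavaScript-style ToInt32(d1); requires kJSCVT (and kFP).
    assm->fjcvtzs(w0, d1);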
@@ -2791,6 +3579,11 @@
return (rn.GetCode() & kRegCodeMask) << Rn_offset;
}
+ static Instr RmSP(Register rm) {
+ VIXL_ASSERT(!rm.IsZero());
+ return (rm.GetCode() & kRegCodeMask) << Rm_offset;
+ }
+
// Flags encoding.
static Instr Flags(FlagsUpdate S) {
if (S == SetFlags) {
@@ -3018,7 +3811,7 @@
}
// FP Immediates.
- static Instr ImmFP16(float16 imm);
+ static Instr ImmFP16(Float16 imm);
static Instr ImmFP32(float imm);
static Instr ImmFP64(double imm);
@@ -3045,7 +3838,7 @@
// Immediate field checking helpers.
static bool IsImmAddSub(int64_t immediate);
static bool IsImmConditionalCompare(int64_t immediate);
- static bool IsImmFP16(float16 imm);
+ static bool IsImmFP16(Float16 imm);
static bool IsImmFP32(float imm);
static bool IsImmFP64(double imm);
static bool IsImmLogical(uint64_t value,
@@ -3422,7 +4215,7 @@
bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
private:
- static uint32_t FP16ToImm8(float16 imm);
+ static uint32_t FP16ToImm8(Float16 imm);
static uint32_t FP32ToImm8(float imm);
static uint32_t FP64ToImm8(double imm);
@@ -3472,7 +4265,8 @@
NEONAcrossLanesOp op);
void NEONAcrossLanes(const VRegister& vd,
const VRegister& vn,
- NEONAcrossLanesOp op);
+ NEONAcrossLanesOp op,
+ Instr op_half);
void NEONModifiedImmShiftLsl(const VRegister& vd,
const int imm8,
const int left_shift,
@@ -3486,6 +4280,10 @@
const VRegister& vn,
const VRegister& vm,
NEON3SameOp vop);
+ void NEON3SameFP16(const VRegister& vd,
+ const VRegister& vn,
+ const VRegister& vm,
+ Instr op);
void NEONFP3Same(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
@@ -3506,11 +4304,16 @@
const VRegister& vn,
NEON2RegMiscOp vop,
double value = 0.0);
+ void NEONFP2RegMiscFP16(const VRegister& vd,
+ const VRegister& vn,
+ NEON2RegMiscFP16Op vop,
+ double value = 0.0);
void NEON2RegMisc(const VRegister& vd,
const VRegister& vn,
NEON2RegMiscOp vop,
int value = 0);
void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
+ void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op);
void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
void NEONPerm(const VRegister& vd,
const VRegister& vn,
@@ -3520,7 +4323,8 @@
const VRegister& vn,
const VRegister& vm,
int vm_index,
- NEONByIndexedElementOp op);
+ NEONByIndexedElementOp op,
+ NEONByIndexedElementOp op_half);
void NEONByElement(const VRegister& vd,
const VRegister& vn,
const VRegister& vm,
diff --git a/src/aarch64/constants-aarch64.h b/src/aarch64/constants-aarch64.h
index d3d403f..de659f0 100644
--- a/src/aarch64/constants-aarch64.h
+++ b/src/aarch64/constants-aarch64.h
@@ -305,6 +305,7 @@
WFI = 3,
SEV = 4,
SEVL = 5,
+ ESB = 16,
CSDB = 20
};
@@ -653,10 +654,21 @@
enum UnconditionalBranchToRegisterOp {
UnconditionalBranchToRegisterFixed = 0xD6000000,
UnconditionalBranchToRegisterFMask = 0xFE000000,
- UnconditionalBranchToRegisterMask = 0xFFFFFC1F,
+ UnconditionalBranchToRegisterMask = 0xFFFFFC00,
BR = UnconditionalBranchToRegisterFixed | 0x001F0000,
BLR = UnconditionalBranchToRegisterFixed | 0x003F0000,
- RET = UnconditionalBranchToRegisterFixed | 0x005F0000
+ RET = UnconditionalBranchToRegisterFixed | 0x005F0000,
+
+ BRAAZ = UnconditionalBranchToRegisterFixed | 0x001F0800,
+ BRABZ = UnconditionalBranchToRegisterFixed | 0x001F0C00,
+ BLRAAZ = UnconditionalBranchToRegisterFixed | 0x003F0800,
+ BLRABZ = UnconditionalBranchToRegisterFixed | 0x003F0C00,
+ RETAA = UnconditionalBranchToRegisterFixed | 0x005F0800,
+ RETAB = UnconditionalBranchToRegisterFixed | 0x005F0C00,
+ BRAA = UnconditionalBranchToRegisterFixed | 0x011F0800,
+ BRAB = UnconditionalBranchToRegisterFixed | 0x011F0C00,
+ BLRAA = UnconditionalBranchToRegisterFixed | 0x013F0800,
+ BLRAB = UnconditionalBranchToRegisterFixed | 0x013F0C00
};
// Compare and branch.
@@ -752,6 +764,28 @@
CLREX = SystemExclusiveMonitorFixed
};
+enum SystemPAuthOp {
+ SystemPAuthFixed = 0xD503211F,
+ SystemPAuthFMask = 0xFFFFFD1F,
+ SystemPAuthMask = 0xFFFFFFFF,
+ PACIA1716 = SystemPAuthFixed | 0x00000100,
+ PACIB1716 = SystemPAuthFixed | 0x00000140,
+ AUTIA1716 = SystemPAuthFixed | 0x00000180,
+ AUTIB1716 = SystemPAuthFixed | 0x000001C0,
+ PACIAZ = SystemPAuthFixed | 0x00000300,
+ PACIASP = SystemPAuthFixed | 0x00000320,
+ PACIBZ = SystemPAuthFixed | 0x00000340,
+ PACIBSP = SystemPAuthFixed | 0x00000360,
+ AUTIAZ = SystemPAuthFixed | 0x00000380,
+ AUTIASP = SystemPAuthFixed | 0x000003A0,
+ AUTIBZ = SystemPAuthFixed | 0x000003C0,
+ AUTIBSP = SystemPAuthFixed | 0x000003E0,
+
+ // XPACLRI has the same fixed mask as System Hints and needs to be handled
+ // differently.
+ XPACLRI = 0xD50320FF
+};
+
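Because XPACLRI shares its fixed bits with the System Hint class rather than with SystemPAuthFixed, code that classifies PAuth hints has to test for it separately. A hypothetical helper (not part of this patch) showing the intended masking:

    // Returns true if `instr` encodes one of the PAuth hint instructions above.
    inline bool IsPAuthHint(uint32_t instr) {
      if ((instr & SystemPAuthFMask) == SystemPAuthFixed) return true;
      return instr == XPACLRI;  // Encoded in the System Hint space instead.
    }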
// Any load or store.
enum LoadStoreAnyOp {
LoadStoreAnyFMask = 0x0a000000,
@@ -1023,6 +1057,67 @@
CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz
};
+#define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V) \
+ V(LDADD, 0x00000000), \
+ V(LDCLR, 0x00001000), \
+ V(LDEOR, 0x00002000), \
+ V(LDSET, 0x00003000), \
+ V(LDSMAX, 0x00004000), \
+ V(LDSMIN, 0x00005000), \
+ V(LDUMAX, 0x00006000), \
+ V(LDUMIN, 0x00007000)
+
+// Atomic memory.
+enum AtomicMemoryOp {
+ AtomicMemoryFixed = 0x38200000,
+ AtomicMemoryFMask = 0x3B200C00,
+ AtomicMemoryMask = 0xFFE0FC00,
+ SWPB = AtomicMemoryFixed | 0x00008000,
+ SWPAB = AtomicMemoryFixed | 0x00808000,
+ SWPLB = AtomicMemoryFixed | 0x00408000,
+ SWPALB = AtomicMemoryFixed | 0x00C08000,
+ SWPH = AtomicMemoryFixed | 0x40008000,
+ SWPAH = AtomicMemoryFixed | 0x40808000,
+ SWPLH = AtomicMemoryFixed | 0x40408000,
+ SWPALH = AtomicMemoryFixed | 0x40C08000,
+ SWP_w = AtomicMemoryFixed | 0x80008000,
+ SWPA_w = AtomicMemoryFixed | 0x80808000,
+ SWPL_w = AtomicMemoryFixed | 0x80408000,
+ SWPAL_w = AtomicMemoryFixed | 0x80C08000,
+ SWP_x = AtomicMemoryFixed | 0xC0008000,
+ SWPA_x = AtomicMemoryFixed | 0xC0808000,
+ SWPL_x = AtomicMemoryFixed | 0xC0408000,
+ SWPAL_x = AtomicMemoryFixed | 0xC0C08000,
+ LDAPRB = AtomicMemoryFixed | 0x0080C000,
+ LDAPRH = AtomicMemoryFixed | 0x4080C000,
+ LDAPR_w = AtomicMemoryFixed | 0x8080C000,
+ LDAPR_x = AtomicMemoryFixed | 0xC080C000,
+
+ AtomicMemorySimpleFMask = 0x3B208C00,
+ AtomicMemorySimpleOpMask = 0x00007000,
+#define ATOMIC_MEMORY_SIMPLE(N, OP) \
+ N##Op = OP, \
+ N##B = AtomicMemoryFixed | OP, \
+ N##AB = AtomicMemoryFixed | OP | 0x00800000, \
+ N##LB = AtomicMemoryFixed | OP | 0x00400000, \
+ N##ALB = AtomicMemoryFixed | OP | 0x00C00000, \
+ N##H = AtomicMemoryFixed | OP | 0x40000000, \
+ N##AH = AtomicMemoryFixed | OP | 0x40800000, \
+ N##LH = AtomicMemoryFixed | OP | 0x40400000, \
+ N##ALH = AtomicMemoryFixed | OP | 0x40C00000, \
+ N##_w = AtomicMemoryFixed | OP | 0x80000000, \
+ N##A_w = AtomicMemoryFixed | OP | 0x80800000, \
+ N##L_w = AtomicMemoryFixed | OP | 0x80400000, \
+ N##AL_w = AtomicMemoryFixed | OP | 0x80C00000, \
+ N##_x = AtomicMemoryFixed | OP | 0xC0000000, \
+ N##A_x = AtomicMemoryFixed | OP | 0xC0800000, \
+ N##L_x = AtomicMemoryFixed | OP | 0xC0400000, \
+ N##AL_x = AtomicMemoryFixed | OP | 0xC0C00000
+
+ ATOMIC_MEMORY_SIMPLE_OPC_LIST(ATOMIC_MEMORY_SIMPLE)
+#undef ATOMIC_MEMORY_SIMPLE
+};
+
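Each opcode in ATOMIC_MEMORY_SIMPLE_OPC_LIST expands to the full size and ordering matrix. For LDCLR, for example, the generated constants include the following (hypothetical spot checks; the values follow directly from the macro above):

    static_assert(LDCLRB == 0x38201000, "byte, no ordering");
    static_assert(LDCLRALH == 0x78E01000, "halfword, acquire-release");
    static_assert(LDCLRAL_x == 0xF8E01000, "doubleword, acquire-release");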
// Conditional compare.
enum ConditionalCompareOp {
ConditionalCompareMask = 0x60000000,
@@ -1091,7 +1186,27 @@
CLZ_x = CLZ | SixtyFourBits,
CLS = DataProcessing1SourceFixed | 0x00001400,
CLS_w = CLS,
- CLS_x = CLS | SixtyFourBits
+ CLS_x = CLS | SixtyFourBits,
+
+ // Pointer authentication instructions in Armv8.3.
+ PACIA = DataProcessing1SourceFixed | 0x80010000,
+ PACIB = DataProcessing1SourceFixed | 0x80010400,
+ PACDA = DataProcessing1SourceFixed | 0x80010800,
+ PACDB = DataProcessing1SourceFixed | 0x80010C00,
+ AUTIA = DataProcessing1SourceFixed | 0x80011000,
+ AUTIB = DataProcessing1SourceFixed | 0x80011400,
+ AUTDA = DataProcessing1SourceFixed | 0x80011800,
+ AUTDB = DataProcessing1SourceFixed | 0x80011C00,
+ PACIZA = DataProcessing1SourceFixed | 0x80012000,
+ PACIZB = DataProcessing1SourceFixed | 0x80012400,
+ PACDZA = DataProcessing1SourceFixed | 0x80012800,
+ PACDZB = DataProcessing1SourceFixed | 0x80012C00,
+ AUTIZA = DataProcessing1SourceFixed | 0x80013000,
+ AUTIZB = DataProcessing1SourceFixed | 0x80013400,
+ AUTDZA = DataProcessing1SourceFixed | 0x80013800,
+ AUTDZB = DataProcessing1SourceFixed | 0x80013C00,
+ XPACI = DataProcessing1SourceFixed | 0x80014000,
+ XPACD = DataProcessing1SourceFixed | 0x80014400
};
// Data processing 2 source.
@@ -1117,6 +1232,7 @@
RORV_w = DataProcessing2SourceFixed | 0x00002C00,
RORV_x = DataProcessing2SourceFixed | 0x80002C00,
RORV = RORV_w,
+ PACGA = DataProcessing2SourceFixed | SixtyFourBits | 0x00003000,
CRC32B = DataProcessing2SourceFixed | 0x00004000,
CRC32H = DataProcessing2SourceFixed | 0x00004400,
CRC32W = DataProcessing2SourceFixed | 0x00004800,
@@ -1151,15 +1267,19 @@
FPCompareFixed = 0x1E202000,
FPCompareFMask = 0x5F203C00,
FPCompareMask = 0xFFE0FC1F,
+ FCMP_h = FPCompareFixed | FP16 | 0x00000000,
FCMP_s = FPCompareFixed | 0x00000000,
FCMP_d = FPCompareFixed | FP64 | 0x00000000,
FCMP = FCMP_s,
+ FCMP_h_zero = FPCompareFixed | FP16 | 0x00000008,
FCMP_s_zero = FPCompareFixed | 0x00000008,
FCMP_d_zero = FPCompareFixed | FP64 | 0x00000008,
FCMP_zero = FCMP_s_zero,
+ FCMPE_h = FPCompareFixed | FP16 | 0x00000010,
FCMPE_s = FPCompareFixed | 0x00000010,
FCMPE_d = FPCompareFixed | FP64 | 0x00000010,
FCMPE = FCMPE_s,
+ FCMPE_h_zero = FPCompareFixed | FP16 | 0x00000018,
FCMPE_s_zero = FPCompareFixed | 0x00000018,
FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018,
FCMPE_zero = FCMPE_s_zero
@@ -1170,9 +1290,11 @@
FPConditionalCompareFixed = 0x1E200400,
FPConditionalCompareFMask = 0x5F200C00,
FPConditionalCompareMask = 0xFFE00C10,
+ FCCMP_h = FPConditionalCompareFixed | FP16 | 0x00000000,
FCCMP_s = FPConditionalCompareFixed | 0x00000000,
FCCMP_d = FPConditionalCompareFixed | FP64 | 0x00000000,
FCCMP = FCCMP_s,
+ FCCMPE_h = FPConditionalCompareFixed | FP16 | 0x00000010,
FCCMPE_s = FPConditionalCompareFixed | 0x00000010,
FCCMPE_d = FPConditionalCompareFixed | FP64 | 0x00000010,
FCCMPE = FCCMPE_s
@@ -1183,6 +1305,7 @@
FPConditionalSelectFixed = 0x1E200C00,
FPConditionalSelectFMask = 0x5F200C00,
FPConditionalSelectMask = 0xFFE00C00,
+ FCSEL_h = FPConditionalSelectFixed | FP16 | 0x00000000,
FCSEL_s = FPConditionalSelectFixed | 0x00000000,
FCSEL_d = FPConditionalSelectFixed | FP64 | 0x00000000,
FCSEL = FCSEL_s
@@ -1207,12 +1330,15 @@
FMOV_s = FPDataProcessing1SourceFixed | 0x00000000,
FMOV_d = FPDataProcessing1SourceFixed | FP64 | 0x00000000,
FMOV = FMOV_s,
+ FABS_h = FPDataProcessing1SourceFixed | FP16 | 0x00008000,
FABS_s = FPDataProcessing1SourceFixed | 0x00008000,
FABS_d = FPDataProcessing1SourceFixed | FP64 | 0x00008000,
FABS = FABS_s,
+ FNEG_h = FPDataProcessing1SourceFixed | FP16 | 0x00010000,
FNEG_s = FPDataProcessing1SourceFixed | 0x00010000,
FNEG_d = FPDataProcessing1SourceFixed | FP64 | 0x00010000,
FNEG = FNEG_s,
+ FSQRT_h = FPDataProcessing1SourceFixed | FP16 | 0x00018000,
FSQRT_s = FPDataProcessing1SourceFixed | 0x00018000,
FSQRT_d = FPDataProcessing1SourceFixed | FP64 | 0x00018000,
FSQRT = FSQRT_s,
@@ -1222,24 +1348,31 @@
FCVT_hd = FPDataProcessing1SourceFixed | FP64 | 0x00038000,
FCVT_sh = FPDataProcessing1SourceFixed | 0x00C20000,
FCVT_dh = FPDataProcessing1SourceFixed | 0x00C28000,
+ FRINTN_h = FPDataProcessing1SourceFixed | FP16 | 0x00040000,
FRINTN_s = FPDataProcessing1SourceFixed | 0x00040000,
FRINTN_d = FPDataProcessing1SourceFixed | FP64 | 0x00040000,
FRINTN = FRINTN_s,
+ FRINTP_h = FPDataProcessing1SourceFixed | FP16 | 0x00048000,
FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000,
FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000,
FRINTP = FRINTP_s,
+ FRINTM_h = FPDataProcessing1SourceFixed | FP16 | 0x00050000,
FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000,
FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000,
FRINTM = FRINTM_s,
+ FRINTZ_h = FPDataProcessing1SourceFixed | FP16 | 0x00058000,
FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000,
FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000,
FRINTZ = FRINTZ_s,
+ FRINTA_h = FPDataProcessing1SourceFixed | FP16 | 0x00060000,
FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000,
FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000,
FRINTA = FRINTA_s,
+ FRINTX_h = FPDataProcessing1SourceFixed | FP16 | 0x00070000,
FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000,
FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000,
FRINTX = FRINTX_s,
+ FRINTI_h = FPDataProcessing1SourceFixed | FP16 | 0x00078000,
FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000,
FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000,
FRINTI = FRINTI_s
@@ -1251,30 +1384,39 @@
FPDataProcessing2SourceFMask = 0x5F200C00,
FPDataProcessing2SourceMask = 0xFFE0FC00,
FMUL = FPDataProcessing2SourceFixed | 0x00000000,
+ FMUL_h = FMUL | FP16,
FMUL_s = FMUL,
FMUL_d = FMUL | FP64,
FDIV = FPDataProcessing2SourceFixed | 0x00001000,
+ FDIV_h = FDIV | FP16,
FDIV_s = FDIV,
FDIV_d = FDIV | FP64,
FADD = FPDataProcessing2SourceFixed | 0x00002000,
+ FADD_h = FADD | FP16,
FADD_s = FADD,
FADD_d = FADD | FP64,
FSUB = FPDataProcessing2SourceFixed | 0x00003000,
+ FSUB_h = FSUB | FP16,
FSUB_s = FSUB,
FSUB_d = FSUB | FP64,
FMAX = FPDataProcessing2SourceFixed | 0x00004000,
+ FMAX_h = FMAX | FP16,
FMAX_s = FMAX,
FMAX_d = FMAX | FP64,
FMIN = FPDataProcessing2SourceFixed | 0x00005000,
+ FMIN_h = FMIN | FP16,
FMIN_s = FMIN,
FMIN_d = FMIN | FP64,
FMAXNM = FPDataProcessing2SourceFixed | 0x00006000,
+ FMAXNM_h = FMAXNM | FP16,
FMAXNM_s = FMAXNM,
FMAXNM_d = FMAXNM | FP64,
FMINNM = FPDataProcessing2SourceFixed | 0x00007000,
+ FMINNM_h = FMINNM | FP16,
FMINNM_s = FMINNM,
FMINNM_d = FMINNM | FP64,
FNMUL = FPDataProcessing2SourceFixed | 0x00008000,
+ FNMUL_h = FNMUL | FP16,
FNMUL_s = FNMUL,
FNMUL_d = FNMUL | FP64
};
@@ -1284,6 +1426,10 @@
FPDataProcessing3SourceFixed = 0x1F000000,
FPDataProcessing3SourceFMask = 0x5F000000,
FPDataProcessing3SourceMask = 0xFFE08000,
+ FMADD_h = FPDataProcessing3SourceFixed | 0x00C00000,
+ FMSUB_h = FPDataProcessing3SourceFixed | 0x00C08000,
+ FNMADD_h = FPDataProcessing3SourceFixed | 0x00E00000,
+ FNMSUB_h = FPDataProcessing3SourceFixed | 0x00E08000,
FMADD_s = FPDataProcessing3SourceFixed | 0x00000000,
FMSUB_s = FPDataProcessing3SourceFixed | 0x00008000,
FNMADD_s = FPDataProcessing3SourceFixed | 0x00200000,
@@ -1300,61 +1446,85 @@
FPIntegerConvertFMask = 0x5F20FC00,
FPIntegerConvertMask = 0xFFFFFC00,
FCVTNS = FPIntegerConvertFixed | 0x00000000,
+ FCVTNS_wh = FCVTNS | FP16,
+ FCVTNS_xh = FCVTNS | SixtyFourBits | FP16,
FCVTNS_ws = FCVTNS,
FCVTNS_xs = FCVTNS | SixtyFourBits,
FCVTNS_wd = FCVTNS | FP64,
FCVTNS_xd = FCVTNS | SixtyFourBits | FP64,
FCVTNU = FPIntegerConvertFixed | 0x00010000,
+ FCVTNU_wh = FCVTNU | FP16,
+ FCVTNU_xh = FCVTNU | SixtyFourBits | FP16,
FCVTNU_ws = FCVTNU,
FCVTNU_xs = FCVTNU | SixtyFourBits,
FCVTNU_wd = FCVTNU | FP64,
FCVTNU_xd = FCVTNU | SixtyFourBits | FP64,
FCVTPS = FPIntegerConvertFixed | 0x00080000,
+ FCVTPS_wh = FCVTPS | FP16,
+ FCVTPS_xh = FCVTPS | SixtyFourBits | FP16,
FCVTPS_ws = FCVTPS,
FCVTPS_xs = FCVTPS | SixtyFourBits,
FCVTPS_wd = FCVTPS | FP64,
FCVTPS_xd = FCVTPS | SixtyFourBits | FP64,
FCVTPU = FPIntegerConvertFixed | 0x00090000,
+ FCVTPU_wh = FCVTPU | FP16,
+ FCVTPU_xh = FCVTPU | SixtyFourBits | FP16,
FCVTPU_ws = FCVTPU,
FCVTPU_xs = FCVTPU | SixtyFourBits,
FCVTPU_wd = FCVTPU | FP64,
FCVTPU_xd = FCVTPU | SixtyFourBits | FP64,
FCVTMS = FPIntegerConvertFixed | 0x00100000,
+ FCVTMS_wh = FCVTMS | FP16,
+ FCVTMS_xh = FCVTMS | SixtyFourBits | FP16,
FCVTMS_ws = FCVTMS,
FCVTMS_xs = FCVTMS | SixtyFourBits,
FCVTMS_wd = FCVTMS | FP64,
FCVTMS_xd = FCVTMS | SixtyFourBits | FP64,
FCVTMU = FPIntegerConvertFixed | 0x00110000,
+ FCVTMU_wh = FCVTMU | FP16,
+ FCVTMU_xh = FCVTMU | SixtyFourBits | FP16,
FCVTMU_ws = FCVTMU,
FCVTMU_xs = FCVTMU | SixtyFourBits,
FCVTMU_wd = FCVTMU | FP64,
FCVTMU_xd = FCVTMU | SixtyFourBits | FP64,
FCVTZS = FPIntegerConvertFixed | 0x00180000,
+ FCVTZS_wh = FCVTZS | FP16,
+ FCVTZS_xh = FCVTZS | SixtyFourBits | FP16,
FCVTZS_ws = FCVTZS,
FCVTZS_xs = FCVTZS | SixtyFourBits,
FCVTZS_wd = FCVTZS | FP64,
FCVTZS_xd = FCVTZS | SixtyFourBits | FP64,
FCVTZU = FPIntegerConvertFixed | 0x00190000,
+ FCVTZU_wh = FCVTZU | FP16,
+ FCVTZU_xh = FCVTZU | SixtyFourBits | FP16,
FCVTZU_ws = FCVTZU,
FCVTZU_xs = FCVTZU | SixtyFourBits,
FCVTZU_wd = FCVTZU | FP64,
FCVTZU_xd = FCVTZU | SixtyFourBits | FP64,
SCVTF = FPIntegerConvertFixed | 0x00020000,
+ SCVTF_hw = SCVTF | FP16,
+ SCVTF_hx = SCVTF | SixtyFourBits | FP16,
SCVTF_sw = SCVTF,
SCVTF_sx = SCVTF | SixtyFourBits,
SCVTF_dw = SCVTF | FP64,
SCVTF_dx = SCVTF | SixtyFourBits | FP64,
UCVTF = FPIntegerConvertFixed | 0x00030000,
+ UCVTF_hw = UCVTF | FP16,
+ UCVTF_hx = UCVTF | SixtyFourBits | FP16,
UCVTF_sw = UCVTF,
UCVTF_sx = UCVTF | SixtyFourBits,
UCVTF_dw = UCVTF | FP64,
UCVTF_dx = UCVTF | SixtyFourBits | FP64,
FCVTAS = FPIntegerConvertFixed | 0x00040000,
+ FCVTAS_wh = FCVTAS | FP16,
+ FCVTAS_xh = FCVTAS | SixtyFourBits | FP16,
FCVTAS_ws = FCVTAS,
FCVTAS_xs = FCVTAS | SixtyFourBits,
FCVTAS_wd = FCVTAS | FP64,
FCVTAS_xd = FCVTAS | SixtyFourBits | FP64,
FCVTAU = FPIntegerConvertFixed | 0x00050000,
+ FCVTAU_wh = FCVTAU | FP16,
+ FCVTAU_xh = FCVTAU | SixtyFourBits | FP16,
FCVTAU_ws = FCVTAU,
FCVTAU_xs = FCVTAU | SixtyFourBits,
FCVTAU_wd = FCVTAU | FP64,
@@ -1368,7 +1538,8 @@
FMOV_xd = FMOV_ws | SixtyFourBits | FP64,
FMOV_dx = FMOV_sw | SixtyFourBits | FP64,
FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000,
- FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000
+ FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000,
+ FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000
};
// Conversion between fixed point and floating point.
@@ -1377,21 +1548,29 @@
FPFixedPointConvertFMask = 0x5F200000,
FPFixedPointConvertMask = 0xFFFF0000,
FCVTZS_fixed = FPFixedPointConvertFixed | 0x00180000,
+ FCVTZS_wh_fixed = FCVTZS_fixed | FP16,
+ FCVTZS_xh_fixed = FCVTZS_fixed | SixtyFourBits | FP16,
FCVTZS_ws_fixed = FCVTZS_fixed,
FCVTZS_xs_fixed = FCVTZS_fixed | SixtyFourBits,
FCVTZS_wd_fixed = FCVTZS_fixed | FP64,
FCVTZS_xd_fixed = FCVTZS_fixed | SixtyFourBits | FP64,
FCVTZU_fixed = FPFixedPointConvertFixed | 0x00190000,
+ FCVTZU_wh_fixed = FCVTZU_fixed | FP16,
+ FCVTZU_xh_fixed = FCVTZU_fixed | SixtyFourBits | FP16,
FCVTZU_ws_fixed = FCVTZU_fixed,
FCVTZU_xs_fixed = FCVTZU_fixed | SixtyFourBits,
FCVTZU_wd_fixed = FCVTZU_fixed | FP64,
FCVTZU_xd_fixed = FCVTZU_fixed | SixtyFourBits | FP64,
SCVTF_fixed = FPFixedPointConvertFixed | 0x00020000,
+ SCVTF_hw_fixed = SCVTF_fixed | FP16,
+ SCVTF_hx_fixed = SCVTF_fixed | SixtyFourBits | FP16,
SCVTF_sw_fixed = SCVTF_fixed,
SCVTF_sx_fixed = SCVTF_fixed | SixtyFourBits,
SCVTF_dw_fixed = SCVTF_fixed | FP64,
SCVTF_dx_fixed = SCVTF_fixed | SixtyFourBits | FP64,
UCVTF_fixed = FPFixedPointConvertFixed | 0x00030000,
+ UCVTF_hw_fixed = UCVTF_fixed | FP16,
+ UCVTF_hx_fixed = UCVTF_fixed | SixtyFourBits | FP16,
UCVTF_sw_fixed = UCVTF_fixed,
UCVTF_sx_fixed = UCVTF_fixed | SixtyFourBits,
UCVTF_dw_fixed = UCVTF_fixed | FP64,
@@ -1498,6 +1677,42 @@
NEON_FCVTN_opcode = NEON_FCVTN & NEON2RegMiscOpcode
};
+// NEON instructions with two register operands (FP16).
+enum NEON2RegMiscFP16Op {
+ NEON2RegMiscFP16Fixed = 0x0E780800,
+ NEON2RegMiscFP16FMask = 0x9F7E0C00,
+ NEON2RegMiscFP16Mask = 0xBFFFFC00,
+ NEON_FRINTN_H = NEON2RegMiscFP16Fixed | 0x00018000,
+ NEON_FRINTM_H = NEON2RegMiscFP16Fixed | 0x00019000,
+ NEON_FCVTNS_H = NEON2RegMiscFP16Fixed | 0x0001A000,
+ NEON_FCVTMS_H = NEON2RegMiscFP16Fixed | 0x0001B000,
+ NEON_FCVTAS_H = NEON2RegMiscFP16Fixed | 0x0001C000,
+ NEON_SCVTF_H = NEON2RegMiscFP16Fixed | 0x0001D000,
+ NEON_FCMGT_H_zero = NEON2RegMiscFP16Fixed | 0x0080C000,
+ NEON_FCMEQ_H_zero = NEON2RegMiscFP16Fixed | 0x0080D000,
+ NEON_FCMLT_H_zero = NEON2RegMiscFP16Fixed | 0x0080E000,
+ NEON_FABS_H = NEON2RegMiscFP16Fixed | 0x0080F000,
+ NEON_FRINTP_H = NEON2RegMiscFP16Fixed | 0x00818000,
+ NEON_FRINTZ_H = NEON2RegMiscFP16Fixed | 0x00819000,
+ NEON_FCVTPS_H = NEON2RegMiscFP16Fixed | 0x0081A000,
+ NEON_FCVTZS_H = NEON2RegMiscFP16Fixed | 0x0081B000,
+ NEON_FRECPE_H = NEON2RegMiscFP16Fixed | 0x0081D000,
+ NEON_FRINTA_H = NEON2RegMiscFP16Fixed | 0x20018000,
+ NEON_FRINTX_H = NEON2RegMiscFP16Fixed | 0x20019000,
+ NEON_FCVTNU_H = NEON2RegMiscFP16Fixed | 0x2001A000,
+ NEON_FCVTMU_H = NEON2RegMiscFP16Fixed | 0x2001B000,
+ NEON_FCVTAU_H = NEON2RegMiscFP16Fixed | 0x2001C000,
+ NEON_UCVTF_H = NEON2RegMiscFP16Fixed | 0x2001D000,
+ NEON_FCMGE_H_zero = NEON2RegMiscFP16Fixed | 0x2080C000,
+ NEON_FCMLE_H_zero = NEON2RegMiscFP16Fixed | 0x2080D000,
+ NEON_FNEG_H = NEON2RegMiscFP16Fixed | 0x2080F000,
+ NEON_FRINTI_H = NEON2RegMiscFP16Fixed | 0x20819000,
+ NEON_FCVTPU_H = NEON2RegMiscFP16Fixed | 0x2081A000,
+ NEON_FCVTZU_H = NEON2RegMiscFP16Fixed | 0x2081B000,
+ NEON_FRSQRTE_H = NEON2RegMiscFP16Fixed | 0x2081D000,
+ NEON_FSQRT_H = NEON2RegMiscFP16Fixed | 0x2081F000
+};
+
// NEON instructions with three same-type operands.
enum NEON3SameOp {
NEON3SameFixed = 0x0E200400,
@@ -1595,6 +1810,37 @@
};
+enum NEON3SameFP16 {
+ NEON3SameFP16Fixed = 0x0E400400,
+ NEON3SameFP16FMask = 0x9F60C400,
+ NEON3SameFP16Mask = 0xBFE0FC00,
+ NEON_FMAXNM_H = NEON3SameFP16Fixed | 0x00000000,
+ NEON_FMLA_H = NEON3SameFP16Fixed | 0x00000800,
+ NEON_FADD_H = NEON3SameFP16Fixed | 0x00001000,
+ NEON_FMULX_H = NEON3SameFP16Fixed | 0x00001800,
+ NEON_FCMEQ_H = NEON3SameFP16Fixed | 0x00002000,
+ NEON_FMAX_H = NEON3SameFP16Fixed | 0x00003000,
+ NEON_FRECPS_H = NEON3SameFP16Fixed | 0x00003800,
+ NEON_FMINNM_H = NEON3SameFP16Fixed | 0x00800000,
+ NEON_FMLS_H = NEON3SameFP16Fixed | 0x00800800,
+ NEON_FSUB_H = NEON3SameFP16Fixed | 0x00801000,
+ NEON_FMIN_H = NEON3SameFP16Fixed | 0x00803000,
+ NEON_FRSQRTS_H = NEON3SameFP16Fixed | 0x00803800,
+ NEON_FMAXNMP_H = NEON3SameFP16Fixed | 0x20000000,
+ NEON_FADDP_H = NEON3SameFP16Fixed | 0x20001000,
+ NEON_FMUL_H = NEON3SameFP16Fixed | 0x20001800,
+ NEON_FCMGE_H = NEON3SameFP16Fixed | 0x20002000,
+ NEON_FACGE_H = NEON3SameFP16Fixed | 0x20002800,
+ NEON_FMAXP_H = NEON3SameFP16Fixed | 0x20003000,
+ NEON_FDIV_H = NEON3SameFP16Fixed | 0x20003800,
+ NEON_FMINNMP_H = NEON3SameFP16Fixed | 0x20800000,
+ NEON_FABD_H = NEON3SameFP16Fixed | 0x20801000,
+ NEON_FCMGT_H = NEON3SameFP16Fixed | 0x20802000,
+ NEON_FACGT_H = NEON3SameFP16Fixed | 0x20802800,
+ NEON_FMINP_H = NEON3SameFP16Fixed | 0x20803000
+};
+
+
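On the assembler side these FP16 encodings are reached through the existing NEON floating-point entry points once the lane or scalar size is half precision. A sketch (assuming `assm` as before, with kNEON, kFP and kNEONHalf enabled):

    // Vector half-precision arithmetic across eight and four lanes.
    assm->fadd(v0.V8H(), v1.V8H(), v2.V8H());
    assm->fmul(v3.V4H(), v4.V4H(), v5.V4H());
    // Scalar half-precision compare.
    assm->fcmge(h0, h1, h2);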
// 'Extra' NEON instructions with three same-type operands.
enum NEON3SameExtraOp {
NEON3SameExtraFixed = 0x0E008400,
@@ -1690,10 +1936,18 @@
NEON_UMAXV = NEONAcrossLanesFixed | 0x2000A000,
NEON_UMINV = NEONAcrossLanesFixed | 0x2001A000,
+ NEONAcrossLanesFP16Fixed = NEONAcrossLanesFixed | 0x0000C000,
+ NEONAcrossLanesFP16FMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFP16Mask = NEONAcrossLanesMask | 0x20800000,
+ NEON_FMAXNMV_H = NEONAcrossLanesFP16Fixed | 0x00000000,
+ NEON_FMAXV_H = NEONAcrossLanesFP16Fixed | 0x00003000,
+ NEON_FMINNMV_H = NEONAcrossLanesFP16Fixed | 0x00800000,
+ NEON_FMINV_H = NEONAcrossLanesFP16Fixed | 0x00803000,
+
// NEON floating point across instructions.
- NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x0000C000,
- NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x0000C000,
- NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x00800000,
+ NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x2000C000,
+ NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x2000C000,
+ NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x20800000,
NEON_FMAXV = NEONAcrossLanesFPFixed | 0x2000F000,
NEON_FMINV = NEONAcrossLanesFPFixed | 0x2080F000,
@@ -1724,6 +1978,10 @@
NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000,
NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000,
NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000,
+ NEON_FMLA_H_byelement = NEONByIndexedElementFixed | 0x00001000,
+ NEON_FMLS_H_byelement = NEONByIndexedElementFixed | 0x00005000,
+ NEON_FMUL_H_byelement = NEONByIndexedElementFixed | 0x00009000,
+ NEON_FMULX_H_byelement = NEONByIndexedElementFixed | 0x20009000,
// Floating point instructions.
NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000,
@@ -2072,6 +2330,33 @@
NEON_FCVTXN_scalar = NEON_Q | NEONScalar | NEON_FCVTXN
};
+// NEON instructions with two register operands (FP16).
+enum NEONScalar2RegMiscFP16Op {
+ NEONScalar2RegMiscFP16Fixed = 0x5E780800,
+ NEONScalar2RegMiscFP16FMask = 0xDF7E0C00,
+ NEONScalar2RegMiscFP16Mask = 0xFFFFFC00,
+ NEON_FCVTNS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNS_H,
+ NEON_FCVTMS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMS_H,
+ NEON_FCVTAS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAS_H,
+ NEON_SCVTF_H_scalar = NEON_Q | NEONScalar | NEON_SCVTF_H,
+ NEON_FCMGT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H_zero,
+ NEON_FCMEQ_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H_zero,
+ NEON_FCMLT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_H_zero,
+ NEON_FCVTPS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPS_H,
+ NEON_FCVTZS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_H,
+ NEON_FRECPE_H_scalar = NEON_Q | NEONScalar | NEON_FRECPE_H,
+ NEON_FRECPX_H_scalar = NEONScalar2RegMiscFP16Fixed | 0x0081F000,
+ NEON_FCVTNU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNU_H,
+ NEON_FCVTMU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMU_H,
+ NEON_FCVTAU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAU_H,
+ NEON_UCVTF_H_scalar = NEON_Q | NEONScalar | NEON_UCVTF_H,
+ NEON_FCMGE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H_zero,
+ NEON_FCMLE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_H_zero,
+ NEON_FCVTPU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPU_H,
+ NEON_FCVTZU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_H,
+ NEON_FRSQRTE_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE_H
+};
+
// NEON scalar instructions with three same-type operands.
enum NEONScalar3SameOp {
NEONScalar3SameFixed = 0x5E200400,
@@ -2115,6 +2400,22 @@
NEON_FABD_scalar = NEON_Q | NEONScalar | NEON_FABD
};
+// NEON scalar FP16 instructions with three same-type operands.
+enum NEONScalar3SameFP16Op {
+ NEONScalar3SameFP16Fixed = 0x5E400400,
+ NEONScalar3SameFP16FMask = 0xDF60C400,
+ NEONScalar3SameFP16Mask = 0xFFE0FC00,
+ NEON_FABD_H_scalar = NEON_Q | NEONScalar | NEON_FABD_H,
+ NEON_FMULX_H_scalar = NEON_Q | NEONScalar | NEON_FMULX_H,
+ NEON_FCMEQ_H_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H,
+ NEON_FCMGE_H_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H,
+ NEON_FCMGT_H_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H,
+ NEON_FACGE_H_scalar = NEON_Q | NEONScalar | NEON_FACGE_H,
+ NEON_FACGT_H_scalar = NEON_Q | NEONScalar | NEON_FACGT_H,
+ NEON_FRECPS_H_scalar = NEON_Q | NEONScalar | NEON_FRECPS_H,
+ NEON_FRSQRTS_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS_H
+};
+
// 'Extra' NEON scalar instructions with three same-type operands.
enum NEONScalar3SameExtraOp {
NEONScalar3SameExtraFixed = 0x5E008400,
@@ -2149,6 +2450,10 @@
= NEON_Q | NEONScalar | NEON_SQRDMLAH_byelement,
NEON_SQRDMLSH_byelement_scalar
= NEON_Q | NEONScalar | NEON_SQRDMLSH_byelement,
+ NEON_FMLA_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_H_byelement,
+ NEON_FMLS_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_H_byelement,
+ NEON_FMUL_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_H_byelement,
+ NEON_FMULX_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_H_byelement,
// Floating point instructions.
NEONScalarByIndexedElementFPFixed
@@ -2174,12 +2479,17 @@
NEONScalarPairwiseFixed = 0x5E300800,
NEONScalarPairwiseFMask = 0xDF3E0C00,
NEONScalarPairwiseMask = 0xFFB1F800,
- NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000,
- NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000,
- NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000,
- NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000,
- NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000,
- NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000
+ NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000,
+ NEON_FMAXNMP_h_scalar = NEONScalarPairwiseFixed | 0x0000C000,
+ NEON_FADDP_h_scalar = NEONScalarPairwiseFixed | 0x0000D000,
+ NEON_FMAXP_h_scalar = NEONScalarPairwiseFixed | 0x0000F000,
+ NEON_FMINNMP_h_scalar = NEONScalarPairwiseFixed | 0x0080C000,
+ NEON_FMINP_h_scalar = NEONScalarPairwiseFixed | 0x0080F000,
+ NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000,
+ NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000,
+ NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000,
+ NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000,
+ NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000
};
// NEON scalar shift immediate.
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 68fae51..66f0d80 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -47,6 +47,10 @@
auditor_->seen_.Combine(auditor_->last_instruction_);
}
+ void Record(const CPUFeatures& features) {
+ auditor_->last_instruction_.Combine(features);
+ }
+
void Record(CPUFeatures::Feature feature0,
CPUFeatures::Feature feature1 = CPUFeatures::kNone,
CPUFeatures::Feature feature2 = CPUFeatures::kNone,
@@ -136,6 +140,22 @@
USE(instr);
}
+void CPUFeaturesAuditor::VisitAtomicMemory(const Instruction* instr) {
+ RecordInstructionFeaturesScope scope(this);
+ switch (instr->Mask(AtomicMemoryMask)) {
+ case LDAPRB:
+ case LDAPRH:
+ case LDAPR_w:
+ case LDAPR_x:
+ scope.Record(CPUFeatures::kRCpc);
+ return;
+ default:
+ // Everything else belongs to the Atomics extension.
+ scope.Record(CPUFeatures::kAtomics);
+ return;
+ }
+}
+
void CPUFeaturesAuditor::VisitBitfield(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
USE(instr);
@@ -184,8 +204,32 @@
}
void CPUFeaturesAuditor::VisitDataProcessing1Source(const Instruction* instr) {
- USE(instr);
RecordInstructionFeaturesScope scope(this);
+ switch (instr->Mask(DataProcessing1SourceMask)) {
+ case PACIA:
+ case PACIB:
+ case PACDA:
+ case PACDB:
+ case AUTIA:
+ case AUTIB:
+ case AUTDA:
+ case AUTDB:
+ case PACIZA:
+ case PACIZB:
+ case PACDZA:
+ case PACDZB:
+ case AUTIZA:
+ case AUTIZB:
+ case AUTDZA:
+ case AUTDZB:
+ case XPACI:
+ case XPACD:
+ scope.Record(CPUFeatures::kPAuth);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) {
@@ -201,6 +245,9 @@
case CRC32CX:
scope.Record(CPUFeatures::kCRC32);
return;
+ case PACGA:
+ scope.Record(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric);
+ return;
default:
// No special CPU features.
return;
@@ -226,21 +273,41 @@
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ switch (instr->Mask(FPCompareMask)) {
+ case FCMP_h:
+ case FCMP_h_zero:
+ case FCMPE_h:
+ case FCMPE_h_zero:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitFPConditionalCompare(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMP_h:
+ case FCCMPE_h:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitFPConditionalSelect(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ if (instr->Mask(FPConditionalSelectMask) == FCSEL_h) {
+ scope.Record(CPUFeatures::kFPHalf);
+ }
}
void CPUFeaturesAuditor::VisitFPDataProcessing1Source(
@@ -248,8 +315,25 @@
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- if (instr->Mask(FPDataProcessing1SourceMask) == FMOV_h) {
- scope.Record(CPUFeatures::kFPHalf);
+ switch (instr->Mask(FPDataProcessing1SourceMask)) {
+ case FMOV_h:
+ case FABS_h:
+ case FNEG_h:
+ case FSQRT_h:
+ case FRINTN_h:
+ case FRINTP_h:
+ case FRINTM_h:
+ case FRINTZ_h:
+ case FRINTA_h:
+ case FRINTX_h:
+ case FRINTI_h:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ // This category includes some half-precision FCVT instructions that do
+ // not require FPHalf.
+ return;
}
}
@@ -258,7 +342,22 @@
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ case FMUL_h:
+ case FDIV_h:
+ case FADD_h:
+ case FSUB_h:
+ case FMAX_h:
+ case FMIN_h:
+ case FMAXNM_h:
+ case FMINNM_h:
+ case FNMUL_h:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitFPDataProcessing3Source(
@@ -266,14 +365,38 @@
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ switch (instr->Mask(FPDataProcessing3SourceMask)) {
+ case FMADD_h:
+ case FMSUB_h:
+ case FNMADD_h:
+ case FNMSUB_h:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitFPFixedPointConvert(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
- USE(instr);
+ switch (instr->Mask(FPFixedPointConvertMask)) {
+ case FCVTZS_wh_fixed:
+ case FCVTZS_xh_fixed:
+ case FCVTZU_wh_fixed:
+ case FCVTZU_xh_fixed:
+ case SCVTF_hw_fixed:
+ case SCVTF_hx_fixed:
+ case UCVTF_hw_fixed:
+ case UCVTF_hx_fixed:
+ scope.Record(CPUFeatures::kFPHalf);
+ return;
+ default:
+ // No special CPU features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitFPImmediate(const Instruction* instr) {
@@ -290,16 +413,43 @@
// All of these instructions require FP.
scope.Record(CPUFeatures::kFP);
switch (instr->Mask(FPIntegerConvertMask)) {
+ case FCVTAS_wh:
+ case FCVTAS_xh:
+ case FCVTAU_wh:
+ case FCVTAU_xh:
+ case FCVTMS_wh:
+ case FCVTMS_xh:
+ case FCVTMU_wh:
+ case FCVTMU_xh:
+ case FCVTNS_wh:
+ case FCVTNS_xh:
+ case FCVTNU_wh:
+ case FCVTNU_xh:
+ case FCVTPS_wh:
+ case FCVTPS_xh:
+ case FCVTPU_wh:
+ case FCVTPU_xh:
+ case FCVTZS_wh:
+ case FCVTZS_xh:
+ case FCVTZU_wh:
+ case FCVTZU_xh:
case FMOV_hw:
+ case FMOV_hx:
case FMOV_wh:
case FMOV_xh:
- case FMOV_hx:
+ case SCVTF_hw:
+ case SCVTF_hx:
+ case UCVTF_hw:
+ case UCVTF_hx:
scope.Record(CPUFeatures::kFPHalf);
return;
case FMOV_d1_x:
case FMOV_x_d1:
scope.Record(CPUFeatures::kNEON);
return;
+ case FJCVTZS:
+ scope.Record(CPUFeatures::kJSCVT);
+ return;
default:
// No special CPU features.
return;
@@ -467,6 +617,13 @@
}
}
+void CPUFeaturesAuditor::VisitNEON2RegMiscFP16(const Instruction* instr) {
+ RecordInstructionFeaturesScope scope(this);
+ // All of these instructions require FP, NEON and NEONHalf.
+ scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
+ USE(instr);
+}
+
void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require NEON.
@@ -490,6 +647,7 @@
if ((instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) ||
(instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD)) {
scope.Record(CPUFeatures::kFP, CPUFeatures::kFcma);
+ if (instr->GetNEONSize() == 1) scope.Record(CPUFeatures::kNEONHalf);
} else {
switch (instr->Mask(NEON3SameExtraMask)) {
case NEON_SDOT:
@@ -507,11 +665,22 @@
}
}
+void CPUFeaturesAuditor::VisitNEON3SameFP16(const Instruction* instr) {
+ RecordInstructionFeaturesScope scope(this);
+ // All of these instructions require NEON FP16 support.
+ scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
+ USE(instr);
+}
+
void CPUFeaturesAuditor::VisitNEONAcrossLanes(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require NEON.
scope.Record(CPUFeatures::kNEON);
- if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
+ // FMAXV_H, FMINV_H, FMAXNMV_H, FMINNMV_H
+ scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf);
+ } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ // FMAXV, FMINV, FMAXNMV, FMINNMV
scope.Record(CPUFeatures::kFP);
}
}
@@ -534,6 +703,12 @@
break;
}
switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMLA_H_byelement:
+ case NEON_FMLS_H_byelement:
+ case NEON_FMUL_H_byelement:
+ case NEON_FMULX_H_byelement:
+ scope.Record(CPUFeatures::kNEONHalf);
+ VIXL_FALLTHROUGH();
case NEON_FMLA_byelement:
case NEON_FMLS_byelement:
case NEON_FMUL_byelement:
@@ -544,6 +719,7 @@
switch (instr->Mask(NEONByIndexedElementFPComplexMask)) {
case NEON_FCMLA_byelement:
scope.Record(CPUFeatures::kFP, CPUFeatures::kFcma);
+ if (instr->GetNEONSize() == 1) scope.Record(CPUFeatures::kNEONHalf);
return;
}
// No additional features.
@@ -650,6 +826,13 @@
}
}
+void CPUFeaturesAuditor::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
+ RecordInstructionFeaturesScope scope(this);
+ // All of these instructions require FP, NEON and NEONHalf.
+ scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
+ USE(instr);
+}
+
void CPUFeaturesAuditor::VisitNEONScalar3Diff(const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require NEON.
@@ -673,26 +856,39 @@
USE(instr);
}
+void CPUFeaturesAuditor::VisitNEONScalar3SameFP16(const Instruction* instr) {
+ RecordInstructionFeaturesScope scope(this);
+ // All of these instructions require FP, NEON and NEONHalf.
+ scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf);
+ USE(instr);
+}
+
void CPUFeaturesAuditor::VisitNEONScalarByIndexedElement(
const Instruction* instr) {
RecordInstructionFeaturesScope scope(this);
// All of these instructions require NEON.
scope.Record(CPUFeatures::kNEON);
switch (instr->Mask(NEONScalarByIndexedElementMask)) {
- case NEON_SQDMULL_byelement_scalar:
- case NEON_SQDMLAL_byelement_scalar:
- case NEON_SQDMLSL_byelement_scalar:
- case NEON_SQDMULH_byelement_scalar:
- case NEON_SQRDMULH_byelement_scalar:
- // No additional features.
- return;
case NEON_SQRDMLAH_byelement_scalar:
case NEON_SQRDMLSH_byelement_scalar:
scope.Record(CPUFeatures::kRDM);
return;
default:
- // FMUL, FMLA, FMLS, FMULX
- scope.Record(CPUFeatures::kFP);
+ switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMLA_H_byelement_scalar:
+ case NEON_FMLS_H_byelement_scalar:
+ case NEON_FMUL_H_byelement_scalar:
+ case NEON_FMULX_H_byelement_scalar:
+ scope.Record(CPUFeatures::kNEONHalf);
+ VIXL_FALLTHROUGH();
+ case NEON_FMLA_byelement_scalar:
+ case NEON_FMLS_byelement_scalar:
+ case NEON_FMUL_byelement_scalar:
+ case NEON_FMULX_byelement_scalar:
+ scope.Record(CPUFeatures::kFP);
+ return;
+ }
+ // No additional features.
return;
}
}
@@ -709,6 +905,13 @@
// All of these instructions require NEON.
scope.Record(CPUFeatures::kNEON);
switch (instr->Mask(NEONScalarPairwiseMask)) {
+ case NEON_FMAXNMP_h_scalar:
+ case NEON_FADDP_h_scalar:
+ case NEON_FMAXP_h_scalar:
+ case NEON_FMINNMP_h_scalar:
+ case NEON_FMINP_h_scalar:
+ scope.Record(CPUFeatures::kNEONHalf);
+ VIXL_FALLTHROUGH();
case NEON_FADDP_scalar:
case NEON_FMAXP_scalar:
case NEON_FMAXNMP_scalar:
@@ -778,8 +981,35 @@
}
void CPUFeaturesAuditor::VisitSystem(const Instruction* instr) {
- USE(instr);
RecordInstructionFeaturesScope scope(this);
+ if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
+ CPUFeatures required;
+ switch (instr->GetInstructionBits()) {
+ case PACIA1716:
+ case PACIB1716:
+ case AUTIA1716:
+ case AUTIB1716:
+ case PACIAZ:
+ case PACIASP:
+ case PACIBZ:
+ case PACIBSP:
+ case AUTIAZ:
+ case AUTIASP:
+ case AUTIBZ:
+ case AUTIBSP:
+ case XPACLRI:
+ required.Combine(CPUFeatures::kPAuth);
+ break;
+ default:
+ if (instr->GetImmHint() == ESB) required.Combine(CPUFeatures::kRAS);
+ break;
+ }
+
+ // These are all HINT instructions, and they behave as NOPs if the
+ // corresponding features are not implemented, so we only record the
+ // features when they are available.
+ if (available_.Has(required)) scope.Record(required);
+ }
}
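// Editor's illustration (not part of the patch): a minimal model of the rule
// above, using a plain bitmask in place of CPUFeatures. All names here are
// invented for the sketch.
enum HintFeatureBits : unsigned { kHintNone = 0, kHintPAuth = 1, kHintRAS = 2 };

unsigned FeaturesToRecordForHint(unsigned required, unsigned available) {
  // HINT instructions execute as NOPs when the feature is missing, so only
  // report features that the host actually provides.
  return ((available & required) == required) ? required : kHintNone;
}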
void CPUFeaturesAuditor::VisitTestBranch(const Instruction* instr) {
@@ -799,8 +1029,24 @@
void CPUFeaturesAuditor::VisitUnconditionalBranchToRegister(
const Instruction* instr) {
- USE(instr);
RecordInstructionFeaturesScope scope(this);
+ switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
+ case BRAAZ:
+ case BRABZ:
+ case BLRAAZ:
+ case BLRABZ:
+ case RETAA:
+ case RETAB:
+ case BRAA:
+ case BRAB:
+ case BLRAA:
+ case BLRAB:
+ scope.Record(CPUFeatures::kPAuth);
+ return;
+ default:
+ // No additional features.
+ return;
+ }
}
void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
diff --git a/src/aarch64/decoder-aarch64.cc b/src/aarch64/decoder-aarch64.cc
index 9db8b51..4cac45c 100644
--- a/src/aarch64/decoder-aarch64.cc
+++ b/src/aarch64/decoder-aarch64.cc
@@ -257,10 +257,11 @@
}
}
} else {
- if ((instr->ExtractBit(24) == 0x1) ||
+ if (((instr->ExtractBit(24) == 0x1) &&
+ (instr->ExtractBits(23, 21) > 0x1)) ||
(instr->ExtractBits(20, 16) != 0x1F) ||
- (instr->ExtractBits(15, 10) != 0) ||
- (instr->ExtractBits(4, 0) != 0) ||
+ (instr->ExtractBits(15, 10) == 0x1) ||
+ (instr->ExtractBits(15, 10) > 0x3) ||
(instr->ExtractBits(24, 21) == 0x3) ||
(instr->ExtractBits(24, 22) == 0x3)) {
VisitUnallocated(instr);
@@ -323,8 +324,7 @@
VisitLoadLiteral(instr);
}
} else {
- if ((instr->Mask(0x84C00000) == 0x80C00000) ||
- (instr->Mask(0x44800000) == 0x44800000) ||
+ if ((instr->Mask(0x44800000) == 0x44800000) ||
(instr->Mask(0x84800000) == 0x84800000)) {
VisitUnallocated(instr);
} else {
@@ -364,7 +364,29 @@
VisitLoadStoreRegisterOffset(instr);
}
} else {
- VisitUnallocated(instr);
+ if (instr->ExtractBits(11, 10) == 0x0) {
+ if (instr->ExtractBit(25) == 0) {
+ if (instr->ExtractBit(26) == 0) {
+ if ((instr->ExtractBit(15) == 1) &&
+ ((instr->ExtractBits(14, 12) == 0x1) ||
+ (instr->ExtractBit(13) == 1) ||
+ (instr->ExtractBits(14, 12) == 0x5) ||
+ ((instr->ExtractBits(14, 12) == 0x4) &&
+ ((instr->ExtractBit(23) == 0) ||
+ (instr->ExtractBits(23, 22) == 0x3))))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitAtomicMemory(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
}
}
}
@@ -507,7 +529,8 @@
if ((instr->ExtractBit(15) == 0x1) ||
(instr->ExtractBits(15, 11) == 0) ||
(instr->ExtractBits(15, 12) == 0x1) ||
- (instr->ExtractBits(15, 12) == 0x3) ||
+ ((instr->ExtractBits(15, 12) == 0x3) &&
+ (instr->ExtractBit(31) == 0)) ||
(instr->ExtractBits(15, 13) == 0x3) ||
(instr->Mask(0x8000EC00) == 0x00004C00) ||
(instr->Mask(0x8000E800) == 0x80004000) ||
@@ -517,11 +540,15 @@
VisitDataProcessing2Source(instr);
}
} else {
- if ((instr->ExtractBit(13) == 1) ||
- (instr->ExtractBits(20, 16) != 0) ||
- (instr->ExtractBits(15, 14) != 0) ||
- (instr->Mask(0xA01FFC00) == 0x00000C00) ||
- (instr->Mask(0x201FF800) == 0x00001800)) {
+ if ((instr->ExtractBits(20, 17) != 0) ||
+ (instr->ExtractBit(15) == 1) ||
+ ((instr->ExtractBit(16) == 1) &&
+ ((instr->ExtractBits(14, 10) > 17) ||
+ (instr->ExtractBit(31) == 0))) ||
+ ((instr->ExtractBit(16) == 0) &&
+ ((instr->ExtractBits(14, 13) != 0) ||
+ (instr->Mask(0xA01FFC00) == 0x00000C00) ||
+ (instr->Mask(0x201FF800) == 0x00001800)))) {
VisitUnallocated(instr);
} else {
VisitDataProcessing1Source(instr);
@@ -588,7 +615,8 @@
if (instr->ExtractBit(29) == 0) {
if (instr->ExtractBit(24) == 0) {
if (instr->ExtractBit(21) == 0) {
- if ((instr->ExtractBit(23) == 1) || (instr->ExtractBit(18) == 1) ||
+ if ((instr->ExtractBits(23, 22) == 0x2) ||
+ (instr->ExtractBit(18) == 1) ||
(instr->Mask(0x80008000) == 0x00000000) ||
(instr->Mask(0x000E0000) == 0x00000000) ||
(instr->Mask(0x000E0000) == 0x000A0000) ||
@@ -610,7 +638,6 @@
(instr->Mask(0x20C60000) == 0x00840000) ||
(instr->Mask(0xA0C60000) == 0x80060000) ||
(instr->Mask(0xA0C60000) == 0x00860000) ||
- (instr->Mask(0xA0C60000) == 0x00460000) ||
(instr->Mask(0xA0CE0000) == 0x80860000) ||
(instr->Mask(0xA0CE0000) == 0x804E0000) ||
(instr->Mask(0xA0CE0000) == 0x000E0000) ||
@@ -632,7 +659,7 @@
(masked_A0DF8000 == 0x00468000) ||
(instr->Mask(0xA0D80000) == 0x00800000) ||
(instr->Mask(0xA0DF0000) == 0x00C30000) ||
- (instr->Mask(0xA0DC0000) == 0x00C40000)) {
+ (instr->Mask(0xA0DF8000) == 0x00C68000)) {
VisitUnallocated(instr);
} else {
VisitFPDataProcessing1Source(instr);
@@ -640,7 +667,8 @@
} else if (instr->ExtractBits(13, 10) == 8) {
if ((instr->ExtractBits(15, 14) != 0) ||
(instr->ExtractBits(2, 0) != 0) ||
- (instr->Mask(0x80800000) != 0x00000000)) {
+ (instr->ExtractBit(31) == 1) ||
+ (instr->ExtractBits(23, 22) == 0x2)) {
VisitUnallocated(instr);
} else {
VisitFPCompare(instr);
@@ -655,7 +683,8 @@
VisitFPImmediate(instr);
}
} else {
- if (instr->Mask(0x80800000) != 0x00000000) {
+ if ((instr->ExtractBits(23, 22) == 0x2) ||
+ (instr->ExtractBit(31) == 1)) {
VisitUnallocated(instr);
} else {
switch (instr->ExtractBits(11, 10)) {
@@ -664,9 +693,7 @@
break;
}
case 2: {
- if ((instr->ExtractBits(15, 14) == 0x3) ||
- (instr->Mask(0x00009000) == 0x00009000) ||
- (instr->Mask(0x0000A000) == 0x0000A000)) {
+ if (instr->ExtractBits(15, 12) > 0x8) {
VisitUnallocated(instr);
} else {
VisitFPDataProcessing2Source(instr);
@@ -686,7 +713,8 @@
} else {
// Bit 30 == 1 has been handled earlier.
VIXL_ASSERT(instr->ExtractBit(30) == 0);
- if (instr->Mask(0xA0800000) != 0) {
+ if ((instr->Mask(0xA0000000) != 0) ||
+ (instr->ExtractBits(23, 22) == 0x2)) {
VisitUnallocated(instr);
} else {
VisitFPDataProcessing3Source(instr);
@@ -750,6 +778,27 @@
} else {
if (instr->ExtractBits(23, 22) == 0) {
VisitNEONCopy(instr);
+ } else if (instr->ExtractBit(14) == 0x0 &&
+ instr->ExtractBit(22) == 0x1) {
+ // U + a + opcode.
+ uint8_t decode_field =
+ (instr->ExtractBit(29) << 1) | instr->ExtractBit(23);
+ decode_field = (decode_field << 3) | instr->ExtractBits(13, 11);
+ switch (decode_field) {
+ case 0x5:
+ case 0xB:
+ case 0xC:
+ case 0xD:
+ case 0x11:
+ case 0x19:
+ case 0x1B:
+ case 0x1F:
+ VisitUnallocated(instr);
+ break;
+ default:
+ VisitNEON3SameFP16(instr);
+ break;
+ }
} else {
VisitUnallocated(instr);
}
@@ -799,7 +848,23 @@
if (instr->ExtractBit(19) == 0) {
VisitNEONAcrossLanes(instr);
} else {
- VisitUnallocated(instr);
+ if (instr->ExtractBit(22) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->ExtractBits(16, 15) == 0x0) ||
+ (instr->ExtractBits(16, 14) == 0x2) ||
+ (instr->ExtractBits(16, 15) == 0x2) ||
+ (instr->ExtractBits(16, 12) == 0x1e) ||
+ ((instr->ExtractBit(23) == 0) &&
+ ((instr->ExtractBits(16, 14) == 0x3) ||
+ (instr->ExtractBits(16, 12) == 0x1f))) ||
+ ((instr->ExtractBit(23) == 1) &&
+ (instr->ExtractBits(16, 12) == 0x1c))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitNEON2RegMiscFP16(instr);
+ }
+ }
}
}
} else {
@@ -850,7 +915,26 @@
if (instr->ExtractBits(23, 22) == 0) {
VisitNEONScalarCopy(instr);
} else {
- VisitUnallocated(instr);
+ if (instr->Mask(0x00404000) == 0x00400000) {
+ if ((instr->ExtractBits(13, 11) == 0x6) ||
+ (instr->ExtractBits(13, 11) < 2) ||
+ ((instr->Mask(0x20800000) == 0x00000000) &&
+ ((instr->ExtractBits(13, 11) < 0x3) ||
+ (instr->ExtractBits(13, 11) == 0x5))) ||
+ ((instr->Mask(0x20800000) == 0x00800000) &&
+ (instr->ExtractBits(13, 11) < 0x7)) ||
+ ((instr->Mask(0x20800000) == 0x20000000) &&
+ ((instr->ExtractBits(13, 11) < 0x4) ||
+ (instr->ExtractBits(13, 11) == 0x7))) ||
+ ((instr->Mask(0x20800000) == 0x20800000) &&
+ (instr->ExtractBits(12, 11) == 0x3))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitNEONScalar3SameFP16(instr);
+ }
+ } else {
+ VisitUnallocated(instr);
+ }
}
}
} else {
@@ -884,7 +968,28 @@
if (instr->ExtractBit(19) == 0) {
VisitNEONScalarPairwise(instr);
} else {
- VisitUnallocated(instr);
+ if (instr->ExtractBit(22) == 0) {
+ VisitUnallocated(instr);
+ } else {
+ if ((instr->ExtractBits(16, 15) == 0x0) ||
+ (instr->ExtractBits(16, 14) == 0x2) ||
+ (instr->ExtractBits(16, 15) == 0x2) ||
+ (instr->ExtractBits(16, 13) == 0xc) ||
+ (instr->ExtractBits(16, 12) == 0x1e) ||
+ ((instr->ExtractBit(23) == 0) &&
+ ((instr->ExtractBits(16, 14) == 0x3) ||
+ (instr->ExtractBits(16, 12) == 0x1f))) ||
+ ((instr->ExtractBit(23) == 1) &&
+ ((instr->ExtractBits(16, 12) == 0xf) ||
+ (instr->ExtractBits(16, 12) == 0x1c) ||
+ ((instr->ExtractBit(29) == 1) &&
+ ((instr->ExtractBits(16, 12) == 0xe) ||
+ (instr->ExtractBits(16, 12) == 0x1f)))))) {
+ VisitUnallocated(instr);
+ } else {
+ VisitNEONScalar2RegMiscFP16(instr);
+ }
+ }
}
}
} else {
diff --git a/src/aarch64/decoder-aarch64.h b/src/aarch64/decoder-aarch64.h
index 4ebc7ce..100fbb3 100644
--- a/src/aarch64/decoder-aarch64.h
+++ b/src/aarch64/decoder-aarch64.h
@@ -37,56 +37,54 @@
// List macro containing all visitors needed by the decoder class.
#define VISITOR_LIST_THAT_RETURN(V) \
- V(PCRelAddressing) \
- V(AddSubImmediate) \
- V(LogicalImmediate) \
- V(MoveWideImmediate) \
- V(Bitfield) \
- V(Extract) \
- V(UnconditionalBranch) \
- V(UnconditionalBranchToRegister) \
- V(CompareBranch) \
- V(TestBranch) \
- V(ConditionalBranch) \
- V(System) \
- V(Exception) \
- V(LoadStorePairPostIndex) \
- V(LoadStorePairOffset) \
- V(LoadStorePairPreIndex) \
- V(LoadStorePairNonTemporal) \
- V(LoadLiteral) \
- V(LoadStoreUnscaledOffset) \
- V(LoadStorePostIndex) \
- V(LoadStorePreIndex) \
- V(LoadStoreRegisterOffset) \
- V(LoadStoreUnsignedOffset) \
- V(LoadStoreExclusive) \
- V(LogicalShifted) \
- V(AddSubShifted) \
V(AddSubExtended) \
+ V(AddSubImmediate) \
+ V(AddSubShifted) \
V(AddSubWithCarry) \
- V(ConditionalCompareRegister) \
+ V(AtomicMemory) \
+ V(Bitfield) \
+ V(CompareBranch) \
+ V(ConditionalBranch) \
V(ConditionalCompareImmediate) \
+ V(ConditionalCompareRegister) \
V(ConditionalSelect) \
- V(DataProcessing1Source) \
- V(DataProcessing2Source) \
- V(DataProcessing3Source) \
- V(FPCompare) \
- V(FPConditionalCompare) \
- V(FPConditionalSelect) \
- V(FPImmediate) \
- V(FPDataProcessing1Source) \
- V(FPDataProcessing2Source) \
- V(FPDataProcessing3Source) \
- V(FPIntegerConvert) \
- V(FPFixedPointConvert) \
V(Crypto2RegSHA) \
V(Crypto3RegSHA) \
V(CryptoAES) \
+ V(DataProcessing1Source) \
+ V(DataProcessing2Source) \
+ V(DataProcessing3Source) \
+ V(Exception) \
+ V(Extract) \
+ V(FPCompare) \
+ V(FPConditionalCompare) \
+ V(FPConditionalSelect) \
+ V(FPDataProcessing1Source) \
+ V(FPDataProcessing2Source) \
+ V(FPDataProcessing3Source) \
+ V(FPFixedPointConvert) \
+ V(FPImmediate) \
+ V(FPIntegerConvert) \
+ V(LoadLiteral) \
+ V(LoadStoreExclusive) \
+ V(LoadStorePairNonTemporal) \
+ V(LoadStorePairOffset) \
+ V(LoadStorePairPostIndex) \
+ V(LoadStorePairPreIndex) \
+ V(LoadStorePostIndex) \
+ V(LoadStorePreIndex) \
+ V(LoadStoreRegisterOffset) \
+ V(LoadStoreUnscaledOffset) \
+ V(LoadStoreUnsignedOffset) \
+ V(LogicalImmediate) \
+ V(LogicalShifted) \
+ V(MoveWideImmediate) \
V(NEON2RegMisc) \
+ V(NEON2RegMiscFP16) \
V(NEON3Different) \
V(NEON3Same) \
V(NEON3SameExtra) \
+ V(NEON3SameFP16) \
V(NEONAcrossLanes) \
V(NEONByIndexedElement) \
V(NEONCopy) \
@@ -96,17 +94,24 @@
V(NEONLoadStoreSingleStruct) \
V(NEONLoadStoreSingleStructPostIndex) \
V(NEONModifiedImmediate) \
+ V(NEONPerm) \
V(NEONScalar2RegMisc) \
+ V(NEONScalar2RegMiscFP16) \
V(NEONScalar3Diff) \
V(NEONScalar3Same) \
V(NEONScalar3SameExtra) \
+ V(NEONScalar3SameFP16) \
V(NEONScalarByIndexedElement) \
V(NEONScalarCopy) \
V(NEONScalarPairwise) \
V(NEONScalarShiftImmediate) \
V(NEONShiftImmediate) \
V(NEONTable) \
- V(NEONPerm)
+ V(PCRelAddressing) \
+ V(System) \
+ V(TestBranch) \
+ V(UnconditionalBranch) \
+ V(UnconditionalBranchToRegister)
#define VISITOR_LIST_THAT_DONT_RETURN(V) \
V(Unallocated) \
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 0b76ae6..1c00443 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -28,7 +28,6 @@
#include <sstream>
#include "disasm-aarch64.h"
-#include "utils-aarch64.h"
namespace vixl {
namespace aarch64 {
@@ -489,6 +488,7 @@
const char *form_shift_right = "'Rd, 'Rn, 'IBr";
const char *form_extend = "'Rd, 'Wn";
const char *form_bfiz = "'Rd, 'Rn, 'IBZ-r, 'IBs+1";
+ const char *form_bfc = "'Rd, 'IBZ-r, 'IBs+1";
const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1";
const char *form_lsl = "'Rd, 'Rn, 'IBZ-r";
@@ -548,8 +548,13 @@
mnemonic = "bfxil";
form = form_bfx;
if (s < r) {
- mnemonic = "bfi";
- form = form_bfiz;
+ if (instr->GetRn() == kZeroRegCode) {
+ mnemonic = "bfc";
+ form = form_bfc;
+ } else {
+ mnemonic = "bfi";
+ form = form_bfiz;
+ }
}
}
}
@@ -607,22 +612,66 @@
void Disassembler::VisitUnconditionalBranchToRegister(
const Instruction *instr) {
const char *mnemonic = "unimplemented";
- const char *form = "'Xn";
+ const char *form;
switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
case BR:
mnemonic = "br";
+ form = "'Xn";
break;
case BLR:
mnemonic = "blr";
+ form = "'Xn";
break;
case RET: {
mnemonic = "ret";
if (instr->GetRn() == kLinkRegCode) {
form = NULL;
+ } else {
+ form = "'Xn";
}
break;
}
+ case BRAAZ:
+ mnemonic = "braaz";
+ form = "'Xn";
+ break;
+ case BRABZ:
+ mnemonic = "brabz";
+ form = "'Xn";
+ break;
+ case BLRAAZ:
+ mnemonic = "blraaz";
+ form = "'Xn";
+ break;
+ case BLRABZ:
+ mnemonic = "blrabz";
+ form = "'Xn";
+ break;
+ case RETAA:
+ mnemonic = "retaa";
+ form = NULL;
+ break;
+ case RETAB:
+ mnemonic = "retab";
+ form = NULL;
+ break;
+ case BRAA:
+ mnemonic = "braa";
+ form = "'Xn, 'Xds";
+ break;
+ case BRAB:
+ mnemonic = "brab";
+ form = "'Xn, 'Xds";
+ break;
+ case BLRAA:
+ mnemonic = "blraa";
+ form = "'Xn, 'Xds";
+ break;
+ case BLRAB:
+ mnemonic = "blrab";
+ form = "'Xn, 'Xds";
+ break;
default:
form = "(UnconditionalBranchToRegister)";
}
@@ -664,6 +713,41 @@
FORMAT(CLZ, "clz");
FORMAT(CLS, "cls");
#undef FORMAT
+
+#define PAUTH_VARIATIONS(V) \
+ V(PACI, "paci") \
+ V(PACD, "pacd") \
+ V(AUTI, "auti") \
+ V(AUTD, "autd")
+#define PAUTH_CASE(NAME, MN) \
+ case NAME##A: \
+ mnemonic = MN "a"; \
+ form = "'Xd, 'Xns"; \
+ break; \
+ case NAME##ZA: \
+ mnemonic = MN "za"; \
+ form = "'Xd"; \
+ break; \
+ case NAME##B: \
+ mnemonic = MN "b"; \
+ form = "'Xd, 'Xns"; \
+ break; \
+ case NAME##ZB: \
+ mnemonic = MN "zb"; \
+ form = "'Xd"; \
+ break;
+
+ PAUTH_VARIATIONS(PAUTH_CASE)
+#undef PAUTH_CASE
+
+ case XPACI:
+ mnemonic = "xpaci";
+ form = "'Xd";
+ break;
+ case XPACD:
+ mnemonic = "xpacd";
+ form = "'Xd";
+ break;
case REV32_x:
mnemonic = "rev32";
break;
@@ -692,6 +776,10 @@
FORMAT(ASRV, "asr");
FORMAT(RORV, "ror");
#undef FORMAT
+ case PACGA:
+ mnemonic = "pacga";
+ form = "'Xd, 'Xn, 'Xms";
+ break;
case CRC32B:
mnemonic = "crc32b";
break;
@@ -1347,6 +1435,152 @@
Format(instr, mnemonic, form);
}
+#define ATOMIC_MEMORY_SIMPLE_LIST(V) \
+ V(LDADD, "add") \
+ V(LDCLR, "clr") \
+ V(LDEOR, "eor") \
+ V(LDSET, "set") \
+ V(LDSMAX, "smax") \
+ V(LDSMIN, "smin") \
+ V(LDUMAX, "umax") \
+ V(LDUMIN, "umin")
+
+void Disassembler::VisitAtomicMemory(const Instruction *instr) {
+ const int kMaxAtomicOpMnemonicLength = 16;
+ const char *mnemonic;
+ const char *form = "'Ws, 'Wt, ['Xns]";
+
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##B: \
+ mnemonic = MN "b"; \
+ break; \
+ case A##AB: \
+ mnemonic = MN "ab"; \
+ break; \
+ case A##LB: \
+ mnemonic = MN "lb"; \
+ break; \
+ case A##ALB: \
+ mnemonic = MN "alb"; \
+ break; \
+ case A##H: \
+ mnemonic = MN "h"; \
+ break; \
+ case A##AH: \
+ mnemonic = MN "ah"; \
+ break; \
+ case A##LH: \
+ mnemonic = MN "lh"; \
+ break; \
+ case A##ALH: \
+ mnemonic = MN "alh"; \
+ break; \
+ case A##_w: \
+ mnemonic = MN; \
+ break; \
+ case A##A_w: \
+ mnemonic = MN "a"; \
+ break; \
+ case A##L_w: \
+ mnemonic = MN "l"; \
+ break; \
+ case A##AL_w: \
+ mnemonic = MN "al"; \
+ break; \
+ case A##_x: \
+ mnemonic = MN; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##A_x: \
+ mnemonic = MN "a"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##L_x: \
+ mnemonic = MN "l"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break; \
+ case A##AL_x: \
+ mnemonic = MN "al"; \
+ form = "'Xs, 'Xt, ['Xns]"; \
+ break;
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+
+ // SWP has the same operand forms and suffixes as LDADD etc., but no store
+ // aliases.
+ AMS(SWP, "swp")
+#undef AMS
+
+ case LDAPRB:
+ mnemonic = "ldaprb";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPRH:
+ mnemonic = "ldaprh";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_w:
+ mnemonic = "ldapr";
+ form = "'Wt, ['Xns]";
+ break;
+ case LDAPR_x:
+ mnemonic = "ldapr";
+ form = "'Xt, ['Xns]";
+ break;
+ default:
+ mnemonic = "unimplemented";
+ form = "(AtomicMemory)";
+ }
+
+ const char *prefix = "";
+ switch (instr->Mask(AtomicMemoryMask)) {
+#define AMS(A, MN) \
+ case A##AB: \
+ case A##ALB: \
+ case A##AH: \
+ case A##ALH: \
+ case A##A_w: \
+ case A##AL_w: \
+ case A##A_x: \
+ case A##AL_x: \
+ prefix = "ld"; \
+ break; \
+ case A##B: \
+ case A##LB: \
+ case A##H: \
+ case A##LH: \
+ case A##_w: \
+ case A##L_w: { \
+ prefix = "ld"; \
+ unsigned rt = instr->GetRt(); \
+ if (Register(rt, 32).IsZero()) { \
+ prefix = "st"; \
+ form = "'Ws, ['Xns]"; \
+ } \
+ break; \
+ } \
+ case A##_x: \
+ case A##L_x: { \
+ prefix = "ld"; \
+ unsigned rt = instr->GetRt(); \
+ if (Register(rt, 64).IsZero()) { \
+ prefix = "st"; \
+ form = "'Xs, ['Xns]"; \
+ } \
+ break; \
+ }
+ ATOMIC_MEMORY_SIMPLE_LIST(AMS)
+#undef AMS
+ }
+
+ char buffer[kMaxAtomicOpMnemonicLength];
+ if (strlen(prefix) > 0) {
+ snprintf(buffer, kMaxAtomicOpMnemonicLength, "%s%s", prefix, mnemonic);
+ mnemonic = buffer;
+ }
+
+ Format(instr, mnemonic, form);
+}
+
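// Editor's illustration (not part of the patch): the store-alias rule above in
// isolation. Non-acquire LD<op> forms that discard the loaded value (Rt is
// register 31, the zero register) are printed with the ST<op> alias instead.
// The function name is invented for the sketch.
const char *AtomicPrefixSketch(unsigned rt, bool has_acquire_semantics) {
  // Acquire forms have no store alias, so they always keep the "ld" prefix.
  if (has_acquire_semantics) return "ld";
  return (rt == 31) ? "st" : "ld";
}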
void Disassembler::VisitFPCompare(const Instruction *instr) {
const char *mnemonic = "unimplemented";
@@ -1354,18 +1588,22 @@
const char *form_zero = "'Fn, #0.0";
switch (instr->Mask(FPCompareMask)) {
+ case FCMP_h_zero:
case FCMP_s_zero:
case FCMP_d_zero:
form = form_zero;
VIXL_FALLTHROUGH();
+ case FCMP_h:
case FCMP_s:
case FCMP_d:
mnemonic = "fcmp";
break;
+ case FCMPE_h_zero:
case FCMPE_s_zero:
case FCMPE_d_zero:
form = form_zero;
VIXL_FALLTHROUGH();
+ case FCMPE_h:
case FCMPE_s:
case FCMPE_d:
mnemonic = "fcmpe";
@@ -1382,10 +1620,12 @@
const char *form = "'Fn, 'Fm, 'INzcv, 'Cond";
switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMP_h:
case FCCMP_s:
case FCCMP_d:
mnemonic = "fccmp";
break;
+ case FCCMPE_h:
case FCCMPE_s:
case FCCMPE_d:
mnemonic = "fccmpe";
@@ -1402,6 +1642,7 @@
const char *form = "'Fd, 'Fn, 'Fm, 'Cond";
switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_h:
case FCSEL_s:
case FCSEL_d:
mnemonic = "fcsel";
@@ -1418,7 +1659,6 @@
const char *form = "'Fd, 'Fn";
switch (instr->Mask(FPDataProcessing1SourceMask)) {
-// Duplicated until half precision support for all fp instructions.
#define FORMAT(A, B) \
case A##_h: \
case A##_s: \
@@ -1426,12 +1666,6 @@
mnemonic = B; \
break;
FORMAT(FMOV, "fmov");
-#undef FORMAT
-#define FORMAT(A, B) \
- case A##_s: \
- case A##_d: \
- mnemonic = B; \
- break;
FORMAT(FABS, "fabs");
FORMAT(FNEG, "fneg");
FORMAT(FSQRT, "fsqrt");
@@ -1480,14 +1714,15 @@
switch (instr->Mask(FPDataProcessing2SourceMask)) {
#define FORMAT(A, B) \
+ case A##_h: \
case A##_s: \
case A##_d: \
mnemonic = B; \
break;
- FORMAT(FMUL, "fmul");
- FORMAT(FDIV, "fdiv");
FORMAT(FADD, "fadd");
FORMAT(FSUB, "fsub");
+ FORMAT(FMUL, "fmul");
+ FORMAT(FDIV, "fdiv");
FORMAT(FMAX, "fmax");
FORMAT(FMIN, "fmin");
FORMAT(FMAXNM, "fmaxnm");
@@ -1507,6 +1742,7 @@
switch (instr->Mask(FPDataProcessing3SourceMask)) {
#define FORMAT(A, B) \
+ case A##_h: \
case A##_s: \
case A##_d: \
mnemonic = B; \
@@ -1575,6 +1811,8 @@
mnemonic = "fmov";
form = "'Rd, 'Vn.D[1]";
break;
+ case FCVTAS_wh:
+ case FCVTAS_xh:
case FCVTAS_ws:
case FCVTAS_xs:
case FCVTAS_wd:
@@ -1582,6 +1820,8 @@
mnemonic = "fcvtas";
form = form_rf;
break;
+ case FCVTAU_wh:
+ case FCVTAU_xh:
case FCVTAU_ws:
case FCVTAU_xs:
case FCVTAU_wd:
@@ -1589,6 +1829,8 @@
mnemonic = "fcvtau";
form = form_rf;
break;
+ case FCVTMS_wh:
+ case FCVTMS_xh:
case FCVTMS_ws:
case FCVTMS_xs:
case FCVTMS_wd:
@@ -1596,6 +1838,8 @@
mnemonic = "fcvtms";
form = form_rf;
break;
+ case FCVTMU_wh:
+ case FCVTMU_xh:
case FCVTMU_ws:
case FCVTMU_xs:
case FCVTMU_wd:
@@ -1603,6 +1847,8 @@
mnemonic = "fcvtmu";
form = form_rf;
break;
+ case FCVTNS_wh:
+ case FCVTNS_xh:
case FCVTNS_ws:
case FCVTNS_xs:
case FCVTNS_wd:
@@ -1610,6 +1856,8 @@
mnemonic = "fcvtns";
form = form_rf;
break;
+ case FCVTNU_wh:
+ case FCVTNU_xh:
case FCVTNU_ws:
case FCVTNU_xs:
case FCVTNU_wd:
@@ -1617,34 +1865,44 @@
mnemonic = "fcvtnu";
form = form_rf;
break;
- case FCVTZU_xd:
+ case FCVTZU_wh:
+ case FCVTZU_xh:
case FCVTZU_ws:
- case FCVTZU_wd:
case FCVTZU_xs:
+ case FCVTZU_wd:
+ case FCVTZU_xd:
mnemonic = "fcvtzu";
form = form_rf;
break;
- case FCVTZS_xd:
- case FCVTZS_wd:
- case FCVTZS_xs:
+ case FCVTZS_wh:
+ case FCVTZS_xh:
case FCVTZS_ws:
+ case FCVTZS_xs:
+ case FCVTZS_wd:
+ case FCVTZS_xd:
mnemonic = "fcvtzs";
form = form_rf;
break;
- case FCVTPU_xd:
- case FCVTPU_ws:
- case FCVTPU_wd:
+ case FCVTPU_wh:
+ case FCVTPU_xh:
case FCVTPU_xs:
+ case FCVTPU_wd:
+ case FCVTPU_ws:
+ case FCVTPU_xd:
mnemonic = "fcvtpu";
form = form_rf;
break;
- case FCVTPS_xd:
- case FCVTPS_wd:
- case FCVTPS_xs:
+ case FCVTPS_wh:
+ case FCVTPS_xh:
case FCVTPS_ws:
+ case FCVTPS_xs:
+ case FCVTPS_wd:
+ case FCVTPS_xd:
mnemonic = "fcvtps";
form = form_rf;
break;
+ case SCVTF_hw:
+ case SCVTF_hx:
case SCVTF_sw:
case SCVTF_sx:
case SCVTF_dw:
@@ -1652,6 +1910,8 @@
mnemonic = "scvtf";
form = form_fr;
break;
+ case UCVTF_hw:
+ case UCVTF_hx:
case UCVTF_sw:
case UCVTF_sx:
case UCVTF_dw:
@@ -1659,6 +1919,10 @@
mnemonic = "ucvtf";
form = form_fr;
break;
+ case FJCVTZS:
+ mnemonic = "fjcvtzs";
+ form = form_rf;
+ break;
}
Format(instr, mnemonic, form);
}
@@ -1670,18 +1934,24 @@
const char *form_fr = "'Fd, 'Rn, 'IFPFBits";
switch (instr->Mask(FPFixedPointConvertMask)) {
+ case FCVTZS_wh_fixed:
+ case FCVTZS_xh_fixed:
case FCVTZS_ws_fixed:
case FCVTZS_xs_fixed:
case FCVTZS_wd_fixed:
case FCVTZS_xd_fixed:
mnemonic = "fcvtzs";
break;
+ case FCVTZU_wh_fixed:
+ case FCVTZU_xh_fixed:
case FCVTZU_ws_fixed:
case FCVTZU_xs_fixed:
case FCVTZU_wd_fixed:
case FCVTZU_xd_fixed:
mnemonic = "fcvtzu";
break;
+ case SCVTF_hw_fixed:
+ case SCVTF_hx_fixed:
case SCVTF_sw_fixed:
case SCVTF_sx_fixed:
case SCVTF_dw_fixed:
@@ -1689,6 +1959,8 @@
mnemonic = "scvtf";
form = form_fr;
break;
+ case UCVTF_hw_fixed:
+ case UCVTF_hx_fixed:
case UCVTF_sw_fixed:
case UCVTF_sx_fixed:
case UCVTF_dw_fixed:
@@ -1702,6 +1974,21 @@
Format(instr, mnemonic, form);
}
+// clang-format off
+#define PAUTH_SYSTEM_MNEMONICS(V) \
+ V(PACIA1716, "pacia1716") \
+ V(PACIB1716, "pacib1716") \
+ V(AUTIA1716, "autia1716") \
+ V(AUTIB1716, "autib1716") \
+ V(PACIAZ, "paciaz") \
+ V(PACIASP, "paciasp") \
+ V(PACIBZ, "pacibz") \
+ V(PACIBSP, "pacibsp") \
+ V(AUTIAZ, "autiaz") \
+ V(AUTIASP, "autiasp") \
+ V(AUTIBZ, "autibz") \
+ V(AUTIBSP, "autibsp")
+// clang-format on
void Disassembler::VisitSystem(const Instruction *instr) {
// Some system instructions hijack their Op and Cp fields to represent a
@@ -1709,8 +1996,22 @@
// makes the decoding tricky.
const char *mnemonic = "unimplemented";
const char *form = "(System)";
- if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+ if (instr->GetInstructionBits() == XPACLRI) {
+ mnemonic = "xpaclri";
+ form = NULL;
+ } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
+ switch (instr->Mask(SystemPAuthMask)) {
+#define PAUTH_CASE(NAME, MN) \
+ case NAME: \
+ mnemonic = MN; \
+ form = NULL; \
+ break;
+ PAUTH_SYSTEM_MNEMONICS(PAUTH_CASE)
+#undef PAUTH_CASE
+ }
+ } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
+ SystemExclusiveMonitorFixed) {
switch (instr->Mask(SystemExclusiveMonitorMask)) {
case CLREX: {
mnemonic = "clrex";
@@ -1763,6 +2064,11 @@
mnemonic = "sevl";
break;
}
+ case ESB: {
+ form = NULL;
+ mnemonic = "esb";
+ break;
+ }
case CSDB: {
form = NULL;
mnemonic = "csdb";
@@ -2154,6 +2460,73 @@
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::VisitNEON2RegMiscFP16(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s";
+ const char *form_cmp = "'Vd.%s, 'Vn.%s, #0.0";
+
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
+ NEONFormatDecoder nfd(instr, &map_half);
+
+ switch (instr->Mask(NEON2RegMiscFP16Mask)) {
+// clang-format off
+#define FORMAT(A, B) \
+ case NEON_##A##_H: \
+ mnemonic = B; \
+ break;
+ FORMAT(FABS, "fabs")
+ FORMAT(FCVTAS, "fcvtas")
+ FORMAT(FCVTAU, "fcvtau")
+ FORMAT(FCVTMS, "fcvtms")
+ FORMAT(FCVTMU, "fcvtmu")
+ FORMAT(FCVTNS, "fcvtns")
+ FORMAT(FCVTNU, "fcvtnu")
+ FORMAT(FCVTPS, "fcvtps")
+ FORMAT(FCVTPU, "fcvtpu")
+ FORMAT(FCVTZS, "fcvtzs")
+ FORMAT(FCVTZU, "fcvtzu")
+ FORMAT(FNEG, "fneg")
+ FORMAT(FRECPE, "frecpe")
+ FORMAT(FRINTA, "frinta")
+ FORMAT(FRINTI, "frinti")
+ FORMAT(FRINTM, "frintm")
+ FORMAT(FRINTN, "frintn")
+ FORMAT(FRINTP, "frintp")
+ FORMAT(FRINTX, "frintx")
+ FORMAT(FRINTZ, "frintz")
+ FORMAT(FRSQRTE, "frsqrte")
+ FORMAT(FSQRT, "fsqrt")
+ FORMAT(SCVTF, "scvtf")
+ FORMAT(UCVTF, "ucvtf")
+// clang-format on
+#undef FORMAT
+
+ case NEON_FCMEQ_H_zero:
+ mnemonic = "fcmeq";
+ form = form_cmp;
+ break;
+ case NEON_FCMGT_H_zero:
+ mnemonic = "fcmgt";
+ form = form_cmp;
+ break;
+ case NEON_FCMGE_H_zero:
+ mnemonic = "fcmge";
+ form = form_cmp;
+ break;
+ case NEON_FCMLT_H_zero:
+ mnemonic = "fcmlt";
+ form = form_cmp;
+ break;
+ case NEON_FCMLE_H_zero:
+ mnemonic = "fcmle";
+ form = form_cmp;
+ break;
+ default:
+ form = "(NEON2RegMiscFP16)";
+ }
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
void Disassembler::VisitNEON3Same(const Instruction *instr) {
const char *mnemonic = "unimplemented";
@@ -2329,7 +2702,7 @@
// size (23) and the U bit (29).
unsigned index = (instr->ExtractBits(15, 11) << 2) |
(instr->ExtractBit(23) << 1) | instr->ExtractBit(29);
- VIXL_ASSERT(index < (sizeof(mnemonics) / sizeof(mnemonics[0])));
+ VIXL_ASSERT(index < ArrayLength(mnemonics));
mnemonic = mnemonics[index];
// Assert that index is not one of the previously handled logical
// instructions.
@@ -2342,6 +2715,50 @@
Format(instr, mnemonic, nfd.Substitute(form));
}
+void Disassembler::VisitNEON3SameFP16(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s";
+
+ NEONFormatDecoder nfd(instr);
+ nfd.SetFormatMaps(nfd.FP16FormatMap());
+
+ switch (instr->Mask(NEON3SameFP16Mask)) {
+#define FORMAT(A, B) \
+ case NEON_##A##_H: \
+ mnemonic = B; \
+ break;
+ FORMAT(FMAXNM, "fmaxnm");
+ FORMAT(FMLA, "fmla");
+ FORMAT(FADD, "fadd");
+ FORMAT(FMULX, "fmulx");
+ FORMAT(FCMEQ, "fcmeq");
+ FORMAT(FMAX, "fmax");
+ FORMAT(FRECPS, "frecps");
+ FORMAT(FMINNM, "fminnm");
+ FORMAT(FMLS, "fmls");
+ FORMAT(FSUB, "fsub");
+ FORMAT(FMIN, "fmin");
+ FORMAT(FRSQRTS, "frsqrts");
+ FORMAT(FMAXNMP, "fmaxnmp");
+ FORMAT(FADDP, "faddp");
+ FORMAT(FMUL, "fmul");
+ FORMAT(FCMGE, "fcmge");
+ FORMAT(FACGE, "facge");
+ FORMAT(FMAXP, "fmaxp");
+ FORMAT(FDIV, "fdiv");
+ FORMAT(FMINNMP, "fminnmp");
+ FORMAT(FABD, "fabd");
+ FORMAT(FCMGT, "fcmgt");
+ FORMAT(FACGT, "facgt");
+ FORMAT(FMINP, "fminp");
+#undef FORMAT
+ default:
+ form = "(NEON3SameFP16)";
+ }
+
+ Format(instr, mnemonic, nfd.Substitute(form));
+}
+
void Disassembler::VisitNEON3SameExtra(const Instruction *instr) {
static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
@@ -2491,12 +2908,33 @@
void Disassembler::VisitNEONAcrossLanes(const Instruction *instr) {
const char *mnemonic = "unimplemented";
const char *form = "%sd, 'Vn.%s";
+ const char *form_half = "'Hd, 'Vn.%s";
+ bool half_op = false;
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
NEONFormatDecoder nfd(instr,
NEONFormatDecoder::ScalarFormatMap(),
NEONFormatDecoder::IntegerFormatMap());
- if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
+ half_op = true;
+ form = form_half;
+ nfd.SetFormatMaps(&map_half);
+ switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
+ case NEON_FMAXV_H:
+ mnemonic = "fmaxv";
+ break;
+ case NEON_FMINV_H:
+ mnemonic = "fminv";
+ break;
+ case NEON_FMAXNMV_H:
+ mnemonic = "fmaxnmv";
+ break;
+ case NEON_FMINNMV_H:
+ mnemonic = "fminnmv";
+ break;
+ }
+ } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
nfd.SetFormatMap(0, nfd.FPScalarFormatMap());
nfd.SetFormatMap(1, nfd.FPFormatMap());
switch (instr->Mask(NEONAcrossLanesFPMask)) {
@@ -2546,11 +2984,16 @@
break;
}
}
- Format(instr,
- mnemonic,
- nfd.Substitute(form,
- NEONFormatDecoder::kPlaceholder,
- NEONFormatDecoder::kFormat));
+
+ if (half_op) {
+ Format(instr, mnemonic, nfd.Substitute(form));
+ } else {
+ Format(instr,
+ mnemonic,
+ nfd.Substitute(form,
+ NEONFormatDecoder::kPlaceholder,
+ NEONFormatDecoder::kFormat));
+ }
}
@@ -2559,6 +3002,7 @@
bool l_instr = false;
bool fp_instr = false;
bool cn_instr = false;
+ bool half_instr = false;
const char *form = "'Vd.%s, 'Vn.%s, 'Ve.%s['IVByElemIndex]";
@@ -2567,6 +3011,7 @@
{{23, 22, 30},
{NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}};
static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}};
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
NEONFormatDecoder nfd(instr,
&map_ta,
@@ -2659,6 +3104,22 @@
mnemonic = "fmulx";
fp_instr = true;
break;
+ case NEON_FMLA_H_byelement:
+ mnemonic = "fmla";
+ half_instr = true;
+ break;
+ case NEON_FMLS_H_byelement:
+ mnemonic = "fmls";
+ half_instr = true;
+ break;
+ case NEON_FMUL_H_byelement:
+ mnemonic = "fmul";
+ half_instr = true;
+ break;
+ case NEON_FMULX_H_byelement:
+ mnemonic = "fmulx";
+ half_instr = true;
+ break;
default:
switch (instr->Mask(NEONByIndexedElementFPComplexMask)) {
case NEON_FCMLA_byelement:
@@ -2670,7 +3131,11 @@
}
}
- if (l_instr) {
+ if (half_instr) {
+ form = "'Vd.%s, 'Vn.%s, 'Ve.h['IVByElemIndex]";
+ nfd.SetFormatMaps(&map_half, &map_half);
+ Format(instr, mnemonic, nfd.Substitute(form));
+ } else if (l_instr) {
Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form));
} else if (fp_instr) {
nfd.SetFormatMap(0, nfd.FPFormatMap());
@@ -3488,6 +3953,52 @@
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
+void Disassembler::VisitNEONScalar2RegMiscFP16(const Instruction *instr) {
+ const char *mnemonic = "unimplemented";
+ const char *form = "'Hd, 'Hn";
+ const char *form_fp0 = "'Hd, 'Hn, #0.0";
+
+ switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
+#define FORMAT(A, B) \
+ case NEON_##A##_H_scalar: \
+ mnemonic = B; \
+ break;
+ // clang-format off
+ FORMAT(FCVTNS, "fcvtns")
+ FORMAT(FCVTMS, "fcvtms")
+ FORMAT(FCVTAS, "fcvtas")
+ FORMAT(SCVTF, "scvtf")
+ FORMAT(FCVTPS, "fcvtps")
+ FORMAT(FCVTZS, "fcvtzs")
+ FORMAT(FRECPE, "frecpe")
+ FORMAT(FRECPX, "frecpx")
+ FORMAT(FCVTNU, "fcvtnu")
+ FORMAT(FCVTMU, "fcvtmu")
+ FORMAT(FCVTAU, "fcvtau")
+ FORMAT(UCVTF, "ucvtf")
+ FORMAT(FCVTPU, "fcvtpu")
+ FORMAT(FCVTZU, "fcvtzu")
+ FORMAT(FRSQRTE, "frsqrte")
+// clang-format on
+#undef FORMAT
+#define FORMAT(A, B) \
+ case NEON_##A##_H_zero_scalar: \
+ mnemonic = B; \
+ form = form_fp0; \
+ break;
+ FORMAT(FCMGT, "fcmgt")
+ FORMAT(FCMEQ, "fcmeq")
+ FORMAT(FCMLT, "fcmlt")
+ FORMAT(FCMGE, "fcmge")
+ FORMAT(FCMLE, "fcmle")
+#undef FORMAT
+
+ default:
+ VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
+
void Disassembler::VisitNEONScalar3Diff(const Instruction *instr) {
const char *mnemonic = "unimplemented";
@@ -3626,6 +4137,43 @@
Format(instr, mnemonic, nfd.SubstitutePlaceholders(form));
}
+void Disassembler::VisitNEONScalar3SameFP16(const Instruction *instr) {
+ const char *mnemonic = NULL;
+ const char *form = "'Hd, 'Hn, 'Hm";
+
+ switch (instr->Mask(NEONScalar3SameFP16Mask)) {
+ case NEON_FABD_H_scalar:
+ mnemonic = "fabd";
+ break;
+ case NEON_FMULX_H_scalar:
+ mnemonic = "fmulx";
+ break;
+ case NEON_FCMEQ_H_scalar:
+ mnemonic = "fcmeq";
+ break;
+ case NEON_FCMGE_H_scalar:
+ mnemonic = "fcmge";
+ break;
+ case NEON_FCMGT_H_scalar:
+ mnemonic = "fcmgt";
+ break;
+ case NEON_FACGE_H_scalar:
+ mnemonic = "facge";
+ break;
+ case NEON_FACGT_H_scalar:
+ mnemonic = "facgt";
+ break;
+ case NEON_FRECPS_H_scalar:
+ mnemonic = "frecps";
+ break;
+ case NEON_FRSQRTS_H_scalar:
+ mnemonic = "frsqrts";
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+ Format(instr, mnemonic, form);
+}
void Disassembler::VisitNEONScalar3SameExtra(const Instruction *instr) {
const char *mnemonic = "unimplemented";
@@ -3649,6 +4197,7 @@
void Disassembler::VisitNEONScalarByIndexedElement(const Instruction *instr) {
const char *mnemonic = "unimplemented";
const char *form = "%sd, %sn, 'Ve.%s['IVByElemIndex]";
+ const char *form_half = "'Hd, 'Hn, 'Ve.h['IVByElemIndex]";
NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
bool long_instr = false;
@@ -3692,6 +4241,22 @@
case NEON_FMULX_byelement_scalar:
mnemonic = "fmulx";
break;
+ case NEON_FMLA_H_byelement_scalar:
+ mnemonic = "fmla";
+ form = form_half;
+ break;
+ case NEON_FMLS_H_byelement_scalar:
+ mnemonic = "fmls";
+ form = form_half;
+ break;
+ case NEON_FMUL_H_byelement_scalar:
+ mnemonic = "fmul";
+ form = form_half;
+ break;
+ case NEON_FMULX_H_byelement_scalar:
+ mnemonic = "fmulx";
+ form = form_half;
+ break;
default:
form = "(NEONScalarByIndexedElement)";
}
@@ -3726,24 +4291,44 @@
const char *mnemonic = "unimplemented";
const char *form = "%sd, 'Vn.%s";
NEONFormatMap map = {{22}, {NF_2S, NF_2D}};
- NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap(), &map);
+ NEONFormatDecoder nfd(instr,
+ NEONFormatDecoder::FPScalarPairwiseFormatMap(),
+ &map);
switch (instr->Mask(NEONScalarPairwiseMask)) {
case NEON_ADDP_scalar:
+ // All pairwise operations except ADDP use bit U to differentiate FP16
+ // from FP32/FP64 variations.
+ nfd.SetFormatMap(0, NEONFormatDecoder::FPScalarFormatMap());
mnemonic = "addp";
break;
+ case NEON_FADDP_h_scalar:
+ form = "%sd, 'Vn.2h";
+ VIXL_FALLTHROUGH();
case NEON_FADDP_scalar:
mnemonic = "faddp";
break;
+ case NEON_FMAXP_h_scalar:
+ form = "%sd, 'Vn.2h";
+ VIXL_FALLTHROUGH();
case NEON_FMAXP_scalar:
mnemonic = "fmaxp";
break;
+ case NEON_FMAXNMP_h_scalar:
+ form = "%sd, 'Vn.2h";
+ VIXL_FALLTHROUGH();
case NEON_FMAXNMP_scalar:
mnemonic = "fmaxnmp";
break;
+ case NEON_FMINP_h_scalar:
+ form = "%sd, 'Vn.2h";
+ VIXL_FALLTHROUGH();
case NEON_FMINP_scalar:
mnemonic = "fminp";
break;
+ case NEON_FMINNMP_h_scalar:
+ form = "%sd, 'Vn.2h";
+ VIXL_FALLTHROUGH();
case NEON_FMINNMP_scalar:
mnemonic = "fminnmp";
break;
@@ -4683,6 +5268,10 @@
ret += 3;
} else if (instr->GetNEONSize() == 1) {
vm_index = (vm_index << 1) | instr->GetNEONM();
+ } else if (instr->GetNEONSize() == 0) {
+ // Half-precision FP ops use the H:L:M bits for the element index.
+ vm_index = (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) |
+ instr->GetNEONM();
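+ // For example, H=1, L=0, M=1 selects lane 5.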
}
AppendToOutput("%d", vm_index);
return ret;
@@ -5079,7 +5668,7 @@
unsigned target = instr->GetPrefetchTarget() + 1;
unsigned stream = instr->GetPrefetchStream();
- if ((hint >= (sizeof(hints) / sizeof(hints[0]))) || (target > 3)) {
+ if ((hint >= ArrayLength(hints)) || (target > 3)) {
// Unallocated prefetch operations.
int prefetch_mode = instr->GetImmPrefetchOperation();
AppendToOutput("#0b%c%c%c%c%c",
@@ -5089,7 +5678,7 @@
(prefetch_mode & (1 << 1)) ? '1' : '0',
(prefetch_mode & (1 << 0)) ? '1' : '0');
} else {
- VIXL_ASSERT(stream < (sizeof(stream_options) / sizeof(stream_options[0])));
+ VIXL_ASSERT(stream < ArrayLength(stream_options));
AppendToOutput("p%sl%d%s", hints[hint], target, stream_options[stream]);
}
return 6;
diff --git a/src/aarch64/instructions-aarch64.cc b/src/aarch64/instructions-aarch64.cc
index 2ebb085..a99a045 100644
--- a/src/aarch64/instructions-aarch64.cc
+++ b/src/aarch64/instructions-aarch64.cc
@@ -30,24 +30,6 @@
namespace vixl {
namespace aarch64 {
-
-// Floating-point infinity values.
-const float16 kFP16PositiveInfinity = 0x7c00;
-const float16 kFP16NegativeInfinity = 0xfc00;
-const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
-const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
-const double kFP64PositiveInfinity =
- RawbitsToDouble(UINT64_C(0x7ff0000000000000));
-const double kFP64NegativeInfinity =
- RawbitsToDouble(UINT64_C(0xfff0000000000000));
-
-
-// The default NaN values (for FPCR.DN=1).
-const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
-const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
-const float16 kFP16DefaultNaN = 0x7e00;
-
-
static uint64_t RepeatBitsAcrossReg(unsigned reg_size,
uint64_t value,
unsigned width) {
@@ -180,7 +162,7 @@
}
-float16 Instruction::Imm8ToFP16(uint32_t imm8) {
+Float16 Instruction::Imm8ToFloat16(uint32_t imm8) {
// Imm8: abcdefgh (8 bits)
// Half: aBbb.cdef.gh00.0000 (16 bits)
// where B is b ^ 1
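// Editor's illustration (not part of the patch): a standalone expansion of the
// imm8 layout described above, returning raw IEEE 754 binary16 bits. The
// function name is invented for the sketch.
uint16_t Imm8ToHalfRawBitsSketch(uint32_t imm8) {
  uint16_t a = (imm8 >> 7) & 1;  // sign
  uint16_t b = (imm8 >> 6) & 1;
  uint16_t cdefgh = imm8 & 0x3f;
  // a : B : b : b : cdef : gh : 000000, with B = b ^ 1.
  return static_cast<uint16_t>((a << 15) | ((b ^ 1) << 14) | (b << 13) |
                               (b << 12) | (cdefgh << 6));
}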
@@ -207,7 +189,7 @@
}
-float16 Instruction::GetImmFP16() const { return Imm8ToFP16(GetImmFP()); }
+Float16 Instruction::GetImmFP16() const { return Imm8ToFloat16(GetImmFP()); }
float Instruction::GetImmFP32() const { return Imm8ToFP32(GetImmFP()); }
@@ -231,8 +213,8 @@
double Instruction::GetImmFP64() const { return Imm8ToFP64(GetImmFP()); }
-float16 Instruction::GetImmNEONFP16() const {
- return Imm8ToFP16(GetImmNEONabcdefgh());
+Float16 Instruction::GetImmNEONFP16() const {
+ return Imm8ToFloat16(GetImmNEONabcdefgh());
}
@@ -566,6 +548,7 @@
case kFormatH:
return kHRegSize;
case kFormatS:
+ case kFormat2H:
return kSRegSize;
case kFormatD:
return kDRegSize;
@@ -593,6 +576,7 @@
case kFormat16B:
return 8;
case kFormatH:
+ case kFormat2H:
case kFormat4H:
case kFormat8H:
return 16;
@@ -624,6 +608,7 @@
case kFormat16B:
return 0;
case kFormatH:
+ case kFormat2H:
case kFormat4H:
case kFormat8H:
return 1;
@@ -653,6 +638,7 @@
case kFormat4H:
case kFormat4S:
return 4;
+ case kFormat2H:
case kFormat2S:
case kFormat2D:
return 2;
diff --git a/src/aarch64/instructions-aarch64.h b/src/aarch64/instructions-aarch64.h
index 1f134c1..4e6bce7 100644
--- a/src/aarch64/instructions-aarch64.h
+++ b/src/aarch64/instructions-aarch64.h
@@ -90,12 +90,15 @@
const uint64_t kWordMask = UINT64_C(0xffffffff);
const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff);
const uint64_t kWMaxUInt = UINT64_C(0xffffffff);
+const uint64_t kHMaxUInt = UINT64_C(0xffff);
// Define k*MinInt with "-k*MaxInt - 1", because the hexadecimal representation
// (e.g. "INT32_C(0x80000000)") has implementation-defined behaviour.
const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff);
const int64_t kXMinInt = -kXMaxInt - 1;
const int32_t kWMaxInt = INT32_C(0x7fffffff);
const int32_t kWMinInt = -kWMaxInt - 1;
+const int16_t kHMaxInt = INT16_C(0x7fff);
+const int16_t kHMinInt = -kHMaxInt - 1;
const unsigned kFpRegCode = 29;
const unsigned kLinkRegCode = 30;
const unsigned kSpRegCode = 31;
@@ -109,26 +112,27 @@
<< kAddressTagOffset;
VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
-// AArch64 floating-point specifics. These match IEEE-754.
-const unsigned kDoubleMantissaBits = 52;
-const unsigned kDoubleExponentBits = 11;
-const unsigned kFloatMantissaBits = 23;
-const unsigned kFloatExponentBits = 8;
-const unsigned kFloat16MantissaBits = 10;
-const unsigned kFloat16ExponentBits = 5;
+const uint64_t kTTBRMask = UINT64_C(1) << 55;
-// Floating-point infinity values.
-extern const float16 kFP16PositiveInfinity;
-extern const float16 kFP16NegativeInfinity;
-extern const float kFP32PositiveInfinity;
-extern const float kFP32NegativeInfinity;
-extern const double kFP64PositiveInfinity;
-extern const double kFP64NegativeInfinity;
+// These floating-point constants have moved to the vixl namespace; re-export
+// them so that explicit vixl::aarch64:: references stay backwards compatible.
+using vixl::kDoubleMantissaBits;
+using vixl::kDoubleExponentBits;
+using vixl::kFloatMantissaBits;
+using vixl::kFloatExponentBits;
+using vixl::kFloat16MantissaBits;
+using vixl::kFloat16ExponentBits;
-// The default NaN values (for FPCR.DN=1).
-extern const float16 kFP16DefaultNaN;
-extern const float kFP32DefaultNaN;
-extern const double kFP64DefaultNaN;
+using vixl::kFP16PositiveInfinity;
+using vixl::kFP16NegativeInfinity;
+using vixl::kFP32PositiveInfinity;
+using vixl::kFP32NegativeInfinity;
+using vixl::kFP64PositiveInfinity;
+using vixl::kFP64NegativeInfinity;
+
+using vixl::kFP16DefaultNaN;
+using vixl::kFP32DefaultNaN;
+using vixl::kFP64DefaultNaN;
unsigned CalcLSDataSize(LoadStoreOp op);
unsigned CalcLSPairDataSize(LoadStorePairOp op);
@@ -143,19 +147,6 @@
enum AddrMode { Offset, PreIndex, PostIndex };
-enum FPRounding {
- // The first four values are encodable directly by FPCR<RMode>.
- FPTieEven = 0x0,
- FPPositiveInfinity = 0x1,
- FPNegativeInfinity = 0x2,
- FPZero = 0x3,
-
- // The final rounding modes are only available when explicitly specified by
- // the instruction (such as with fcvta). It cannot be set in FPCR.
- FPTieAway,
- FPRoundOdd
-};
-
enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister };
// Instructions. ---------------------------------------------------------------
@@ -226,7 +217,7 @@
return GetImmNEONabcdefgh();
}
- float16 GetImmFP16() const;
+ Float16 GetImmFP16() const;
float GetImmFP32() const;
VIXL_DEPRECATED("GetImmFP32", float ImmFP32() const) { return GetImmFP32(); }
@@ -234,7 +225,7 @@
double GetImmFP64() const;
VIXL_DEPRECATED("GetImmFP64", double ImmFP64() const) { return GetImmFP64(); }
- float16 GetImmNEONFP16() const;
+ Float16 GetImmNEONFP16() const;
float GetImmNEONFP32() const;
VIXL_DEPRECATED("GetImmNEONFP32", float ImmNEONFP32() const) {
@@ -505,7 +496,7 @@
private:
int GetImmBranch() const;
- static float16 Imm8ToFP16(uint32_t imm8);
+ static Float16 Imm8ToFloat16(uint32_t imm8);
static float Imm8ToFP32(uint32_t imm8);
static double Imm8ToFP64(uint32_t imm8);
@@ -534,7 +525,10 @@
kFormatB = NEON_B | NEONScalar,
kFormatH = NEON_H | NEONScalar,
kFormatS = NEON_S | NEONScalar,
- kFormatD = NEON_D | NEONScalar
+ kFormatD = NEON_D | NEONScalar,
+
+ // A format invented solely for the FP16 scalar pairwise simulator trace tests.
+ kFormat2H = 0xfffffffe
};
const int kMaxLanesPerVector = 16;
@@ -628,7 +622,7 @@
formats_[2] = (format2 == NULL) ? formats_[1] : format2;
}
void SetFormatMap(unsigned index, const NEONFormatMap* format) {
- VIXL_ASSERT(index <= (sizeof(formats_) / sizeof(formats_[0])));
+ VIXL_ASSERT(index <= ArrayLength(formats_));
VIXL_ASSERT(format != NULL);
formats_[index] = format;
}
@@ -681,7 +675,7 @@
kFormatH,
kFormatS,
kFormatD};
- VIXL_ASSERT(GetNEONFormat(format_map) < (sizeof(vform) / sizeof(vform[0])));
+ VIXL_ASSERT(GetNEONFormat(format_map) < ArrayLength(vform));
return vform[GetNEONFormat(format_map)];
}
@@ -714,6 +708,13 @@
return &map;
}
+ // The FP16 format map uses one bit (Q) to encode the NEON vector formats:
+ // NF_4H, NF_8H.
+ static const NEONFormatMap* FP16FormatMap() {
+ static const NEONFormatMap map = {{30}, {NF_4H, NF_8H}};
+ return &map;
+ }
+
// The load/store format map uses three bits (Q, 11, 10) to encode the
// set of NEON vector formats.
static const NEONFormatMap* LoadStoreFormatMap() {
@@ -765,6 +766,13 @@
return &map;
}
+ // The FP scalar pairwise format map uses two bits (U, size<0>) to encode the
+ // NEON FP scalar formats: NF_H, NF_S, NF_D.
+ static const NEONFormatMap* FPScalarPairwiseFormatMap() {
+ static const NEONFormatMap map = {{29, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}};
+ return &map;
+ }
+
// The triangular scalar format map uses between one and four bits to encode
// the NEON FP scalar formats:
// xxx1->B, xx10->H, x100->S, 1000->D, all others undefined.
@@ -815,7 +823,7 @@
"b", "h", "s", "d"
};
// clang-format on
- VIXL_ASSERT(format < (sizeof(formats) / sizeof(formats[0])));
+ VIXL_ASSERT(format < ArrayLength(formats));
return formats[format];
}
diff --git a/src/aarch64/instrument-aarch64.cc b/src/aarch64/instrument-aarch64.cc
index a2e6ca8..c3097ef 100644
--- a/src/aarch64/instrument-aarch64.cc
+++ b/src/aarch64/instrument-aarch64.cc
@@ -407,6 +407,14 @@
}
+void Instrument::VisitAtomicMemory(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("Other");
+ counter->Increment();
+}
+
+
void Instrument::VisitLoadLiteral(const Instruction* instr) {
USE(instr);
Update();
@@ -670,6 +678,14 @@
}
+void Instrument::VisitNEON2RegMiscFP16(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
void Instrument::VisitNEON3Same(const Instruction* instr) {
USE(instr);
Update();
@@ -678,6 +694,14 @@
}
+void Instrument::VisitNEON3SameFP16(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
void Instrument::VisitNEON3SameExtra(const Instruction* instr) {
USE(instr);
Update();
@@ -776,6 +800,14 @@
}
+void Instrument::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
void Instrument::VisitNEONScalar3Diff(const Instruction* instr) {
USE(instr);
Update();
@@ -792,6 +824,14 @@
}
+void Instrument::VisitNEONScalar3SameFP16(const Instruction* instr) {
+ USE(instr);
+ Update();
+ static Counter* counter = GetCounter("NEON");
+ counter->Increment();
+}
+
+
void Instrument::VisitNEONScalar3SameExtra(const Instruction* instr) {
USE(instr);
Update();
diff --git a/src/aarch64/logic-aarch64.cc b/src/aarch64/logic-aarch64.cc
index 20d4c00..aebd227 100644
--- a/src/aarch64/logic-aarch64.cc
+++ b/src/aarch64/logic-aarch64.cc
@@ -33,6 +33,39 @@
namespace vixl {
namespace aarch64 {
+using vixl::internal::SimFloat16;
+
+template <typename T>
+bool IsFloat64() {
+ return false;
+}
+template <>
+bool IsFloat64<double>() {
+ return true;
+}
+
+template <typename T>
+bool IsFloat32() {
+ return false;
+}
+template <>
+bool IsFloat32<float>() {
+ return true;
+}
+
+template <typename T>
+bool IsFloat16() {
+ return false;
+}
+template <>
+bool IsFloat16<Float16>() {
+ return true;
+}
+template <>
+bool IsFloat16<SimFloat16>() {
+ return true;
+}
+
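// Editor's note (not part of the patch): these traits dispatch purely on the
// template argument, for example IsFloat16<SimFloat16>() and
// IsFloat16<Float16>() return true, IsFloat16<float>() returns false, and
// IsFloat64<double>() returns true.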
template <>
double Simulator::FPDefaultNaN<double>() {
return kFP64DefaultNaN;
@@ -44,6 +77,13 @@
return kFP32DefaultNaN;
}
+
+template <>
+SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
+ return SimFloat16(kFP16DefaultNaN);
+}
+
+
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
if (src >= 0) {
return UFixedToDouble(src, fbits, round);
@@ -98,6 +138,35 @@
}
+SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
+ if (src >= 0) {
+ return UFixedToFloat16(src, fbits, round);
+ } else if (src == INT64_MIN) {
+ return -UFixedToFloat16(src, fbits, round);
+ } else {
+ return -UFixedToFloat16(-src, fbits, round);
+ }
+}
+
+
+SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
+ int fbits,
+ FPRounding round) {
+ // An input of 0 is a special case because the result is effectively
+ // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
+ if (src == 0) {
+ return 0.0f;
+ }
+
+ // Calculate the exponent. The highest significant bit will have the value
+ // 2^exponent.
+ const int highest_significant_bit = 63 - CountLeadingZeros(src);
+ const int16_t exponent = highest_significant_bit - fbits;
+
+ return FPRoundToFloat16(0, exponent, src, round);
+}
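// Editor's note (not part of the patch): for src == 16 and fbits == 2 the
// highest set bit is bit 4, so exponent == 2 and the rounded result represents
// 16 * 2^-2 == 4.0, matching the fixed-point interpretation of the input.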
+
+
void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
dst.ClearForWrite(vform);
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
@@ -3614,13 +3683,14 @@
return -op;
}
-
template <typename T>
T Simulator::FPAdd(T op1, T op2) {
T result = FPProcessNaNs(op1, op2);
- if (std::isnan(result)) return result;
+ if (IsNaN(result)) {
+ return result;
+ }
- if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
+ if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
// inf + -inf returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3634,9 +3704,9 @@
template <typename T>
T Simulator::FPSub(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+ VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
- if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
+ if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
// inf - inf returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3650,9 +3720,9 @@
template <typename T>
T Simulator::FPMul(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+ VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
- if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
// inf * 0.0 returns the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
@@ -3665,7 +3735,7 @@
template <typename T>
T Simulator::FPMulx(T op1, T op2) {
- if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
+ if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
// inf * 0.0 returns +/-2.0.
T two = 2.0;
return copysign(1.0, op1) * copysign(1.0, op2) * two;
@@ -3680,13 +3750,13 @@
T sign_a = copysign(1.0, a);
T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
- bool isinf_prod = std::isinf(op1) || std::isinf(op2);
+ bool isinf_prod = IsInf(op1) || IsInf(op2);
bool operation_generates_nan =
- (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
- (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
- (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
+ (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
+ (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
+ (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
- if (std::isnan(result)) {
+ if (IsNaN(result)) {
// Generated NaNs override quiet NaNs propagated from a.
if (operation_generates_nan && IsQuietNaN(a)) {
FPProcessException();
@@ -3705,11 +3775,11 @@
// Work around broken fma implementations for exact zero results: The sign of
// exact 0.0 results is positive unless both a and op1 * op2 are negative.
if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
- return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
+ return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
}
result = FusedMultiplyAdd(op1, op2, a);
- VIXL_ASSERT(!std::isnan(result));
+ VIXL_ASSERT(!IsNaN(result));
// Work around broken fma implementations for rounded zero results: If a is
// 0.0, the sign of the result is the sign of op1 * op2 before rounding.
@@ -3724,16 +3794,16 @@
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
// NaNs should be handled elsewhere.
- VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
+ VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
- if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
+ if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
// inf / inf and 0.0 / 0.0 return the default NaN.
FPProcessException();
return FPDefaultNaN<T>();
} else {
if (op2 == 0.0) {
FPProcessException();
- if (!std::isnan(op1)) {
+ if (!IsNaN(op1)) {
double op1_sign = copysign(1.0, op1);
double op2_sign = copysign(1.0, op2);
return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
@@ -3748,9 +3818,9 @@
template <typename T>
T Simulator::FPSqrt(T op) {
- if (std::isnan(op)) {
+ if (IsNaN(op)) {
return FPProcessNaN(op);
- } else if (op < 0.0) {
+ } else if (op < T(0.0)) {
FPProcessException();
return FPDefaultNaN<T>();
} else {
@@ -3762,7 +3832,7 @@
template <typename T>
T Simulator::FPMax(T a, T b) {
T result = FPProcessNaNs(a, b);
- if (std::isnan(result)) return result;
+ if (IsNaN(result)) return result;
if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
// a and b are zero, and the sign differs: return +0.0.
@@ -3782,14 +3852,14 @@
}
T result = FPProcessNaNs(a, b);
- return std::isnan(result) ? result : FPMax(a, b);
+ return IsNaN(result) ? result : FPMax(a, b);
}
template <typename T>
T Simulator::FPMin(T a, T b) {
T result = FPProcessNaNs(a, b);
- if (std::isnan(result)) return result;
+ if (IsNaN(result)) return result;
if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
// a and b are zero, and the sign differs: return -0.0.
@@ -3809,17 +3879,16 @@
}
T result = FPProcessNaNs(a, b);
- return std::isnan(result) ? result : FPMin(a, b);
+ return IsNaN(result) ? result : FPMin(a, b);
}
template <typename T>
T Simulator::FPRecipStepFused(T op1, T op2) {
const T two = 2.0;
- if ((std::isinf(op1) && (op2 == 0.0)) ||
- ((op1 == 0.0) && (std::isinf(op2)))) {
+ if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
return two;
- } else if (std::isinf(op1) || std::isinf(op2)) {
+ } else if (IsInf(op1) || IsInf(op2)) {
// Return +inf if signs match, otherwise -inf.
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
: kFP64NegativeInfinity;
@@ -3828,16 +3897,28 @@
}
}
+template <typename T>
+bool IsNormal(T value) {
+ return std::isnormal(value);
+}
+
+template <>
+bool IsNormal(SimFloat16 value) {
+ uint16_t rawbits = Float16ToRawbits(value);
+ uint16_t exp_mask = 0x7c00;
+ // Check that the exponent is neither all zeroes nor all ones.
+ return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
+}
+
template <typename T>
T Simulator::FPRSqrtStepFused(T op1, T op2) {
const T one_point_five = 1.5;
const T two = 2.0;
- if ((std::isinf(op1) && (op2 == 0.0)) ||
- ((op1 == 0.0) && (std::isinf(op2)))) {
+ if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
return one_point_five;
- } else if (std::isinf(op1) || std::isinf(op2)) {
+ } else if (IsInf(op1) || IsInf(op2)) {
// Return +inf if signs match, otherwise -inf.
return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
: kFP64NegativeInfinity;
@@ -3845,9 +3926,9 @@
// The multiply-add-halve operation must be fully fused, so avoid interim
// rounding by checking which operand can be losslessly divided by two
// before doing the multiply-add.
- if (std::isnormal(op1 / two)) {
+ if (IsNormal(op1 / two)) {
return FusedMultiplyAdd(op1 / two, op2, one_point_five);
- } else if (std::isnormal(op2 / two)) {
+ } else if (IsNormal(op2 / two)) {
return FusedMultiplyAdd(op1, op2 / two, one_point_five);
} else {
// Neither operand is normal after halving: the result is dominated by
@@ -3857,12 +3938,80 @@
}
}
+int32_t Simulator::FPToFixedJS(double value) {
+ // The Z-flag is set when the conversion from double precision floating-point
+ // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
+ // outside the bounds of a 32-bit integer, or isn't an exact integer, then the
+ // Z-flag is unset.
+ int Z = 1;
+ int32_t result;
+
+ if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
+ (value == kFP64NegativeInfinity)) {
+ // +/- zero and infinity all return zero; however, -0 and +/- Infinity also
+ // unset the Z-flag.
+ result = 0;
+ if ((value != 0.0) || std::signbit(value)) {
+ Z = 0;
+ }
+ } else if (std::isnan(value)) {
+ // NaN values unset the Z-flag and set the result to 0.
+ FPProcessNaN(value);
+ result = 0;
+ Z = 0;
+ } else {
+ // All other values are converted to an integer representation, rounded
+ // toward zero.
+ double int_result = std::floor(value);
+ double error = value - int_result;
+
+ if ((error != 0.0) && (int_result < 0.0)) {
+ int_result++;
+ }
+
+ // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
+ // write a one-liner with std::round, but the behaviour on ties is incorrect
+ // for our purposes.
+ double mod_const = static_cast<double>(UINT64_C(1) << 32);
+ double mod_error =
+ (int_result / mod_const) - std::floor(int_result / mod_const);
+ double constrained;
+ if (mod_error == 0.5) {
+ constrained = INT32_MIN;
+ } else {
+ constrained = int_result - mod_const * round(int_result / mod_const);
+ }
+
+ VIXL_ASSERT(std::floor(constrained) == constrained);
+ VIXL_ASSERT(constrained >= INT32_MIN);
+ VIXL_ASSERT(constrained <= INT32_MAX);
+
+ // Take the bottom 32 bits of the result as a 32-bit integer.
+ result = static_cast<int32_t>(constrained);
+
+ if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
+ (error != 0.0)) {
+ // If the integer result is out of range or the conversion isn't exact,
+ // take exception and unset the Z-flag.
+ FPProcessException();
+ Z = 0;
+ }
+ }
+
+ ReadNzcv().SetN(0);
+ ReadNzcv().SetZ(Z);
+ ReadNzcv().SetC(0);
+ ReadNzcv().SetV(0);
+
+ return result;
+}
+
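For reference, the rules above give the following behaviour for a few representative inputs (a worked sketch, not an exhaustive list; "result" is the returned int32_t and "Z" is the flag written to NZCV):

//   FPToFixedJS(1.0)   ->  1    Z = 1   (exact conversion)
//   FPToFixedJS(1.5)   ->  1    Z = 0   (inexact, truncated toward zero)
//   FPToFixedJS(-2.5)  -> -2    Z = 0   (truncated toward zero)
//   FPToFixedJS(-0.0)  ->  0    Z = 0   (-0.0 always unsets Z)
//   FPToFixedJS(NaN)   ->  0    Z = 0   (NaN is processed and unsets Z)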
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
(value == kFP64NegativeInfinity)) {
return value;
- } else if (std::isnan(value)) {
+ } else if (IsNaN(value)) {
return FPProcessNaN(value);
}
@@ -3927,6 +4076,17 @@
}
+int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kHMaxInt) {
+ return kHMaxInt;
+ } else if (value < kHMinInt) {
+ return kHMinInt;
+ }
+ return IsNaN(value) ? 0 : static_cast<int16_t>(value);
+}
+
+
int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
value = FPRoundInt(value, rmode);
if (value >= kWMaxInt) {
@@ -3934,7 +4094,7 @@
} else if (value < kWMinInt) {
return kWMinInt;
}
- return std::isnan(value) ? 0 : static_cast<int32_t>(value);
+ return IsNaN(value) ? 0 : static_cast<int32_t>(value);
}
@@ -3945,7 +4105,18 @@
} else if (value < kXMinInt) {
return kXMinInt;
}
- return std::isnan(value) ? 0 : static_cast<int64_t>(value);
+ return IsNaN(value) ? 0 : static_cast<int64_t>(value);
+}
+
+
+uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
+ value = FPRoundInt(value, rmode);
+ if (value >= kHMaxUInt) {
+ return kHMaxUInt;
+ } else if (value < 0.0) {
+ return 0;
+ }
+ return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
}
@@ -3956,7 +4127,7 @@
} else if (value < 0.0) {
return 0;
}
- return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
+ return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
}
@@ -3967,7 +4138,7 @@
} else if (value < 0.0) {
return 0;
}
- return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
+ return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
}
@@ -3984,7 +4155,7 @@
T result; \
if (PROCNAN) { \
result = FPProcessNaNs(op1, op2); \
- if (!std::isnan(result)) { \
+ if (!IsNaN(result)) { \
result = OP(op1, op2); \
} \
} else { \
@@ -3999,7 +4170,9 @@
LogicVRegister dst, \
const LogicVRegister& src1, \
const LogicVRegister& src2) { \
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \
+ FN<SimFloat16>(vform, dst, src1, src2); \
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \
FN<float>(vform, dst, src1, src2); \
} else { \
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
@@ -4031,7 +4204,7 @@
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
+ dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
}
return dst;
}
@@ -4041,7 +4214,9 @@
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ frecps<SimFloat16>(vform, dst, src1, src2);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
frecps<float>(vform, dst, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4061,7 +4236,7 @@
T op1 = -src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T result = FPProcessNaNs(op1, op2);
- dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
+ dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
}
return dst;
}
@@ -4071,7 +4246,9 @@
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ frsqrts<SimFloat16>(vform, dst, src1, src2);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
frsqrts<float>(vform, dst, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4093,7 +4270,7 @@
T op1 = src1.Float<T>(i);
T op2 = src2.Float<T>(i);
T nan_result = FPProcessNaNs(op1, op2);
- if (!std::isnan(nan_result)) {
+ if (!IsNaN(nan_result)) {
switch (cond) {
case eq:
result = (op1 == op2);
@@ -4126,7 +4303,9 @@
const LogicVRegister& src1,
const LogicVRegister& src2,
Condition cond) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fcmp<SimFloat16>(vform, dst, src1, src2, cond);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fcmp<float>(vform, dst, src1, src2, cond);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4141,7 +4320,11 @@
const LogicVRegister& src,
Condition cond) {
SimVRegister temp;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister zero_reg =
+ dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
+ fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
fcmp<float>(vform, dst, src, zero_reg, cond);
} else {
@@ -4159,7 +4342,11 @@
const LogicVRegister& src2,
Condition cond) {
SimVRegister temp1, temp2;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
+ LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
+ fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
@@ -4194,7 +4381,9 @@
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fmla<SimFloat16>(vform, dst, src1, src2);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fmla<float>(vform, dst, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4225,7 +4414,9 @@
LogicVRegister dst,
const LogicVRegister& src1,
const LogicVRegister& src2) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fmls<SimFloat16>(vform, dst, src1, src2);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fmls<float>(vform, dst, src1, src2);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4252,7 +4443,9 @@
LogicVRegister Simulator::fneg(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fneg<SimFloat16>(vform, dst, src);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fneg<float>(vform, dst, src);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4281,7 +4474,9 @@
LogicVRegister Simulator::fabs_(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ fabs_<SimFloat16>(vform, dst, src);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
fabs_<float>(vform, dst, src);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4306,7 +4501,12 @@
LogicVRegister dst,
const LogicVRegister& src) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
+ dst.SetFloat(i, result);
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float result = FPSqrt(src.Float<float>(i));
dst.SetFloat(i, result);
@@ -4322,47 +4522,58 @@
}
-#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
- LogicVRegister Simulator::FNP(VectorFormat vform, \
- LogicVRegister dst, \
- const LogicVRegister& src1, \
- const LogicVRegister& src2) { \
- SimVRegister temp1, temp2; \
- uzp1(vform, temp1, src1, src2); \
- uzp2(vform, temp2, src1, src2); \
- FN(vform, dst, temp1, temp2); \
- return dst; \
- } \
- \
- LogicVRegister Simulator::FNP(VectorFormat vform, \
- LogicVRegister dst, \
- const LogicVRegister& src) { \
- if (vform == kFormatS) { \
- float result = OP(src.Float<float>(0), src.Float<float>(1)); \
- dst.SetFloat(0, result); \
- } else { \
- VIXL_ASSERT(vform == kFormatD); \
- double result = OP(src.Float<double>(0), src.Float<double>(1)); \
- dst.SetFloat(0, result); \
- } \
- dst.ClearForWrite(vform); \
- return dst; \
+#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
+ LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src1, \
+ const LogicVRegister& src2) { \
+ SimVRegister temp1, temp2; \
+ uzp1(vform, temp1, src1, src2); \
+ uzp2(vform, temp2, src1, src2); \
+ FN(vform, dst, temp1, temp2); \
+ return dst; \
+ } \
+ \
+ LogicVRegister Simulator::FNP(VectorFormat vform, \
+ LogicVRegister dst, \
+ const LogicVRegister& src) { \
+ if (vform == kFormatH) { \
+ SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \
+ SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
+ dst.SetUint(vform, 0, Float16ToRawbits(result)); \
+ } else if (vform == kFormatS) { \
+ float result = OP(src.Float<float>(0), src.Float<float>(1)); \
+ dst.SetFloat(0, result); \
+ } else { \
+ VIXL_ASSERT(vform == kFormatD); \
+ double result = OP(src.Float<double>(0), src.Float<double>(1)); \
+ dst.SetFloat(0, result); \
+ } \
+ dst.ClearForWrite(vform); \
+ return dst; \
}
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
-
+template <typename T>
LogicVRegister Simulator::fminmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
- FPMinMaxOp Op) {
- VIXL_ASSERT(vform == kFormat4S);
+ typename TFPMinMaxOp<T>::type Op) {
+ VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) ||
+ (vform == kFormat4S));
USE(vform);
- float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
- float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
- float result = (this->*Op)(result1, result2);
- dst.ClearForWrite(kFormatS);
- dst.SetFloat<float>(0, result);
+ T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1));
+ T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3));
+ if (vform == kFormat8H) {
+ T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5));
+ T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7));
+ result1 = (this->*Op)(result1, result3);
+ result2 = (this->*Op)(result2, result4);
+ }
+ T result = (this->*Op)(result1, result2);
+ dst.ClearForWrite(ScalarFormatFromFormat(vform));
+ dst.SetFloat<T>(0, result);
return dst;
}
@@ -4370,28 +4581,50 @@
LogicVRegister Simulator::fmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return fminmaxv(vform, dst, src, &Simulator::FPMax);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>);
+ } else {
+ return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>);
+ }
}
LogicVRegister Simulator::fminv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return fminmaxv(vform, dst, src, &Simulator::FPMin);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>);
+ } else {
+ return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>);
+ }
}
LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ return fminmaxv<SimFloat16>(vform,
+ dst,
+ src,
+ &Simulator::FPMaxNM<SimFloat16>);
+ } else {
+ return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>);
+ }
}
LogicVRegister Simulator::fminnmv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ return fminmaxv<SimFloat16>(vform,
+ dst,
+ src,
+ &Simulator::FPMinNM<SimFloat16>);
+ } else {
+ return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>);
+ }
}
@@ -4402,10 +4635,12 @@
int index) {
dst.ClearForWrite(vform);
SimVRegister temp;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
+ fmul<SimFloat16>(vform, dst, src1, index_reg);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
fmul<float>(vform, dst, src1, index_reg);
-
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
@@ -4422,10 +4657,12 @@
int index) {
dst.ClearForWrite(vform);
SimVRegister temp;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
+ fmla<SimFloat16>(vform, dst, src1, index_reg);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
fmla<float>(vform, dst, src1, index_reg);
-
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
@@ -4442,10 +4679,12 @@
int index) {
dst.ClearForWrite(vform);
SimVRegister temp;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
+ fmls<SimFloat16>(vform, dst, src1, index_reg);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
fmls<float>(vform, dst, src1, index_reg);
-
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
@@ -4462,10 +4701,12 @@
int index) {
dst.ClearForWrite(vform);
SimVRegister temp;
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
+ fmulx<SimFloat16>(vform, dst, src1, index_reg);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
fmulx<float>(vform, dst, src1, index_reg);
-
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
@@ -4481,11 +4722,20 @@
FPRounding rounding_mode,
bool inexact_exception) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 input = src.Float<SimFloat16>(i);
+ SimFloat16 rounded = FPRoundInt(input, rounding_mode);
+ if (inexact_exception && !IsNaN(input) && (input != rounded)) {
+ FPProcessException();
+ }
+ dst.SetFloat<SimFloat16>(i, rounded);
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
float rounded = FPRoundInt(input, rounding_mode);
- if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ if (inexact_exception && !IsNaN(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<float>(i, rounded);
@@ -4495,7 +4745,7 @@
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
double input = src.Float<double>(i);
double rounded = FPRoundInt(input, rounding_mode);
- if (inexact_exception && !std::isnan(input) && (input != rounded)) {
+ if (inexact_exception && !IsNaN(input) && (input != rounded)) {
FPProcessException();
}
dst.SetFloat<double>(i, rounded);
@@ -4511,7 +4761,13 @@
FPRounding rounding_mode,
int fbits) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 op =
+ static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
+ dst.SetInt(vform, i, FPToInt16(op, rounding_mode));
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
@@ -4533,7 +4789,13 @@
FPRounding rounding_mode,
int fbits) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 op =
+ static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
+ dst.SetUint(vform, i, FPToUInt16(op, rounding_mode));
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float op = src.Float<float>(i) * std::pow(2.0f, fbits);
dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
@@ -4554,7 +4816,10 @@
const LogicVRegister& src) {
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
- dst.SetFloat(i, FPToFloat(src.Float<float16>(i), ReadDN()));
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
+ ReadDN()));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4572,7 +4837,11 @@
int lane_count = LaneCountFromFormat(vform);
if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < lane_count; i++) {
- dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count), ReadDN()));
+ // TODO: Full support for SimFloat16 in SimRegister(s).
+ dst.SetFloat(i,
+ FPToFloat(RawbitsToFloat16(
+ src.Float<uint16_t>(i + lane_count)),
+ ReadDN()));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4589,7 +4858,9 @@
const LogicVRegister& src) {
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()));
+ dst.SetFloat(i,
+ Float16ToRawbits(
+ FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
@@ -4608,7 +4879,8 @@
if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
for (int i = lane_count - 1; i >= 0; i--) {
dst.SetFloat(i + lane_count,
- FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()));
+ Float16ToRawbits(
+ FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
}
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
@@ -4669,7 +4941,7 @@
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
- if (std::isnan(op)) {
+ if (IsNaN(op)) {
return FPProcessNaN(op);
} else if (op == 0.0) {
if (copysign(1.0, op) < 0.0) {
@@ -4680,17 +4952,22 @@
} else if (copysign(1.0, op) < 0.0) {
FPProcessException();
return FPDefaultNaN<T>();
- } else if (std::isinf(op)) {
+ } else if (IsInf(op)) {
return 0.0;
} else {
uint64_t fraction;
int exp, result_exp;
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ exp = Float16Exp(op);
+ fraction = Float16Mantissa(op);
+ fraction <<= 42;
+ } else if (IsFloat32<T>()) {
exp = FloatExp(op);
fraction = FloatMantissa(op);
fraction <<= 29;
} else {
+ VIXL_ASSERT(IsFloat64<T>());
exp = DoubleExp(op);
fraction = DoubleMantissa(op);
}
@@ -4710,19 +4987,27 @@
scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
}
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ result_exp = (44 - exp) / 2;
+ } else if (IsFloat32<T>()) {
result_exp = (380 - exp) / 2;
} else {
+ VIXL_ASSERT(IsFloat64<T>());
result_exp = (3068 - exp) / 2;
}
uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
+ uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
+ return Float16Pack(0, exp_bits, est_bits);
+ } else if (IsFloat32<T>()) {
uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
return FloatPack(0, exp_bits, est_bits);
} else {
+ VIXL_ASSERT(IsFloat64<T>());
return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
}
}
@@ -4733,7 +5018,12 @@
LogicVRegister dst,
const LogicVRegister& src) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 input = src.Float<SimFloat16>(i);
+ dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input));
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
@@ -4752,23 +5042,25 @@
T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
uint32_t sign;
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ sign = Float16Sign(op);
+ } else if (IsFloat32<T>()) {
sign = FloatSign(op);
} else {
+ VIXL_ASSERT(IsFloat64<T>());
sign = DoubleSign(op);
}
- if (std::isnan(op)) {
+ if (IsNaN(op)) {
return FPProcessNaN(op);
- } else if (std::isinf(op)) {
+ } else if (IsInf(op)) {
return (sign == 1) ? -0.0 : 0.0;
} else if (op == 0.0) {
FPProcessException(); // FPExc_DivideByZero exception.
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
- } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof)
- (std::fabs(op) < std::pow(2.0, -128.0))) ||
- ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof)
- (std::fabs(op) < std::pow(2.0, -1024.0)))) {
+ } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
+ (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
+ (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
bool overflow_to_inf = false;
switch (rounding) {
case FPTieEven:
@@ -4791,9 +5083,12 @@
return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
} else {
// Return FPMaxNormal(sign).
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ return Float16Pack(sign, 0x1f, 0x3ff);
+ } else if (IsFloat32<T>()) {
return FloatPack(sign, 0xfe, 0x07fffff);
} else {
+ VIXL_ASSERT(IsFloat64<T>());
return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
}
}
@@ -4802,12 +5097,18 @@
int exp, result_exp;
uint32_t sign;
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ sign = Float16Sign(op);
+ exp = Float16Exp(op);
+ fraction = Float16Mantissa(op);
+ fraction <<= 42;
+ } else if (IsFloat32<T>()) {
sign = FloatSign(op);
exp = FloatExp(op);
fraction = FloatMantissa(op);
fraction <<= 29;
} else {
+ VIXL_ASSERT(IsFloat64<T>());
sign = DoubleSign(op);
exp = DoubleExp(op);
fraction = DoubleMantissa(op);
@@ -4824,9 +5125,12 @@
double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
- result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
+ if (IsFloat16<T>()) {
+ result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
+ } else if (IsFloat32<T>()) {
+ result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
} else {
+ VIXL_ASSERT(IsFloat64<T>());
result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
}
@@ -4839,11 +5143,16 @@
fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
result_exp = 0;
}
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
+ uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
+ return Float16Pack(sign, exp_bits, frac_bits);
+ } else if (IsFloat32<T>()) {
uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
return FloatPack(sign, exp_bits, frac_bits);
} else {
+ VIXL_ASSERT(IsFloat64<T>());
return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
}
}
@@ -4855,7 +5164,12 @@
const LogicVRegister& src,
FPRounding round) {
dst.ClearForWrite(vform);
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ for (int i = 0; i < LaneCountFromFormat(vform); i++) {
+ SimFloat16 input = src.Float<SimFloat16>(i);
+ dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round));
+ }
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
float input = src.Float<float>(i);
dst.SetFloat(i, FPRecipEstimate<float>(input, round));
@@ -4933,17 +5247,23 @@
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
T op = src.Float<T>(i);
T result;
- if (std::isnan(op)) {
+ if (IsNaN(op)) {
result = FPProcessNaN(op);
} else {
int exp;
uint32_t sign;
- if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof)
+ if (IsFloat16<T>()) {
+ sign = Float16Sign(op);
+ exp = Float16Exp(op);
+ exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
+ result = Float16Pack(sign, exp, 0);
+ } else if (IsFloat32<T>()) {
sign = FloatSign(op);
exp = FloatExp(op);
exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
result = FloatPack(sign, exp, 0);
} else {
+ VIXL_ASSERT(IsFloat64<T>());
sign = DoubleSign(op);
exp = DoubleExp(op);
exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
@@ -4959,7 +5279,9 @@
LogicVRegister Simulator::frecpx(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ frecpx<SimFloat16>(vform, dst, src);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
frecpx<float>(vform, dst, src);
} else {
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
@@ -4974,7 +5296,10 @@
int fbits,
FPRounding round) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round);
+ dst.SetFloat<SimFloat16>(i, result);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
dst.SetFloat<float>(i, result);
} else {
@@ -4993,7 +5318,10 @@
int fbits,
FPRounding round) {
for (int i = 0; i < LaneCountFromFormat(vform); i++) {
- if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+ if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+ SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round);
+ dst.SetFloat<SimFloat16>(i, result);
+ } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
dst.SetFloat<float>(i, result);
} else {
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index f13d15a..e881a81 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -1485,7 +1485,7 @@
MacroEmissionCheckScope guard(this);
if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
- Fmov(vd, F16(imm));
+ Fmov(vd, Float16(imm));
return;
}
@@ -1522,7 +1522,7 @@
MacroEmissionCheckScope guard(this);
if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
- Fmov(vd, F16(imm));
+ Fmov(vd, Float16(imm));
return;
}
@@ -1553,23 +1553,23 @@
}
-void MacroAssembler::Fmov(VRegister vd, F16 imm) {
+void MacroAssembler::Fmov(VRegister vd, Float16 imm) {
VIXL_ASSERT(allow_macro_instructions_);
MacroEmissionCheckScope guard(this);
if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
- Fmov(vd, static_cast<float>(imm));
+ Fmov(vd, FPToFloat(imm, kIgnoreDefaultNaN));
return;
}
if (vd.Is1D() || vd.Is2D()) {
- Fmov(vd, static_cast<double>(imm));
+ Fmov(vd, FPToDouble(imm, kIgnoreDefaultNaN));
return;
}
VIXL_ASSERT(vd.Is1H() || vd.Is4H() || vd.Is8H());
- uint16_t rawbits = imm.ToRawbits();
- if (IsImmFP16(rawbits)) {
+ uint16_t rawbits = Float16ToRawbits(imm);
+ if (IsImmFP16(imm)) {
fmov(vd, imm);
} else {
if (vd.IsScalar()) {
@@ -2966,7 +2966,7 @@
const CPURegister regs[] = {reg1, reg2, reg3, reg4};
- for (unsigned i = 0; i < (sizeof(regs) / sizeof(regs[0])); i++) {
+ for (size_t i = 0; i < ArrayLength(regs); i++) {
if (regs[i].IsRegister()) {
exclude |= regs[i].GetBit();
} else if (regs[i].IsFPRegister()) {
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index 1dc0c0e..88ed557 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -44,7 +44,6 @@
// is needed regardless of whether the simulator is included or not, since
// generating simulator specific instructions is controlled at runtime.
#include "simulator-constants-aarch64.h"
-#include "utils-aarch64.h"
#define LS_MACRO_LIST(V) \
@@ -1036,6 +1035,12 @@
SingleEmissionCheckScope guard(this);
bfi(rd, rn, lsb, width);
}
+ void Bfc(const Register& rd, unsigned lsb, unsigned width) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ bfc(rd, lsb, width);
+ }
void Bfxil(const Register& rd,
const Register& rn,
unsigned lsb,
@@ -1066,6 +1071,56 @@
SingleEmissionCheckScope guard(this);
br(xn);
}
+ void Braaz(const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ braaz(xn);
+ }
+ void Brabz(const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brabz(xn);
+ }
+ void Blraaz(const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ blraaz(xn);
+ }
+ void Blrabz(const Register& xn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ blrabz(xn);
+ }
+ void Retaa() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ retaa();
+ }
+ void Retab() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ retab();
+ }
+ void Braa(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ braa(xn, xm);
+ }
+ void Brab(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ brab(xn, xm);
+ }
+ void Blraa(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ blraa(xn, xm);
+ }
+ void Blrab(const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ blrab(xn, xm);
+ }
void Brk(int code = 0) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -1087,6 +1142,79 @@
SingleEmissionCheckScope guard(this);
cinv(rd, rn, cond);
}
+
+#define PAUTH_SYSTEM_MODES(V) \
+ V(az) \
+ V(bz) \
+ V(asp) \
+ V(bsp)
+
+#define DEFINE_MACRO_ASM_FUNCS(SUFFIX) \
+ void Paci##SUFFIX() { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ paci##SUFFIX(); \
+ } \
+ void Auti##SUFFIX() { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ auti##SUFFIX(); \
+ }
+
+ PAUTH_SYSTEM_MODES(DEFINE_MACRO_ASM_FUNCS)
+#undef DEFINE_MACRO_ASM_FUNCS
+
+ // The 1716 pac and aut instructions encourage people to use x16 and x17
+ // directly, perhaps without realising that this is forbidden. For example:
+ //
+ // UseScratchRegisterScope temps(&masm);
+ // Register temp = temps.AcquireX(); // temp will be x16
+ // __ Mov(x17, ptr);
+ // __ Mov(x16, modifier); // Will override temp!
+ // __ Pacia1716();
+ //
+ // To work around this issue, you must exclude x16 and x17 from the scratch
+ // register list. You may need to replace them with other registers:
+ //
+ // UseScratchRegisterScope temps(&masm);
+ // temps.Exclude(x16, x17);
+ // temps.Include(x10, x11);
+ // __ Mov(x17, ptr);
+ // __ Mov(x16, modifier);
+ // __ Pacia1716();
+ void Pacia1716() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16));
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17));
+ SingleEmissionCheckScope guard(this);
+ pacia1716();
+ }
+ void Pacib1716() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16));
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17));
+ SingleEmissionCheckScope guard(this);
+ pacib1716();
+ }
+ void Autia1716() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16));
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17));
+ SingleEmissionCheckScope guard(this);
+ autia1716();
+ }
+ void Autib1716() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16));
+ VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17));
+ SingleEmissionCheckScope guard(this);
+ autib1716();
+ }
+ void Xpaclri() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ xpaclri();
+ }
void Clrex() {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -1113,6 +1241,11 @@
SingleEmissionCheckScope guard(this);
cneg(rd, rn, cond);
}
+ void Esb() {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ esb();
+ }
void Csdb() {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
@@ -1318,6 +1451,12 @@
SingleEmissionCheckScope guard(this);
fcvtzs(rd, vn, fbits);
}
+ void Fjcvtzs(const Register& rd, const VRegister& vn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!rd.IsZero());
+ SingleEmissionCheckScope guard(this);
+ fjcvtzs(rd, vn);
+ }
void Fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(!rd.IsZero());
@@ -1389,7 +1528,7 @@
// signalling NaNs to quiet NaNs when converting between float and double.
void Fmov(VRegister vd, double imm);
void Fmov(VRegister vd, float imm);
- void Fmov(VRegister vd, const F16 imm);
+ void Fmov(VRegister vd, const Float16 imm);
// Provide a template to allow other types to be converted automatically.
template <typename T>
void Fmov(VRegister vd, T imm) {
@@ -1568,6 +1707,91 @@
COMPARE_AND_SWAP_PAIR_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
#undef DEFINE_MACRO_ASM_FUNC
+// These macros generate all the variations of the atomic memory operations,
+// e.g. ldadd, ldadda, ldaddb, staddl, etc.
+
+// clang-format off
+#define ATOMIC_MEMORY_SIMPLE_MACRO_LIST(V, DEF, MASM_PRE, ASM_PRE) \
+ V(DEF, MASM_PRE##add, ASM_PRE##add) \
+ V(DEF, MASM_PRE##clr, ASM_PRE##clr) \
+ V(DEF, MASM_PRE##eor, ASM_PRE##eor) \
+ V(DEF, MASM_PRE##set, ASM_PRE##set) \
+ V(DEF, MASM_PRE##smax, ASM_PRE##smax) \
+ V(DEF, MASM_PRE##smin, ASM_PRE##smin) \
+ V(DEF, MASM_PRE##umax, ASM_PRE##umax) \
+ V(DEF, MASM_PRE##umin, ASM_PRE##umin)
+
+#define ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM, ASM) \
+ V(MASM##l, ASM##l) \
+ V(MASM##b, ASM##b) \
+ V(MASM##lb, ASM##lb) \
+ V(MASM##h, ASM##h) \
+ V(MASM##lh, ASM##lh)
+
+#define ATOMIC_MEMORY_LOAD_MACRO_MODES(V, MASM, ASM) \
+ ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \
+ V(MASM##a, ASM##a) \
+ V(MASM##al, ASM##al) \
+ V(MASM##ab, ASM##ab) \
+ V(MASM##alb, ASM##alb) \
+ V(MASM##ah, ASM##ah) \
+ V(MASM##alh, ASM##alh)
+// clang-format on
+
+#define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+#define DEFINE_MACRO_STORE_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const MemOperand& src) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, src); \
+ }
+
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_LOAD_MACRO_MODES,
+ DEFINE_MACRO_LOAD_ASM_FUNC,
+ Ld,
+ ld)
+ ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_STORE_MACRO_MODES,
+ DEFINE_MACRO_STORE_ASM_FUNC,
+ St,
+ st)
+
+#define DEFINE_MACRO_SWP_ASM_FUNC(MASM, ASM) \
+ void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM(rs, rt, src); \
+ }
+
+ ATOMIC_MEMORY_LOAD_MACRO_MODES(DEFINE_MACRO_SWP_ASM_FUNC, Swp, swp)
+
+#undef DEFINE_MACRO_LOAD_ASM_FUNC
+#undef DEFINE_MACRO_STORE_ASM_FUNC
+#undef DEFINE_MACRO_SWP_ASM_FUNC
+
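To make the expansion concrete, the (Ld, ld) instantiation of the load list above generates wrappers of the following shape; this is a sketch of just one of them, reconstructed from the macros rather than copied from generated code:

  void Ldaddal(const Register& rs, const Register& rt, const MemOperand& src) {
    VIXL_ASSERT(allow_macro_instructions_);
    SingleEmissionCheckScope guard(this);
    // Atomic add with acquire-release semantics; the old memory value is
    // returned in rt.
    ldaddal(rs, rt, src);
  }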
+ void Ldaprb(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldaprb(rt, src);
+ }
+
+ void Ldaprh(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldaprh(rt, src);
+ }
+
+ void Ldapr(const Register& rt, const MemOperand& src) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ldapr(rt, src);
+ }
+
void Ldnp(const CPURegister& rt,
const CPURegister& rt2,
const MemOperand& src) {
@@ -1838,6 +2062,62 @@
SingleEmissionCheckScope guard(this);
rev32(rd, rn);
}
+ void Rev64(const Register& rd, const Register& rn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ VIXL_ASSERT(!rd.IsZero());
+ VIXL_ASSERT(!rn.IsZero());
+ SingleEmissionCheckScope guard(this);
+ rev64(rd, rn);
+ }
+
+#define PAUTH_MASM_VARIATIONS(V) \
+ V(Paci, paci) \
+ V(Pacd, pacd) \
+ V(Auti, auti) \
+ V(Autd, autd)
+
+#define DEFINE_MACRO_ASM_FUNCS(MASM_PRE, ASM_PRE) \
+ void MASM_PRE##a(const Register& xd, const Register& xn) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM_PRE##a(xd, xn); \
+ } \
+ void MASM_PRE##za(const Register& xd) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM_PRE##za(xd); \
+ } \
+ void MASM_PRE##b(const Register& xd, const Register& xn) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM_PRE##b(xd, xn); \
+ } \
+ void MASM_PRE##zb(const Register& xd) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM_PRE##zb(xd); \
+ }
+
+ PAUTH_MASM_VARIATIONS(DEFINE_MACRO_ASM_FUNCS)
+#undef DEFINE_MACRO_ASM_FUNCS
+
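Each entry in the list above expands to four wrappers (a, za, b and zb); for example, the (Paci, paci) entry produces, in part, a definition of this shape (a sketch reconstructed from the macro):

  void Pacia(const Register& xd, const Register& xn) {
    VIXL_ASSERT(allow_macro_instructions_);
    SingleEmissionCheckScope guard(this);
    // Sign the pointer in xd with key IA, using xn as the modifier.
    pacia(xd, xn);
  }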
+ void Pacga(const Register& xd, const Register& xn, const Register& xm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ pacga(xd, xn, xm);
+ }
+
+ void Xpaci(const Register& xd) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ xpaci(xd);
+ }
+
+ void Xpacd(const Register& xd) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ xpacd(xd);
+ }
void Ror(const Register& rd, const Register& rs, unsigned shift) {
VIXL_ASSERT(allow_macro_instructions_);
VIXL_ASSERT(!rd.IsZero());
diff --git a/src/aarch64/operands-aarch64.h b/src/aarch64/operands-aarch64.h
index 5b154ea..e3dbfa3 100644
--- a/src/aarch64/operands-aarch64.h
+++ b/src/aarch64/operands-aarch64.h
@@ -364,6 +364,7 @@
VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
+ VRegister V2H() const { return VRegister(code_, kSRegSize, 2); }
VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
@@ -373,6 +374,7 @@
bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
+ bool Is2H() const { return (Is32Bits() && (lanes_ == 2)); }
bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
diff --git a/src/aarch64/pointer-auth-aarch64.cc b/src/aarch64/pointer-auth-aarch64.cc
new file mode 100644
index 0000000..55cf4ca
--- /dev/null
+++ b/src/aarch64/pointer-auth-aarch64.cc
@@ -0,0 +1,197 @@
+// Copyright 2018, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
+
+#include "simulator-aarch64.h"
+
+#include "utils-vixl.h"
+
+namespace vixl {
+namespace aarch64 {
+
+// Randomly generated example keys, for use in the simulator only.
+const Simulator::PACKey Simulator::kPACKeyIA = {0xc31718727de20f71,
+ 0xab9fd4e14b2fec51,
+ 0};
+const Simulator::PACKey Simulator::kPACKeyIB = {0xeebb163b474e04c8,
+ 0x5267ac6fc280fb7c,
+ 1};
+const Simulator::PACKey Simulator::kPACKeyDA = {0x5caef808deb8b1e2,
+ 0xd347cbc06b7b0f77,
+ 0};
+const Simulator::PACKey Simulator::kPACKeyDB = {0xe06aa1a949ba8cc7,
+ 0xcfde69e3db6d0432,
+ 1};
+
+// The general PAC key isn't intended to be used with AuthPAC, so we give it an
+// invalid key number; this triggers an assertion if it is used incorrectly.
+const Simulator::PACKey Simulator::kPACKeyGA = {0xfcd98a44d564b3d5,
+ 0x6c56df1904bf0ddc,
+ -1};
+
+static uint64_t GetNibble(uint64_t in_data, int position) {
+ return (in_data >> position) & 0xf;
+}
+
+static uint64_t ShuffleNibbles(uint64_t in_data) {
+ static int in_positions[16] =
+ {4, 36, 52, 40, 44, 0, 24, 12, 56, 60, 8, 32, 16, 28, 20, 48};
+ uint64_t out_data = 0;
+ for (int i = 0; i < 16; i++) {
+ out_data |= GetNibble(in_data, in_positions[i]) << (4 * i);
+ }
+ return out_data;
+}
+
+static uint64_t SubstituteNibbles(uint64_t in_data) {
+ // Randomly chosen substitutes.
+ static uint64_t subs[16] =
+ {4, 7, 3, 9, 10, 14, 0, 1, 15, 2, 8, 6, 12, 5, 11, 13};
+ uint64_t out_data = 0;
+ for (int i = 0; i < 16; i++) {
+ int index = (in_data >> (4 * i)) & 0xf;
+ out_data |= subs[index] << (4 * i);
+ }
+ return out_data;
+}
+
+// Rotate nibble to the left by the amount specified.
+static uint64_t RotNibble(uint64_t in_cell, int amount) {
+ VIXL_ASSERT((amount >= 0) && (amount <= 3));
+
+ in_cell &= 0xf;
+ uint64_t temp = (in_cell << 4) | in_cell;
+ return (temp >> (4 - amount)) & 0xf;
+}
+
+static uint64_t BigShuffle(uint64_t in_data) {
+ uint64_t out_data = 0;
+ for (int i = 0; i < 4; i++) {
+ uint64_t n12 = GetNibble(in_data, 4 * (i + 12));
+ uint64_t n8 = GetNibble(in_data, 4 * (i + 8));
+ uint64_t n4 = GetNibble(in_data, 4 * (i + 4));
+ uint64_t n0 = GetNibble(in_data, 4 * (i + 0));
+
+ uint64_t t0 = RotNibble(n8, 2) ^ RotNibble(n4, 1) ^ RotNibble(n0, 1);
+ uint64_t t1 = RotNibble(n12, 1) ^ RotNibble(n4, 2) ^ RotNibble(n0, 1);
+ uint64_t t2 = RotNibble(n12, 2) ^ RotNibble(n8, 1) ^ RotNibble(n0, 1);
+ uint64_t t3 = RotNibble(n12, 1) ^ RotNibble(n8, 1) ^ RotNibble(n4, 2);
+
+ out_data |= t3 << (4 * (i + 0));
+ out_data |= t2 << (4 * (i + 4));
+ out_data |= t1 << (4 * (i + 8));
+ out_data |= t0 << (4 * (i + 12));
+ }
+ return out_data;
+}
+
+// A simple, non-standard hash function invented for the simulator. It mixes
+// reasonably well; however, it is not intended to be cryptographically secure
+// and may have a higher collision rate than other hashing algorithms.
+uint64_t Simulator::ComputePAC(uint64_t data, uint64_t context, PACKey key) {
+ uint64_t working_value = data ^ key.high;
+ working_value = BigShuffle(working_value);
+ working_value = ShuffleNibbles(working_value);
+ working_value ^= key.low;
+ working_value = ShuffleNibbles(working_value);
+ working_value = BigShuffle(working_value);
+ working_value ^= context;
+ working_value = SubstituteNibbles(working_value);
+ working_value = BigShuffle(working_value);
+ working_value = SubstituteNibbles(working_value);
+
+ return working_value;
+}
+
+// For a pointer without a PAC code, the TTBR is selected by bit 63 or bit 55,
+// depending on TBI; once a PAC code has been added, it is always bit 55. For
+// this reason, the TTBR must be determined at the call site.
+uint64_t Simulator::CalculatePACMask(uint64_t ptr, PointerType type, int ttbr) {
+ int bottom_pac_bit = GetBottomPACBit(ptr, ttbr);
+ int top_pac_bit = GetTopPACBit(ptr, type);
+ return ExtractUnsignedBitfield64(top_pac_bit,
+ bottom_pac_bit,
+ 0xffffffffffffffff & ~kTTBRMask)
+ << bottom_pac_bit;
+}
+
+uint64_t Simulator::AuthPAC(uint64_t ptr,
+ uint64_t context,
+ PACKey key,
+ PointerType type) {
+ VIXL_ASSERT((key.number == 0) || (key.number == 1));
+
+ uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1);
+ uint64_t original_ptr =
+ ((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
+
+ uint64_t pac = ComputePAC(original_ptr, context, key);
+
+ uint64_t error_code = 1 << key.number;
+ if ((pac & pac_mask) == (ptr & pac_mask)) {
+ return original_ptr;
+ } else {
+ int error_lsb = GetTopPACBit(ptr, type) - 2;
+ uint64_t error_mask = UINT64_C(0x3) << error_lsb;
+ return (original_ptr & ~error_mask) | (error_code << error_lsb);
+ }
+}
+
+uint64_t Simulator::AddPAC(uint64_t ptr,
+ uint64_t context,
+ PACKey key,
+ PointerType type) {
+ int top_pac_bit = GetTopPACBit(ptr, type);
+
+ // TODO: Properly handle the case where extension bits are bad and TBI is
+ // turned off, and also test me.
+ VIXL_ASSERT(HasTBI(ptr, type));
+ int ttbr = (ptr >> 55) & 1;
+ uint64_t pac_mask = CalculatePACMask(ptr, type, ttbr);
+ uint64_t ext_ptr = (ttbr == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
+
+ uint64_t pac = ComputePAC(ext_ptr, context, key);
+
+ // If the pointer isn't all zeroes or all ones in the PAC bitfield, corrupt
+ // the resulting code.
+ if (((ptr & (pac_mask | kTTBRMask)) != 0x0) &&
+ ((~ptr & (pac_mask | kTTBRMask)) != 0x0)) {
+ pac ^= UINT64_C(1) << (top_pac_bit - 1);
+ }
+
+ uint64_t ttbr_shifted = static_cast<uint64_t>(ttbr) << 55;
+ return (pac & pac_mask) | ttbr_shifted | (ptr & ~pac_mask);
+}
+
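A minimal sketch of how AddPAC and AuthPAC compose, assuming it runs inside a Simulator member, that the pointer is a TTBR0 pointer whose PAC bits are already clear, and that the HasTBI check passes (the pointer value itself is hypothetical):

  uint64_t ptr = UINT64_C(0x0000007fabcd1230);  // hypothetical code pointer
  uint64_t context = 0;                         // e.g. zero for the *z variants
  uint64_t signed_ptr = AddPAC(ptr, context, kPACKeyIA, kInstructionPointer);
  // Authenticating with the same key and context recovers the original pointer.
  VIXL_ASSERT(AuthPAC(signed_ptr, context, kPACKeyIA, kInstructionPointer) == ptr);
  // A mismatch instead leaves the error code (1 << key.number) in the two bits
  // below the top PAC bit, which VisitUnconditionalBranchToRegister treats as a
  // failed authentication.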
+uint64_t Simulator::StripPAC(uint64_t ptr, PointerType type) {
+ uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1);
+ return ((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask);
+}
+} // namespace aarch64
+} // namespace vixl
+
+#endif // VIXL_INCLUDE_SIMULATOR_AARCH64
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index a23d57e..c09650d 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -35,6 +35,8 @@
namespace vixl {
namespace aarch64 {
+using vixl::internal::SimFloat16;
+
const Instruction* Simulator::kEndOfSimAddress = NULL;
void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
@@ -460,7 +462,7 @@
// TODO: This assumes that the C++ implementation handles comparisons in the
// way that we expect (as per AssertSupportedFPCR()).
bool process_exception = false;
- if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
+ if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
ReadNzcv().SetRawValue(FPUnorderedFlag);
if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
(trap == EnableTrap)) {
@@ -839,7 +841,7 @@
double value;
switch (lane_size_in_bytes) {
case kHRegSizeInBytes:
- value = ReadVRegister(code).GetLane<float16>(lane);
+ value = ReadVRegister(code).GetLane<uint16_t>(lane);
break;
case kSRegSizeInBytes:
value = ReadVRegister(code).GetLane<float>(lane);
@@ -851,7 +853,7 @@
value = 0.0;
VIXL_UNREACHABLE();
}
- if (std::isnan(value)) {
+ if (IsNaN(value)) {
// The output for NaNs is implementation defined. Always print `nan`, so
// that traces are coherent across different implementations.
fprintf(stream_, "%s%snan%s", separator, clr_vreg_value, clr_normal);
@@ -925,7 +927,7 @@
"0b01 (Round towards Plus Infinity)",
"0b10 (Round towards Minus Infinity)",
"0b11 (Round towards Zero)"};
- VIXL_ASSERT(ReadFpcr().GetRMode() < (sizeof(rmode) / sizeof(rmode[0])));
+ VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
fprintf(stream_,
"# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
clr_flag_name,
@@ -1085,19 +1087,68 @@
void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
- const Instruction* target = Instruction::Cast(ReadXRegister(instr->GetRn()));
+ bool authenticate = false;
+ bool link = false;
+ uint64_t addr = 0;
+ uint64_t context = 0;
+ Instruction* target;
switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
case BLR:
- WriteLr(instr->GetNextInstruction());
+ link = true;
VIXL_FALLTHROUGH();
case BR:
case RET:
- WritePc(target);
+ addr = ReadXRegister(instr->GetRn());
+ break;
+
+ case BLRAAZ:
+ case BLRABZ:
+ link = true;
+ VIXL_FALLTHROUGH();
+ case BRAAZ:
+ case BRABZ:
+ authenticate = true;
+ addr = ReadXRegister(instr->GetRn());
+ break;
+
+ case BLRAA:
+ case BLRAB:
+ link = true;
+ VIXL_FALLTHROUGH();
+ case BRAA:
+ case BRAB:
+ authenticate = true;
+ addr = ReadXRegister(instr->GetRn());
+ context = ReadXRegister(instr->GetRd());
+ break;
+
+ case RETAA:
+ case RETAB:
+ authenticate = true;
+ addr = ReadXRegister(kLinkRegCode);
+ context = ReadXRegister(31, Reg31IsStackPointer);
break;
default:
VIXL_UNREACHABLE();
}
+
+ if (link) {
+ WriteLr(instr->GetNextInstruction());
+ }
+
+ if (authenticate) {
+ PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
+ addr = AuthPAC(addr, context, key, kInstructionPointer);
+
+ int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
+ if (((addr >> error_lsb) & 0x3) != 0x0) {
+ VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
+ }
+ }
+
+ target = Instruction::Cast(addr);
+ WritePc(target);
}
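In generated code, the authenticated return path handled above is typically produced by a pair of the new macros, for example (an illustrative sequence, not taken from this patch's tests):

  __ Paciasp();  // Sign lr with key IA, using sp as the modifier.
  // ... function body; lr and sp must hold the same values again on return ...
  __ Retaa();    // Authenticate lr against sp with key IA, then return.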
@@ -1427,7 +1478,7 @@
Memory::Write<uint8_t>(address, ReadBRegister(srcdst));
break;
case STR_h:
- Memory::Write<uint16_t>(address, ReadHRegister(srcdst));
+ Memory::Write<uint16_t>(address, ReadHRegisterBits(srcdst));
break;
case STR_s:
Memory::Write<float>(address, ReadSRegister(srcdst));
@@ -1918,6 +1969,223 @@
}
}
+template <typename T>
+void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
+ unsigned rs = instr->GetRs();
+ unsigned rt = instr->GetRt();
+ unsigned rn = instr->GetRn();
+
+ bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->ExtractBit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ T value = ReadRegister<T>(rs);
+
+ T data = Memory::Read<T>(address);
+
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ T result = 0;
+ switch (instr->Mask(AtomicMemorySimpleOpMask)) {
+ case LDADDOp:
+ result = data + value;
+ break;
+ case LDCLROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data & ~value;
+ break;
+ case LDEOROp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data ^ value;
+ break;
+ case LDSETOp:
+ VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
+ result = data | value;
+ break;
+
+ // Signed/Unsigned difference is done via the templated type T.
+ case LDSMAXOp:
+ case LDUMAXOp:
+ result = (data > value) ? data : value;
+ break;
+ case LDSMINOp:
+ case LDUMINOp:
+ result = (data > value) ? value : data;
+ break;
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+
+ Memory::Write<T>(address, result);
+ WriteRegister<T>(rt, data, NoRegLog);
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
+
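+// Helper for SWP and its acquire/release variants: atomically swap the value
+// in Rs with memory, returning the previous memory value in Rt.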
+template <typename T>
+void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
+ unsigned rs = instr->GetRs();
+ unsigned rt = instr->GetRt();
+ unsigned rn = instr->GetRn();
+
+ bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
+ bool is_release = instr->ExtractBit(22) == 1;
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+
+ T data = Memory::Read<T>(address);
+ if (is_acquire) {
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+ }
+
+ if (is_release) {
+ // Approximate store-release by issuing a full barrier before the store.
+ __sync_synchronize();
+ }
+ Memory::Write<T>(address, ReadRegister<T>(rs));
+
+ WriteRegister<T>(rt, data);
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+ LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
+}
+
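+// Helper for the LDAPR instructions: a load with acquire semantics following
+// the weaker RCpc (release-consistent, processor-consistent) model.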
+template <typename T>
+void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
+ unsigned rt = instr->GetRt();
+ unsigned rn = instr->GetRn();
+
+ unsigned element_size = sizeof(T);
+ uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
+
+ // Verify that the address is available to the host.
+ VIXL_ASSERT(address == static_cast<uintptr_t>(address));
+ WriteRegister<T>(rt, Memory::Read<T>(address));
+
+ // Approximate load-acquire by issuing a full barrier after the load.
+ __sync_synchronize();
+
+ LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
+}
+
+#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
+ V(LDADD) \
+ V(LDCLR) \
+ V(LDEOR) \
+ V(LDSET) \
+ V(LDUMAX) \
+ V(LDUMIN)
+
+#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
+ V(LDSMAX) \
+ V(LDSMIN)
+
+void Simulator::VisitAtomicMemory(const Instruction* instr) {
+ switch (instr->Mask(AtomicMemoryMask)) {
+// clang-format off
+#define SIM_FUNC_B(A) \
+ case A##B: \
+ case A##AB: \
+ case A##LB: \
+ case A##ALB:
+#define SIM_FUNC_H(A) \
+ case A##H: \
+ case A##AH: \
+ case A##LH: \
+ case A##ALH:
+#define SIM_FUNC_w(A) \
+ case A##_w: \
+ case A##A_w: \
+ case A##L_w: \
+ case A##AL_w:
+#define SIM_FUNC_x(A) \
+ case A##_x: \
+ case A##A_x: \
+ case A##L_x: \
+ case A##AL_x:
+
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<uint8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
+ AtomicMemorySimpleHelper<int8_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<uint16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
+ AtomicMemorySimpleHelper<int16_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<uint32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
+ AtomicMemorySimpleHelper<int32_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<uint64_t>(instr);
+ break;
+ ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
+ AtomicMemorySimpleHelper<int64_t>(instr);
+ break;
+ // clang-format on
+
+ case SWPB:
+ case SWPAB:
+ case SWPLB:
+ case SWPALB:
+ AtomicMemorySwapHelper<uint8_t>(instr);
+ break;
+ case SWPH:
+ case SWPAH:
+ case SWPLH:
+ case SWPALH:
+ AtomicMemorySwapHelper<uint16_t>(instr);
+ break;
+ case SWP_w:
+ case SWPA_w:
+ case SWPL_w:
+ case SWPAL_w:
+ AtomicMemorySwapHelper<uint32_t>(instr);
+ break;
+ case SWP_x:
+ case SWPA_x:
+ case SWPL_x:
+ case SWPAL_x:
+ AtomicMemorySwapHelper<uint64_t>(instr);
+ break;
+ case LDAPRB:
+ LoadAcquireRCpcHelper<uint8_t>(instr);
+ break;
+ case LDAPRH:
+ LoadAcquireRCpcHelper<uint16_t>(instr);
+ break;
+ case LDAPR_w:
+ LoadAcquireRCpcHelper<uint32_t>(instr);
+ break;
+ case LDAPR_x:
+ LoadAcquireRCpcHelper<uint64_t>(instr);
+ break;
+ }
+}
+
void Simulator::VisitLoadLiteral(const Instruction* instr) {
unsigned rt = instr->GetRt();
@@ -2072,11 +2340,44 @@
}
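+// Each entry gives the instruction suffix, the modifier (context) operand,
+// the key to use, and whether the pointer is an instruction or data pointer.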
+// clang-format off
+#define PAUTH_MODES(V) \
+ V(IA, ReadXRegister(src), kPACKeyIA, kInstructionPointer) \
+ V(IB, ReadXRegister(src), kPACKeyIB, kInstructionPointer) \
+ V(IZA, 0x00000000, kPACKeyIA, kInstructionPointer) \
+ V(IZB, 0x00000000, kPACKeyIB, kInstructionPointer) \
+ V(DA, ReadXRegister(src), kPACKeyDA, kDataPointer) \
+ V(DB, ReadXRegister(src), kPACKeyDB, kDataPointer) \
+ V(DZA, 0x00000000, kPACKeyDA, kDataPointer) \
+ V(DZB, 0x00000000, kPACKeyDB, kDataPointer)
+// clang-format on
+
void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
unsigned dst = instr->GetRd();
unsigned src = instr->GetRn();
switch (instr->Mask(DataProcessing1SourceMask)) {
+#define DEFINE_PAUTH_FUNCS(SUFFIX, MOD, KEY, D) \
+ case PAC##SUFFIX: { \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AddPAC(ptr, MOD, KEY, D)); \
+ break; \
+ } \
+ case AUT##SUFFIX: { \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AuthPAC(ptr, MOD, KEY, D)); \
+ break; \
+ }
+
+ PAUTH_MODES(DEFINE_PAUTH_FUNCS)
+#undef DEFINE_PAUTH_FUNCS
+
+ case XPACI:
+ WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
+ break;
+ case XPACD:
+ WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
+ break;
case RBIT_w:
WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
break;
@@ -2217,6 +2518,14 @@
case RORV_x:
shift_op = ROR;
break;
+ case PACGA: {
+ uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
+ uint64_t src = static_cast<uint64_t>(
+ ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
+ uint64_t code = ComputePAC(dst, src, kPACKeyGA);
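+ // PACGA writes the 32-bit PAC to the top half of the destination and
+ // zeroes the bottom half.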
+ result = code & 0xffffffff00000000;
+ break;
+ }
case CRC32B: {
uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
@@ -2438,7 +2747,7 @@
unsigned dest = instr->GetRd();
switch (instr->Mask(FPImmediateMask)) {
case FMOV_h_imm:
- WriteHRegister(dest, instr->GetImmFP16());
+ WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
break;
case FMOV_s_imm:
WriteSRegister(dest, instr->GetImmFP32());
@@ -2461,6 +2770,12 @@
FPRounding round = ReadRMode();
switch (instr->Mask(FPIntegerConvertMask)) {
+ case FCVTAS_wh:
+ WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
+ break;
+ case FCVTAS_xh:
+ WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
+ break;
case FCVTAS_ws:
WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
break;
@@ -2473,6 +2788,12 @@
case FCVTAS_xd:
WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
break;
+ case FCVTAU_wh:
+ WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
+ break;
+ case FCVTAU_xh:
+ WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
+ break;
case FCVTAU_ws:
WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
break;
@@ -2485,6 +2806,12 @@
case FCVTAU_xd:
WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
break;
+ case FCVTMS_wh:
+ WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
+ break;
+ case FCVTMS_xh:
+ WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
+ break;
case FCVTMS_ws:
WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
break;
@@ -2497,6 +2824,12 @@
case FCVTMS_xd:
WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
break;
+ case FCVTMU_wh:
+ WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
+ break;
+ case FCVTMU_xh:
+ WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
+ break;
case FCVTMU_ws:
WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
break;
@@ -2509,6 +2842,12 @@
case FCVTMU_xd:
WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
break;
+ case FCVTPS_wh:
+ WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
+ break;
+ case FCVTPS_xh:
+ WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
+ break;
case FCVTPS_ws:
WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
break;
@@ -2521,6 +2860,12 @@
case FCVTPS_xd:
WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
break;
+ case FCVTPU_wh:
+ WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
+ break;
+ case FCVTPU_xh:
+ WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
+ break;
case FCVTPU_ws:
WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
break;
@@ -2533,6 +2878,12 @@
case FCVTPU_xd:
WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
break;
+ case FCVTNS_wh:
+ WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
+ break;
+ case FCVTNS_xh:
+ WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
+ break;
case FCVTNS_ws:
WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
break;
@@ -2545,6 +2896,12 @@
case FCVTNS_xd:
WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
break;
+ case FCVTNU_wh:
+ WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
+ break;
+ case FCVTNU_xh:
+ WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
+ break;
case FCVTNU_ws:
WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
break;
@@ -2557,6 +2914,12 @@
case FCVTNU_xd:
WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
break;
+ case FCVTZS_wh:
+ WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
+ break;
+ case FCVTZS_xh:
+ WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
+ break;
case FCVTZS_ws:
WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
break;
@@ -2569,6 +2932,12 @@
case FCVTZS_xd:
WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
break;
+ case FCVTZU_wh:
+ WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
+ break;
+ case FCVTZU_xh:
+ WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
+ break;
case FCVTZU_ws:
WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
break;
@@ -2581,6 +2950,9 @@
case FCVTZU_xd:
WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
break;
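+ // FJCVTZS converts a double to a signed 32-bit integer using the
+ // Javascript rules: round towards zero and keep the low 32 bits of the
+ // truncated result.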
+ case FJCVTZS:
+ WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
+ break;
case FMOV_hw:
WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
break;
@@ -2626,9 +2998,7 @@
break;
case UCVTF_dw: {
WriteDRegister(dst,
- UFixedToDouble(static_cast<uint32_t>(ReadWRegister(src)),
- 0,
- round));
+ UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
break;
}
case SCVTF_sx:
@@ -2641,10 +3011,21 @@
WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
break;
case UCVTF_sw: {
- WriteSRegister(dst,
- UFixedToFloat(static_cast<uint32_t>(ReadWRegister(src)),
- 0,
- round));
+ WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
+ break;
+ }
+ case SCVTF_hx:
+ WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
+ break;
+ case SCVTF_hw:
+ WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
+ break;
+ case UCVTF_hx:
+ WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
+ break;
+ case UCVTF_hw: {
+ WriteHRegister(dst,
+ UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
break;
}
@@ -2677,9 +3058,7 @@
break;
case UCVTF_dw_fixed: {
WriteDRegister(dst,
- UFixedToDouble(static_cast<uint32_t>(ReadWRegister(src)),
- fbits,
- round));
+ UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
break;
}
case SCVTF_sx_fixed:
@@ -2693,9 +3072,23 @@
break;
case UCVTF_sw_fixed: {
WriteSRegister(dst,
- UFixedToFloat(static_cast<uint32_t>(ReadWRegister(src)),
- fbits,
- round));
+ UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
+ break;
+ }
+ case SCVTF_hx_fixed:
+ WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
+ break;
+ case SCVTF_hw_fixed:
+ WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
+ break;
+ case UCVTF_hx_fixed:
+ WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
+ break;
+ case UCVTF_hw_fixed: {
+ WriteHRegister(dst,
+ UFixedToFloat16(ReadRegister<uint32_t>(src),
+ fbits,
+ round));
break;
}
case FCVTZS_xd_fixed:
@@ -2738,6 +3131,30 @@
FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
FPZero));
break;
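+ // The half-precision input is widened to double, which can represent
+ // every FP16 value exactly, before being scaled by 2^fbits and converted.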
+ case FCVTZS_xh_fixed: {
+ double output =
+ static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
+ WriteXRegister(dst, FPToInt64(output, FPZero));
+ break;
+ }
+ case FCVTZS_wh_fixed: {
+ double output =
+ static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
+ WriteWRegister(dst, FPToInt32(output, FPZero));
+ break;
+ }
+ case FCVTZU_xh_fixed: {
+ double output =
+ static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
+ WriteXRegister(dst, FPToUInt64(output, FPZero));
+ break;
+ }
+ case FCVTZU_wh_fixed: {
+ double output =
+ static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
+ WriteWRegister(dst, FPToUInt32(output, FPZero));
+ break;
+ }
default:
VIXL_UNREACHABLE();
}
@@ -2749,6 +3166,14 @@
FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPCompareMask)) {
+ case FCMPE_h:
+ trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCMP_h:
+ FPCompare(ReadHRegister(instr->GetRn()),
+ ReadHRegister(instr->GetRm()),
+ trap);
+ break;
case FCMPE_s:
trap = EnableTrap;
VIXL_FALLTHROUGH();
@@ -2765,6 +3190,12 @@
ReadDRegister(instr->GetRm()),
trap);
break;
+ case FCMPE_h_zero:
+ trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCMP_h_zero:
+ FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
+ break;
case FCMPE_s_zero:
trap = EnableTrap;
VIXL_FALLTHROUGH();
@@ -2788,6 +3219,19 @@
FPTrapFlags trap = DisableTrap;
switch (instr->Mask(FPConditionalCompareMask)) {
+ case FCCMPE_h:
+ trap = EnableTrap;
+ VIXL_FALLTHROUGH();
+ case FCCMP_h:
+ if (ConditionPassed(instr->GetCondition())) {
+ FPCompare(ReadHRegister(instr->GetRn()),
+ ReadHRegister(instr->GetRm()),
+ trap);
+ } else {
+ ReadNzcv().SetFlags(instr->GetNzcv());
+ LogSystemRegister(NZCV);
+ }
+ break;
case FCCMPE_s:
trap = EnableTrap;
VIXL_FALLTHROUGH();
@@ -2831,6 +3275,9 @@
}
switch (instr->Mask(FPConditionalSelectMask)) {
+ case FCSEL_h:
+ WriteHRegister(instr->GetRd(), ReadHRegister(selected));
+ break;
case FCSEL_s:
WriteSRegister(instr->GetRd(), ReadSRegister(selected));
break;
@@ -2861,6 +3308,7 @@
vform = kFormatH;
break;
}
+
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
bool inexact_exception = false;
@@ -2878,12 +3326,14 @@
case FMOV_d:
WriteDRegister(fd, ReadDRegister(fn));
return;
+ case FABS_h:
case FABS_s:
case FABS_d:
fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
// Explicitly log the register update whilst we have type information.
LogVRegister(fd, GetPrintRegisterFormatFP(vform));
return;
+ case FNEG_h:
case FNEG_s:
case FNEG_d:
fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
@@ -2897,48 +3347,58 @@
WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
return;
case FCVT_hs:
- WriteHRegister(fd, FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN()));
+ WriteHRegister(fd,
+ Float16ToRawbits(
+ FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
return;
case FCVT_sh:
WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
return;
case FCVT_dh:
- WriteDRegister(fd,
- FPToDouble(FPToFloat(ReadHRegister(fn), ReadDN()),
- ReadDN()));
+ WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
return;
case FCVT_hd:
- WriteHRegister(fd, FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN()));
+ WriteHRegister(fd,
+ Float16ToRawbits(
+ FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
return;
+ case FSQRT_h:
case FSQRT_s:
case FSQRT_d:
fsqrt(vform, rd, rn);
// Explicitly log the register update whilst we have type information.
LogVRegister(fd, GetPrintRegisterFormatFP(vform));
return;
+ case FRINTI_h:
case FRINTI_s:
case FRINTI_d:
break; // Use FPCR rounding mode.
+ case FRINTX_h:
case FRINTX_s:
case FRINTX_d:
inexact_exception = true;
break;
+ case FRINTA_h:
case FRINTA_s:
case FRINTA_d:
fpcr_rounding = FPTieAway;
break;
+ case FRINTM_h:
case FRINTM_s:
case FRINTM_d:
fpcr_rounding = FPNegativeInfinity;
break;
+ case FRINTN_h:
case FRINTN_s:
case FRINTN_d:
fpcr_rounding = FPTieEven;
break;
+ case FRINTP_h:
case FRINTP_s:
case FRINTP_d:
fpcr_rounding = FPPositiveInfinity;
break;
+ case FRINTZ_h:
case FRINTZ_s:
case FRINTZ_d:
fpcr_rounding = FPZero;
@@ -2967,44 +3427,56 @@
case FP32:
vform = kFormatS;
break;
+ case FP16:
+ vform = kFormatH;
+ break;
}
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
SimVRegister& rm = ReadVRegister(instr->GetRm());
switch (instr->Mask(FPDataProcessing2SourceMask)) {
+ case FADD_h:
case FADD_s:
case FADD_d:
fadd(vform, rd, rn, rm);
break;
+ case FSUB_h:
case FSUB_s:
case FSUB_d:
fsub(vform, rd, rn, rm);
break;
+ case FMUL_h:
case FMUL_s:
case FMUL_d:
fmul(vform, rd, rn, rm);
break;
+ case FNMUL_h:
case FNMUL_s:
case FNMUL_d:
fnmul(vform, rd, rn, rm);
break;
+ case FDIV_h:
case FDIV_s:
case FDIV_d:
fdiv(vform, rd, rn, rm);
break;
+ case FMAX_h:
case FMAX_s:
case FMAX_d:
fmax(vform, rd, rn, rm);
break;
+ case FMIN_h:
case FMIN_s:
case FMIN_d:
fmin(vform, rd, rn, rm);
break;
+ case FMAXNM_h:
case FMAXNM_s:
case FMAXNM_d:
fmaxnm(vform, rd, rn, rm);
break;
+ case FMINNM_h:
case FMINNM_s:
case FMINNM_d:
fminnm(vform, rd, rn, rm);
@@ -3027,6 +3499,18 @@
switch (instr->Mask(FPDataProcessing3SourceMask)) {
// fd = fa +/- (fn * fm)
+ case FMADD_h:
+ WriteHRegister(fd,
+ FPMulAdd(ReadHRegister(fa),
+ ReadHRegister(fn),
+ ReadHRegister(fm)));
+ break;
+ case FMSUB_h:
+ WriteHRegister(fd,
+ FPMulAdd(ReadHRegister(fa),
+ -ReadHRegister(fn),
+ ReadHRegister(fm)));
+ break;
case FMADD_s:
WriteSRegister(fd,
FPMulAdd(ReadSRegister(fa),
@@ -3052,6 +3536,18 @@
ReadDRegister(fm)));
break;
// Negated variants of the above.
+ case FNMADD_h:
+ WriteHRegister(fd,
+ FPMulAdd(-ReadHRegister(fa),
+ -ReadHRegister(fn),
+ ReadHRegister(fm)));
+ break;
+ case FNMSUB_h:
+ WriteHRegister(fd,
+ FPMulAdd(-ReadHRegister(fa),
+ ReadHRegister(fn),
+ ReadHRegister(fm)));
+ break;
case FNMADD_s:
WriteSRegister(fd,
FPMulAdd(-ReadSRegister(fa),
@@ -3090,16 +3586,19 @@
if (instr->Mask(FP64) == FP64) {
double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
- if (std::isnan(result)) {
+ if (IsNaN(result)) {
WriteDRegister(fd, result);
done = true;
}
- } else {
+ } else if (instr->Mask(FP32) == FP32) {
float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
- if (std::isnan(result)) {
+ if (IsNaN(result)) {
WriteSRegister(fd, result);
done = true;
}
+ } else {
+ VIXL_ASSERT(instr->Mask(FP16) == FP16);
+ VIXL_UNIMPLEMENTED();
}
return done;
@@ -3125,11 +3624,43 @@
}
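+// Each entry gives the instruction suffix, the destination register, the
+// modifier operand and the key to use.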
+// clang-format off
+#define PAUTH_SYSTEM_MODES(V) \
+ V(A1716, 17, ReadXRegister(16), kPACKeyIA) \
+ V(B1716, 17, ReadXRegister(16), kPACKeyIB) \
+ V(AZ, 30, 0x00000000, kPACKeyIA) \
+ V(BZ, 30, 0x00000000, kPACKeyIB) \
+ V(ASP, 30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIA) \
+ V(BSP, 30, ReadXRegister(31, Reg31IsStackPointer), kPACKeyIB)
+// clang-format on
+
+
void Simulator::VisitSystem(const Instruction* instr) {
// Some system instructions hijack their Op and Cp fields to represent a
// range of immediates instead of indicating a different instruction. This
// makes the decoding tricky.
- if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
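+ // XPACLRI strips the PAC from the link register (x30). It is allocated in
+ // the hint space, so match its full encoding before decoding the others.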
+ if (instr->GetInstructionBits() == XPACLRI) {
+ WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
+ } else if (instr->Mask(SystemPAuthFMask) == SystemPAuthFixed) {
+ switch (instr->Mask(SystemPAuthMask)) {
+#define DEFINE_PAUTH_FUNCS(SUFFIX, DST, MOD, KEY) \
+ case PACI##SUFFIX: \
+ WriteXRegister(DST, \
+ AddPAC(ReadXRegister(DST), MOD, KEY, kInstructionPointer)); \
+ break; \
+ case AUTI##SUFFIX: \
+ WriteXRegister(DST, \
+ AuthPAC(ReadXRegister(DST), \
+ MOD, \
+ KEY, \
+ kInstructionPointer)); \
+ break;
+
+ PAUTH_SYSTEM_MODES(DEFINE_PAUTH_FUNCS)
+#undef DEFINE_PAUTH_FUNCS
+ }
+ } else if (instr->Mask(SystemExclusiveMonitorFMask) ==
+ SystemExclusiveMonitorFixed) {
VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
switch (instr->Mask(SystemExclusiveMonitorMask)) {
case CLREX: {
@@ -3173,6 +3704,7 @@
VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
switch (instr->GetImmHint()) {
case NOP:
+ case ESB:
case CSDB:
break;
default:
@@ -3514,6 +4046,111 @@
}
+void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
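+ // Bit 30 (Q) selects between the 4H (64-bit) and 8H (128-bit) formats.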
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
+ NEONFormatDecoder nfd(instr);
+ VectorFormat fpf = nfd.GetVectorFormat(&map_half);
+
+ FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(NEON2RegMiscFP16Mask)) {
+ case NEON_SCVTF_H:
+ scvtf(fpf, rd, rn, 0, fpcr_rounding);
+ return;
+ case NEON_UCVTF_H:
+ ucvtf(fpf, rd, rn, 0, fpcr_rounding);
+ return;
+ case NEON_FCVTNS_H:
+ fcvts(fpf, rd, rn, FPTieEven);
+ return;
+ case NEON_FCVTNU_H:
+ fcvtu(fpf, rd, rn, FPTieEven);
+ return;
+ case NEON_FCVTPS_H:
+ fcvts(fpf, rd, rn, FPPositiveInfinity);
+ return;
+ case NEON_FCVTPU_H:
+ fcvtu(fpf, rd, rn, FPPositiveInfinity);
+ return;
+ case NEON_FCVTMS_H:
+ fcvts(fpf, rd, rn, FPNegativeInfinity);
+ return;
+ case NEON_FCVTMU_H:
+ fcvtu(fpf, rd, rn, FPNegativeInfinity);
+ return;
+ case NEON_FCVTZS_H:
+ fcvts(fpf, rd, rn, FPZero);
+ return;
+ case NEON_FCVTZU_H:
+ fcvtu(fpf, rd, rn, FPZero);
+ return;
+ case NEON_FCVTAS_H:
+ fcvts(fpf, rd, rn, FPTieAway);
+ return;
+ case NEON_FCVTAU_H:
+ fcvtu(fpf, rd, rn, FPTieAway);
+ return;
+ case NEON_FRINTI_H:
+ frint(fpf, rd, rn, fpcr_rounding, false);
+ return;
+ case NEON_FRINTX_H:
+ frint(fpf, rd, rn, fpcr_rounding, true);
+ return;
+ case NEON_FRINTA_H:
+ frint(fpf, rd, rn, FPTieAway, false);
+ return;
+ case NEON_FRINTM_H:
+ frint(fpf, rd, rn, FPNegativeInfinity, false);
+ return;
+ case NEON_FRINTN_H:
+ frint(fpf, rd, rn, FPTieEven, false);
+ return;
+ case NEON_FRINTP_H:
+ frint(fpf, rd, rn, FPPositiveInfinity, false);
+ return;
+ case NEON_FRINTZ_H:
+ frint(fpf, rd, rn, FPZero, false);
+ return;
+ case NEON_FABS_H:
+ fabs_(fpf, rd, rn);
+ return;
+ case NEON_FNEG_H:
+ fneg(fpf, rd, rn);
+ return;
+ case NEON_FSQRT_H:
+ fsqrt(fpf, rd, rn);
+ return;
+ case NEON_FRSQRTE_H:
+ frsqrte(fpf, rd, rn);
+ return;
+ case NEON_FRECPE_H:
+ frecpe(fpf, rd, rn, fpcr_rounding);
+ return;
+ case NEON_FCMGT_H_zero:
+ fcmp_zero(fpf, rd, rn, gt);
+ return;
+ case NEON_FCMGE_H_zero:
+ fcmp_zero(fpf, rd, rn, ge);
+ return;
+ case NEON_FCMEQ_H_zero:
+ fcmp_zero(fpf, rd, rn, eq);
+ return;
+ case NEON_FCMLE_H_zero:
+ fcmp_zero(fpf, rd, rn, le);
+ return;
+ case NEON_FCMLT_H_zero:
+ fcmp_zero(fpf, rd, rn, lt);
+ return;
+ default:
+ VIXL_UNIMPLEMENTED();
+ return;
+ }
+}
+
+
void Simulator::VisitNEON3Same(const Instruction* instr) {
NEONFormatDecoder nfd(instr);
SimVRegister& rd = ReadVRegister(instr->GetRd());
@@ -3773,6 +4410,59 @@
}
+void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
+ NEONFormatDecoder nfd(instr);
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+ VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
+ switch (instr->Mask(NEON3SameFP16Mask)) {
+#define SIM_FUNC(A, B) \
+ case NEON_##A##_H: \
+ B(vf, rd, rn, rm); \
+ break;
+ SIM_FUNC(FMAXNM, fmaxnm);
+ SIM_FUNC(FMLA, fmla);
+ SIM_FUNC(FADD, fadd);
+ SIM_FUNC(FMULX, fmulx);
+ SIM_FUNC(FMAX, fmax);
+ SIM_FUNC(FRECPS, frecps);
+ SIM_FUNC(FMINNM, fminnm);
+ SIM_FUNC(FMLS, fmls);
+ SIM_FUNC(FSUB, fsub);
+ SIM_FUNC(FMIN, fmin);
+ SIM_FUNC(FRSQRTS, frsqrts);
+ SIM_FUNC(FMAXNMP, fmaxnmp);
+ SIM_FUNC(FADDP, faddp);
+ SIM_FUNC(FMUL, fmul);
+ SIM_FUNC(FMAXP, fmaxp);
+ SIM_FUNC(FDIV, fdiv);
+ SIM_FUNC(FMINNMP, fminnmp);
+ SIM_FUNC(FABD, fabd);
+ SIM_FUNC(FMINP, fminp);
+#undef SIM_FUNC
+ case NEON_FCMEQ_H:
+ fcmp(vf, rd, rn, rm, eq);
+ break;
+ case NEON_FCMGE_H:
+ fcmp(vf, rd, rn, rm, ge);
+ break;
+ case NEON_FACGE_H:
+ fabscmp(vf, rd, rn, rm, ge);
+ break;
+ case NEON_FCMGT_H:
+ fcmp(vf, rd, rn, rm, gt);
+ break;
+ case NEON_FACGT_H:
+ fabscmp(vf, rd, rn, rm, gt);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ break;
+ }
+}
+
void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
NEONFormatDecoder nfd(instr);
SimVRegister& rd = ReadVRegister(instr->GetRd());
@@ -3983,11 +4673,31 @@
void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
NEONFormatDecoder nfd(instr);
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
+
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
- // The input operand's VectorFormat is passed for these instructions.
- if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
+ VectorFormat vf = nfd.GetVectorFormat(&map_half);
+ switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
+ case NEON_FMAXV_H:
+ fmaxv(vf, rd, rn);
+ break;
+ case NEON_FMINV_H:
+ fminv(vf, rd, rn);
+ break;
+ case NEON_FMAXNMV_H:
+ fmaxnmv(vf, rd, rn);
+ break;
+ case NEON_FMINNMV_H:
+ fminnmv(vf, rd, rn);
+ break;
+ default:
+ VIXL_UNIMPLEMENTED();
+ }
+ } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
+ // The input operand's VectorFormat is passed for these instructions.
VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
switch (instr->Mask(NEONAcrossLanesFPMask)) {
@@ -4040,7 +4750,9 @@
void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
NEONFormatDecoder nfd(instr);
+ static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
VectorFormat vf_r = nfd.GetVectorFormat();
+ VectorFormat vf_half = nfd.GetVectorFormat(&map_half);
VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
SimVRegister& rd = ReadVRegister(instr->GetRd());
@@ -4157,22 +4869,37 @@
break;
default:
index = instr->GetNEONH();
- if ((instr->GetFPType() & 1) == 0) {
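+ // Half-precision operations use a three-bit H:L:M index and encode the
+ // register in the low four bits of Rm; single precision uses H:L and
+ // double precision uses H alone.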
+ if (instr->GetFPType() == 0) {
+ rm_reg &= 0xf;
+ index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
+ } else if ((instr->GetFPType() & 1) == 0) {
index = (index << 1) | instr->GetNEONL();
}
vf = nfd.GetVectorFormat(nfd.FPFormatMap());
switch (instr->Mask(NEONByIndexedElementFPMask)) {
+ case NEON_FMUL_H_byelement:
+ vf = vf_half;
+ VIXL_FALLTHROUGH();
case NEON_FMUL_byelement:
Op = &Simulator::fmul;
break;
+ case NEON_FMLA_H_byelement:
+ vf = vf_half;
+ VIXL_FALLTHROUGH();
case NEON_FMLA_byelement:
Op = &Simulator::fmla;
break;
+ case NEON_FMLS_H_byelement:
+ vf = vf_half;
+ VIXL_FALLTHROUGH();
case NEON_FMLS_byelement:
Op = &Simulator::fmls;
break;
+ case NEON_FMULX_H_byelement:
+ vf = vf_half;
+ VIXL_FALLTHROUGH();
case NEON_FMULX_byelement:
Op = &Simulator::fmulx;
break;
@@ -4710,7 +5437,7 @@
} else { // cmode_0 == 1, cmode == 0xf.
if (half_enc == 1) {
vform = q ? kFormat8H : kFormat4H;
- imm = instr->GetImmNEONFP16();
+ imm = Float16ToRawbits(instr->GetImmNEONFP16());
} else if (op_bit == 0) {
vform = q ? kFormat4S : kFormat2S;
imm = FloatToRawbits(instr->GetImmNEONFP32());
@@ -4911,6 +5638,78 @@
}
+void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
+ VectorFormat fpf = kFormatH;
+ FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
+
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+
+ switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
+ case NEON_FRECPE_H_scalar:
+ frecpe(fpf, rd, rn, fpcr_rounding);
+ break;
+ case NEON_FRECPX_H_scalar:
+ frecpx(fpf, rd, rn);
+ break;
+ case NEON_FRSQRTE_H_scalar:
+ frsqrte(fpf, rd, rn);
+ break;
+ case NEON_FCMGT_H_zero_scalar:
+ fcmp_zero(fpf, rd, rn, gt);
+ break;
+ case NEON_FCMGE_H_zero_scalar:
+ fcmp_zero(fpf, rd, rn, ge);
+ break;
+ case NEON_FCMEQ_H_zero_scalar:
+ fcmp_zero(fpf, rd, rn, eq);
+ break;
+ case NEON_FCMLE_H_zero_scalar:
+ fcmp_zero(fpf, rd, rn, le);
+ break;
+ case NEON_FCMLT_H_zero_scalar:
+ fcmp_zero(fpf, rd, rn, lt);
+ break;
+ case NEON_SCVTF_H_scalar:
+ scvtf(fpf, rd, rn, 0, fpcr_rounding);
+ break;
+ case NEON_UCVTF_H_scalar:
+ ucvtf(fpf, rd, rn, 0, fpcr_rounding);
+ break;
+ case NEON_FCVTNS_H_scalar:
+ fcvts(fpf, rd, rn, FPTieEven);
+ break;
+ case NEON_FCVTNU_H_scalar:
+ fcvtu(fpf, rd, rn, FPTieEven);
+ break;
+ case NEON_FCVTPS_H_scalar:
+ fcvts(fpf, rd, rn, FPPositiveInfinity);
+ break;
+ case NEON_FCVTPU_H_scalar:
+ fcvtu(fpf, rd, rn, FPPositiveInfinity);
+ break;
+ case NEON_FCVTMS_H_scalar:
+ fcvts(fpf, rd, rn, FPNegativeInfinity);
+ break;
+ case NEON_FCVTMU_H_scalar:
+ fcvtu(fpf, rd, rn, FPNegativeInfinity);
+ break;
+ case NEON_FCVTZS_H_scalar:
+ fcvts(fpf, rd, rn, FPZero);
+ break;
+ case NEON_FCVTZU_H_scalar:
+ fcvtu(fpf, rd, rn, FPZero);
+ break;
+ case NEON_FCVTAS_H_scalar:
+ fcvts(fpf, rd, rn, FPTieAway);
+ break;
+ case NEON_FCVTAU_H_scalar:
+ fcvtu(fpf, rd, rn, FPTieAway);
+ break;
+ }
+}
+
+
void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
VectorFormat vf = nfd.GetVectorFormat();
@@ -5049,6 +5848,44 @@
}
}
+void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
+ SimVRegister& rd = ReadVRegister(instr->GetRd());
+ SimVRegister& rn = ReadVRegister(instr->GetRn());
+ SimVRegister& rm = ReadVRegister(instr->GetRm());
+
+ switch (instr->Mask(NEONScalar3SameFP16Mask)) {
+ case NEON_FABD_H_scalar:
+ fabd(kFormatH, rd, rn, rm);
+ break;
+ case NEON_FMULX_H_scalar:
+ fmulx(kFormatH, rd, rn, rm);
+ break;
+ case NEON_FCMEQ_H_scalar:
+ fcmp(kFormatH, rd, rn, rm, eq);
+ break;
+ case NEON_FCMGE_H_scalar:
+ fcmp(kFormatH, rd, rn, rm, ge);
+ break;
+ case NEON_FCMGT_H_scalar:
+ fcmp(kFormatH, rd, rn, rm, gt);
+ break;
+ case NEON_FACGE_H_scalar:
+ fabscmp(kFormatH, rd, rn, rm, ge);
+ break;
+ case NEON_FACGT_H_scalar:
+ fabscmp(kFormatH, rd, rn, rm, gt);
+ break;
+ case NEON_FRECPS_H_scalar:
+ frecps(kFormatH, rd, rn, rm);
+ break;
+ case NEON_FRSQRTS_H_scalar:
+ frsqrts(kFormatH, rd, rn, rm);
+ break;
+ default:
+ VIXL_UNREACHABLE();
+ }
+}
+
void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
@@ -5115,19 +5952,27 @@
default:
vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
index = instr->GetNEONH();
- if ((instr->GetFPType() & 1) == 0) {
+ if (instr->GetFPType() == 0) {
+ index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
+ rm_reg &= 0xf;
+ vf = kFormatH;
+ } else if ((instr->GetFPType() & 1) == 0) {
index = (index << 1) | instr->GetNEONL();
}
switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
+ case NEON_FMUL_H_byelement_scalar:
case NEON_FMUL_byelement_scalar:
Op = &Simulator::fmul;
break;
+ case NEON_FMLA_H_byelement_scalar:
case NEON_FMLA_byelement_scalar:
Op = &Simulator::fmla;
break;
+ case NEON_FMLS_H_byelement_scalar:
case NEON_FMLS_byelement_scalar:
Op = &Simulator::fmls;
break;
+ case NEON_FMULX_H_byelement_scalar:
case NEON_FMULX_byelement_scalar:
Op = &Simulator::fmulx;
break;
@@ -5159,27 +6004,36 @@
void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
- NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
+ NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
VectorFormat vf = nfd.GetVectorFormat();
SimVRegister& rd = ReadVRegister(instr->GetRd());
SimVRegister& rn = ReadVRegister(instr->GetRn());
switch (instr->Mask(NEONScalarPairwiseMask)) {
- case NEON_ADDP_scalar:
- addp(vf, rd, rn);
+ case NEON_ADDP_scalar: {
+ // All pairwise operations except ADDP use bit U to differentiate FP16
+ // from FP32/FP64 variations.
+ NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
+ addp(nfd_addp.GetVectorFormat(), rd, rn);
break;
+ }
+ case NEON_FADDP_h_scalar:
case NEON_FADDP_scalar:
faddp(vf, rd, rn);
break;
+ case NEON_FMAXP_h_scalar:
case NEON_FMAXP_scalar:
fmaxp(vf, rd, rn);
break;
+ case NEON_FMAXNMP_h_scalar:
case NEON_FMAXNMP_scalar:
fmaxnmp(vf, rd, rn);
break;
+ case NEON_FMINP_h_scalar:
case NEON_FMINP_scalar:
fminp(vf, rd, rn);
break;
+ case NEON_FMINNMP_h_scalar:
case NEON_FMINNMP_scalar:
fminnmp(vf, rd, rn);
break;
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index f63f0c2..a411787 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -39,7 +39,6 @@
#include "instructions-aarch64.h"
#include "instrument-aarch64.h"
#include "simulator-constants-aarch64.h"
-#include "utils-aarch64.h"
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
@@ -97,12 +96,11 @@
// Write the specified value. The value is zero-extended if necessary.
template <typename T>
void Write(T new_value) {
- VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes);
if (sizeof(new_value) < kSizeInBytes) {
// All AArch64 registers are zero-extending.
memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value));
}
- memcpy(value_, &new_value, sizeof(new_value));
+ WriteLane(new_value, 0);
NotifyRegisterWrite();
}
template <typename T>
@@ -116,10 +114,7 @@
// 0 represents the least significant bits.
template <typename T>
void Insert(int lane, T new_value) {
- VIXL_ASSERT(lane >= 0);
- VIXL_ASSERT((sizeof(new_value) + (lane * sizeof(new_value))) <=
- kSizeInBytes);
- memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value));
+ WriteLane(new_value, lane);
NotifyRegisterWrite();
}
@@ -134,9 +129,7 @@
template <typename T>
T GetLane(int lane) const {
T result;
- VIXL_ASSERT(lane >= 0);
- VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
- memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
+ ReadLane(&result, lane);
return result;
}
template <typename T>
@@ -158,10 +151,44 @@
bool written_since_last_log_;
void NotifyRegisterWrite() { written_since_last_log_ = true; }
+
+ private:
+ template <typename T>
+ void ReadLane(T* dst, int lane) const {
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= kSizeInBytes);
+ memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst));
+ }
+
+ template <typename T>
+ void WriteLane(T src, int lane) {
+ VIXL_ASSERT(lane >= 0);
+ VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= kSizeInBytes);
+ memcpy(&value_[lane * sizeof(src)], &src, sizeof(src));
+ }
};
typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31
typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31
+// The default ReadLane and WriteLane methods use memcpy, and so assume that
+// the type being copied is trivially copyable. We have to provide alternative
+// implementations for SimFloat16, which cannot be copied this way.
+
+template <>
+template <>
+inline void SimVRegister::ReadLane(vixl::internal::SimFloat16* dst,
+ int lane) const {
+ uint16_t rawbits;
+ ReadLane(&rawbits, lane);
+ *dst = RawbitsToFloat16(rawbits);
+}
+
+template <>
+template <>
+inline void SimVRegister::WriteLane(vixl::internal::SimFloat16 src, int lane) {
+ WriteLane(Float16ToRawbits(src), lane);
+}
+
// Representation of a vector register, with typed getters and setters for lanes
// and additional information to represent lane state.
class LogicVRegister {
@@ -169,10 +196,10 @@
inline LogicVRegister(
SimVRegister& other) // NOLINT(runtime/references)(runtime/explicit)
: register_(other) {
- for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) {
+ for (size_t i = 0; i < ArrayLength(saturated_); i++) {
saturated_[i] = kNotSaturated;
}
- for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) {
+ for (size_t i = 0; i < ArrayLength(round_); i++) {
round_[i] = 0;
}
}
@@ -977,11 +1004,11 @@
return ReadBRegister(code);
}
- int16_t ReadHRegister(unsigned code) const {
- return ReadVRegister<int16_t>(code);
+ vixl::internal::SimFloat16 ReadHRegister(unsigned code) const {
+ return RawbitsToFloat16(ReadHRegisterBits(code));
}
VIXL_DEPRECATED("ReadHRegister", int16_t hreg(unsigned code) const) {
- return ReadHRegister(code);
+ return Float16ToRawbits(ReadHRegister(code));
}
uint16_t ReadHRegisterBits(unsigned code) const {
@@ -1098,6 +1125,12 @@
}
void WriteHRegister(unsigned code,
+ vixl::internal::SimFloat16 value,
+ RegLogMode log_mode = LogRegWrites) {
+ WriteVRegister(code, Float16ToRawbits(value), log_mode);
+ }
+
+ void WriteHRegister(unsigned code,
int16_t value,
RegLogMode log_mode = LogRegWrites) {
WriteVRegister(code, value, log_mode);
@@ -1397,9 +1430,9 @@
return GetPrintRegisterFormatForSizeFP(sizeof(value));
}
- PrintRegisterFormat GetPrintRegisterFormat(float16 value) {
- VIXL_STATIC_ASSERT(sizeof(value) == kHRegSizeInBytes);
- return GetPrintRegisterFormatForSizeFP(sizeof(value));
+ PrintRegisterFormat GetPrintRegisterFormat(Float16 value) {
+ VIXL_STATIC_ASSERT(sizeof(Float16ToRawbits(value)) == kHRegSizeInBytes);
+ return GetPrintRegisterFormatForSizeFP(sizeof(Float16ToRawbits(value)));
}
PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform);
@@ -1552,6 +1585,44 @@
print_exclusive_access_warning_ = false;
}
+ enum PointerType { kDataPointer, kInstructionPointer };
+
+ struct PACKey {
+ uint64_t high;
+ uint64_t low;
+ int number;
+ };
+
+ // In the current implementation, all pointers are assumed to be tagged.
+ bool HasTBI(uint64_t ptr, PointerType type) {
+ USE(ptr, type);
+ return true;
+ }
+
+ // The current implementation uses 48-bit virtual addresses.
+ int GetBottomPACBit(uint64_t ptr, int ttbr) {
+ USE(ptr, ttbr);
+ VIXL_ASSERT((ttbr == 0) || (ttbr == 1));
+ return 48;
+ }
+
+ // The top PAC bit is 55 for the purposes of relative bit fields with TBI;
+ // however, bit 55 is the TTBR bit regardless of TBI, so it isn't part of the
+ // PAC codes in pointers.
+ int GetTopPACBit(uint64_t ptr, PointerType type) {
+ return HasTBI(ptr, type) ? 55 : 63;
+ }
+
+ // Armv8.3 Pointer authentication helpers.
+ uint64_t CalculatePACMask(uint64_t ptr, PointerType type, int ext_bit);
+ uint64_t ComputePAC(uint64_t data, uint64_t context, PACKey key);
+ uint64_t AuthPAC(uint64_t ptr,
+ uint64_t context,
+ PACKey key,
+ PointerType type);
+ uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type);
+ uint64_t StripPAC(uint64_t ptr, PointerType type);
+
// The common CPUFeatures interface with the set of available features.
CPUFeatures* GetCPUFeatures() {
@@ -1740,6 +1811,12 @@
void CompareAndSwapHelper(const Instruction* instr);
template <typename T>
void CompareAndSwapPairHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySimpleHelper(const Instruction* instr);
+ template <typename T>
+ void AtomicMemorySwapHelper(const Instruction* instr);
+ template <typename T>
+ void LoadAcquireRCpcHelper(const Instruction* instr);
uintptr_t AddressModeHelper(unsigned addr_reg,
int64_t offset,
AddrMode addrmode);
@@ -2891,12 +2968,16 @@
LogicVRegister dst,
const LogicVRegister& src);
- typedef float (Simulator::*FPMinMaxOp)(float a, float b);
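+ // Templated so that the same reduction helper can serve SimFloat16, float
+ // and double operations.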
+ template <typename T>
+ struct TFPMinMaxOp {
+ typedef T (Simulator::*type)(T a, T b);
+ };
+ template <typename T>
LogicVRegister fminmaxv(VectorFormat vform,
LogicVRegister dst,
const LogicVRegister& src,
- FPMinMaxOp Op);
+ typename TFPMinMaxOp<T>::type Op);
LogicVRegister fminv(VectorFormat vform,
LogicVRegister dst,
@@ -2937,10 +3018,19 @@
double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
+ ::vixl::internal::SimFloat16 FixedToFloat16(int64_t src,
+ int fbits,
+ FPRounding round_mode);
+ ::vixl::internal::SimFloat16 UFixedToFloat16(uint64_t src,
+ int fbits,
+ FPRounding round_mode);
+ int16_t FPToInt16(double value, FPRounding rmode);
int32_t FPToInt32(double value, FPRounding rmode);
int64_t FPToInt64(double value, FPRounding rmode);
+ uint16_t FPToUInt16(double value, FPRounding rmode);
uint32_t FPToUInt32(double value, FPRounding rmode);
uint64_t FPToUInt64(double value, FPRounding rmode);
+ int32_t FPToFixedJS(double value);
template <typename T>
T FPAdd(T op1, T op2);
@@ -3078,13 +3168,19 @@
static const char* vreg_names[];
private:
+ static const PACKey kPACKeyIA;
+ static const PACKey kPACKeyIB;
+ static const PACKey kPACKeyDA;
+ static const PACKey kPACKeyDB;
+ static const PACKey kPACKeyGA;
+
template <typename T>
static T FPDefaultNaN();
// Standard NaN processing.
template <typename T>
T FPProcessNaN(T op) {
- VIXL_ASSERT(std::isnan(op));
+ VIXL_ASSERT(IsNaN(op));
if (IsSignallingNaN(op)) {
FPProcessException();
}
@@ -3097,10 +3193,10 @@
return FPProcessNaN(op1);
} else if (IsSignallingNaN(op2)) {
return FPProcessNaN(op2);
- } else if (std::isnan(op1)) {
+ } else if (IsNaN(op1)) {
VIXL_ASSERT(IsQuietNaN(op1));
return FPProcessNaN(op1);
- } else if (std::isnan(op2)) {
+ } else if (IsNaN(op2)) {
VIXL_ASSERT(IsQuietNaN(op2));
return FPProcessNaN(op2);
} else {
@@ -3116,13 +3212,13 @@
return FPProcessNaN(op2);
} else if (IsSignallingNaN(op3)) {
return FPProcessNaN(op3);
- } else if (std::isnan(op1)) {
+ } else if (IsNaN(op1)) {
VIXL_ASSERT(IsQuietNaN(op1));
return FPProcessNaN(op1);
- } else if (std::isnan(op2)) {
+ } else if (IsNaN(op2)) {
VIXL_ASSERT(IsQuietNaN(op2));
return FPProcessNaN(op2);
- } else if (std::isnan(op3)) {
+ } else if (IsNaN(op3)) {
VIXL_ASSERT(IsQuietNaN(op3));
return FPProcessNaN(op3);
} else {
diff --git a/src/aarch64/utils-aarch64.cc b/src/aarch64/utils-aarch64.cc
deleted file mode 100644
index 4873add..0000000
--- a/src/aarch64/utils-aarch64.cc
+++ /dev/null
@@ -1,311 +0,0 @@
-// Copyright 2018, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "utils-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-float FPToFloat(float16 value, UseDefaultNaN DN, bool* exception) {
- uint32_t sign = value >> 15;
- uint32_t exponent =
- ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
- kFloat16MantissaBits,
- value);
- uint32_t mantissa =
- ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
-
- switch (Float16Classify(value)) {
- case FP_ZERO:
- return (sign == 0) ? 0.0f : -0.0f;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
-
- case FP_SUBNORMAL: {
- // Calculate shift required to put mantissa into the most-significant bits
- // of the destination mantissa.
- int shift = CountLeadingZeros(mantissa << (32 - 10));
-
- // Shift mantissa and discard implicit '1'.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
- mantissa &= (1 << kFloatMantissaBits) - 1;
-
- // Adjust the exponent for the shift applied, and rebias.
- exponent = exponent - shift + (-15 + 127);
- break;
- }
-
- case FP_NAN:
- if (IsSignallingNaN(value)) {
- if (exception != NULL) {
- *exception = true;
- }
- }
- if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred entirely, except that the top
- // bit is forced to '1', making the result a quiet NaN. The unused
- // (low-order) payload bits are set to 0.
- exponent = (1 << kFloatExponentBits) - 1;
-
- // Increase bits in mantissa, making low-order bits 0.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
- mantissa |= 1 << 22; // Force a quiet NaN.
- break;
-
- case FP_NORMAL:
- // Increase bits in mantissa, making low-order bits 0.
- mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
-
- // Change exponent bias.
- exponent += (-15 + 127);
- break;
-
- default:
- VIXL_UNREACHABLE();
- }
- return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
- mantissa);
-}
-
-
-float FPToFloat(double value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
- USE(round_mode);
-
- switch (std::fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- if (exception != NULL) {
- *exception = true;
- }
- }
- if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- uint64_t raw = DoubleToRawbits(value);
-
- uint32_t sign = raw >> 63;
- uint32_t exponent = (1 << 8) - 1;
- uint32_t payload =
- static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
- payload |= (1 << 22); // Force a quiet NaN.
-
- return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
- }
-
- case FP_ZERO:
- case FP_INFINITE: {
- // In a C++ cast, any value representable in the target type will be
- // unchanged. This is always the case for +/-0.0 and infinities.
- return static_cast<float>(value);
- }
-
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert double-to-float as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
- uint64_t raw = DoubleToRawbits(value);
- // Extract the IEEE-754 double components.
- uint32_t sign = raw >> 63;
- // Extract the exponent and remove the IEEE-754 encoding bias.
- int32_t exponent =
- static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
- // Extract the mantissa and add the implicit '1' bit.
- uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
- if (std::fpclassify(value) == FP_NORMAL) {
- mantissa |= (UINT64_C(1) << 52);
- }
- return FPRoundToFloat(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return value;
-}
-
-
-double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
- switch (std::fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- if (exception != NULL) {
- *exception = true;
- }
- }
- if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred entirely, except that the top
- // bit is forced to '1', making the result a quiet NaN. The unused
- // (low-order) payload bits are set to 0.
- uint32_t raw = FloatToRawbits(value);
-
- uint64_t sign = raw >> 31;
- uint64_t exponent = (1 << 11) - 1;
- uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
- payload <<= (52 - 23); // The unused low-order bits should be 0.
- payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
-
- return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
- }
-
- case FP_ZERO:
- case FP_NORMAL:
- case FP_SUBNORMAL:
- case FP_INFINITE: {
- // All other inputs are preserved in a standard cast, because every value
- // representable using an IEEE-754 float is also representable using an
- // IEEE-754 double.
- return static_cast<double>(value);
- }
- }
-
- VIXL_UNREACHABLE();
- return static_cast<double>(value);
-}
-
-
-float16 FPToFloat16(float value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT(round_mode == FPTieEven);
- USE(round_mode);
-
- uint32_t raw = FloatToRawbits(value);
- int32_t sign = raw >> 31;
- int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
- uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
-
- switch (std::fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- if (exception != NULL) {
- *exception = true;
- }
- }
- if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- float16 result =
- (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
- result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
- result |= (1 << 9); // Force a quiet NaN;
- return result;
- }
-
- case FP_ZERO:
- return (sign == 0) ? 0 : 0x8000;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
-
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert float-to-half as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
-
- // Add the implicit '1' bit to the mantissa.
- mantissa += (1 << 23);
- return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return 0;
-}
-
-
-float16 FPToFloat16(double value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception) {
- // Only the FPTieEven rounding mode is implemented.
- VIXL_ASSERT(round_mode == FPTieEven);
- USE(round_mode);
-
- uint64_t raw = DoubleToRawbits(value);
- int32_t sign = raw >> 63;
- int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
- uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
-
- switch (std::fpclassify(value)) {
- case FP_NAN: {
- if (IsSignallingNaN(value)) {
- if (exception != NULL) {
- *exception = true;
- }
- }
- if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
-
- // Convert NaNs as the processor would:
- // - The sign is propagated.
- // - The payload (mantissa) is transferred as much as possible, except
- // that the top bit is forced to '1', making the result a quiet NaN.
- float16 result =
- (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
- result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
- result |= (1 << 9); // Force a quiet NaN;
- return result;
- }
-
- case FP_ZERO:
- return (sign == 0) ? 0 : 0x8000;
-
- case FP_INFINITE:
- return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
- case FP_NORMAL:
- case FP_SUBNORMAL: {
- // Convert double-to-half as the processor would, assuming that FPCR.FZ
- // (flush-to-zero) is not set.
-
- // Add the implicit '1' bit to the mantissa.
- mantissa += (UINT64_C(1) << 52);
- return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
- }
- }
-
- VIXL_UNREACHABLE();
- return 0;
-}
-} // namespace aarch64
-} // namespace vixl
diff --git a/src/aarch64/utils-aarch64.h b/src/aarch64/utils-aarch64.h
deleted file mode 100644
index d714516..0000000
--- a/src/aarch64/utils-aarch64.h
+++ /dev/null
@@ -1,328 +0,0 @@
-// Copyright 2018, VIXL authors
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-// * Neither the name of ARM Limited nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
-// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef VIXL_AARCH64_UTILS_AARCH64_H_
-#define VIXL_AARCH64_UTILS_AARCH64_H_
-
-#include <limits>
-
-#include "instructions-aarch64.h"
-
-namespace vixl {
-namespace aarch64 {
-
-enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };
-
-// Assemble the specified IEEE-754 components into the target type and apply
-// appropriate rounding.
-// sign: 0 = positive, 1 = negative
-// exponent: Unbiased IEEE-754 exponent.
-// mantissa: The mantissa of the input. The top bit (which is not encoded for
-// normal IEEE-754 values) must not be omitted. This bit has the
-// value 'pow(2, exponent)'.
-//
-// The input value is assumed to be a normalized value. That is, the input may
-// not be infinity or NaN. If the source value is subnormal, it must be
-// normalized before calling this function such that the highest set bit in the
-// mantissa has the value 'pow(2, exponent)'.
-//
-// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
-// calling a templated FPRound.
-template <class T, int ebits, int mbits>
-T FPRound(int64_t sign,
- int64_t exponent,
- uint64_t mantissa,
- FPRounding round_mode) {
- VIXL_ASSERT((sign == 0) || (sign == 1));
-
- // Only FPTieEven and FPRoundOdd rounding modes are implemented.
- VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
-
- // Rounding can promote subnormals to normals, and normals to infinities. For
- // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
- // encodable as a float, but rounding based on the low-order mantissa bits
- // could make it overflow. With ties-to-even rounding, this value would become
- // an infinity.
-
- // ---- Rounding Method ----
- //
- // The exponent is irrelevant in the rounding operation, so we treat the
- // lowest-order bit that will fit into the result ('onebit') as having
- // the value '1'. Similarly, the highest-order bit that won't fit into
- // the result ('halfbit') has the value '0.5'. The 'point' sits between
- // 'onebit' and 'halfbit':
- //
- // These bits fit into the result.
- // |---------------------|
- // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // ||
- // / |
- // / halfbit
- // onebit
- //
- // For subnormal outputs, the range of representable bits is smaller and
- // the position of onebit and halfbit depends on the exponent of the
- // input, but the method is otherwise similar.
- //
- // onebit(frac)
- // |
- // | halfbit(frac) halfbit(adjusted)
- // | / /
- // | | |
- // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
- // 0b00.0... -> 0b00.0... -> 0b00
- // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
- // 0b00.1... -> 0b00.1... -> 0b01
- // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
- // 0b01.0... -> 0b01.0... -> 0b01
- // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
- // 0b01.1... -> 0b01.1... -> 0b10
- // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
- // 0b10.0... -> 0b10.0... -> 0b10
- // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
- // 0b10.1... -> 0b10.1... -> 0b11
- // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
- // ... / | / |
- // / | / |
- // / |
- // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
- //
- // mantissa = (mantissa >> shift) + halfbit(adjusted);
-
- static const int mantissa_offset = 0;
- static const int exponent_offset = mantissa_offset + mbits;
- static const int sign_offset = exponent_offset + ebits;
- VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
-
- // Bail out early for zero inputs.
- if (mantissa == 0) {
- return static_cast<T>(sign << sign_offset);
- }
-
- // If all bits in the exponent are set, the value is infinite or NaN.
- // This is true for all binary IEEE-754 formats.
- static const int infinite_exponent = (1 << ebits) - 1;
- static const int max_normal_exponent = infinite_exponent - 1;
-
- // Apply the exponent bias to encode it for the result. Doing this early makes
- // it easy to detect values that will be infinite or subnormal.
- exponent += max_normal_exponent >> 1;
-
- if (exponent > max_normal_exponent) {
- // Overflow: the input is too large for the result type to represent.
- if (round_mode == FPTieEven) {
- // FPTieEven rounding mode handles overflows using infinities.
- exponent = infinite_exponent;
- mantissa = 0;
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- // FPRoundOdd rounding mode handles overflows using the largest magnitude
- // normal number.
- exponent = max_normal_exponent;
- mantissa = (UINT64_C(1) << exponent_offset) - 1;
- }
- return static_cast<T>((sign << sign_offset) |
- (exponent << exponent_offset) |
- (mantissa << mantissa_offset));
- }
-
- // Calculate the shift required to move the top mantissa bit to the proper
- // place in the destination type.
- const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
- int shift = highest_significant_bit - mbits;
-
- if (exponent <= 0) {
- // The output will be subnormal (before rounding).
- // For subnormal outputs, the shift must be adjusted by the exponent. The +1
- // is necessary because the exponent of a subnormal value (encoded as 0) is
- // the same as the exponent of the smallest normal value (encoded as 1).
- shift += -exponent + 1;
-
- // Handle inputs that would produce a zero output.
- //
- // Shifts higher than highest_significant_bit+1 will always produce a zero
- // result. A shift of exactly highest_significant_bit+1 might produce a
- // non-zero result after rounding.
- if (shift > (highest_significant_bit + 1)) {
- if (round_mode == FPTieEven) {
- // The result will always be +/-0.0.
- return static_cast<T>(sign << sign_offset);
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- VIXL_ASSERT(mantissa != 0);
- // For FPRoundOdd, if the mantissa is too small to represent and
- // non-zero return the next "odd" value.
- return static_cast<T>((sign << sign_offset) | 1);
- }
- }
-
- // Properly encode the exponent for a subnormal output.
- exponent = 0;
- } else {
- // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
- // normal values.
- mantissa &= ~(UINT64_C(1) << highest_significant_bit);
- }
-
- // The casts below are only well-defined for unsigned integers.
- VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
- VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);
-
- if (shift > 0) {
- if (round_mode == FPTieEven) {
- // We have to shift the mantissa to the right. Some precision is lost, so
- // we need to apply rounding.
- uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
- uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
- uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
- uint64_t adjusted = mantissa - adjustment;
- T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
-
- T result =
- static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
- ((mantissa >> shift) << mantissa_offset));
-
- // A very large mantissa can overflow during rounding. If this happens,
- // the exponent should be incremented and the mantissa set to 1.0
- // (encoded as 0). Applying halfbit_adjusted after assembling the float
- // has the nice side-effect that this case is handled for free.
- //
- // This also handles cases where a very large finite value overflows to
- // infinity, or where a very large subnormal value overflows to become
- // normal.
- return result + halfbit_adjusted;
- } else {
- VIXL_ASSERT(round_mode == FPRoundOdd);
- // If any bits at position halfbit or below are set, onebit (ie. the
- // bottom bit of the resulting mantissa) must be set.
- uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
- if (fractional_bits != 0) {
- mantissa |= UINT64_C(1) << shift;
- }
-
- return static_cast<T>((sign << sign_offset) |
- (exponent << exponent_offset) |
- ((mantissa >> shift) << mantissa_offset));
- }
- } else {
- // We have to shift the mantissa to the left (or not at all). The input
- // mantissa is exactly representable in the output mantissa, so apply no
- // rounding correction.
- return static_cast<T>((sign << sign_offset) |
- (exponent << exponent_offset) |
- ((mantissa << -shift) << mantissa_offset));
- }
-}
-
-
-// See FPRound for a description of this function.
-inline double FPRoundToDouble(int64_t sign,
- int64_t exponent,
- uint64_t mantissa,
- FPRounding round_mode) {
- uint64_t bits =
- FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
- exponent,
- mantissa,
- round_mode);
- return RawbitsToDouble(bits);
-}
-
-
-// See FPRound for a description of this function.
-inline float16 FPRoundToFloat16(int64_t sign,
- int64_t exponent,
- uint64_t mantissa,
- FPRounding round_mode) {
- return FPRound<float16,
- kFloat16ExponentBits,
- kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
-}
-
-
-// See FPRound for a description of this function.
-static inline float FPRoundToFloat(int64_t sign,
- int64_t exponent,
- uint64_t mantissa,
- FPRounding round_mode) {
- uint32_t bits =
- FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
- exponent,
- mantissa,
- round_mode);
- return RawbitsToFloat(bits);
-}
-
-
-float FPToFloat(float16 value, UseDefaultNaN DN, bool* exception = NULL);
-float FPToFloat(double value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception = NULL);
-
-double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);
-
-float16 FPToFloat16(float value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception = NULL);
-
-float16 FPToFloat16(double value,
- FPRounding round_mode,
- UseDefaultNaN DN,
- bool* exception = NULL);
-
-
-// Wrapper class for passing FP16 values through the assembler.
-// This is purely to aid with type checking/casting.
-class F16 {
- public:
- static F16 FromRawbits(uint16_t bits) {
- F16 f(0.0);
- f.rawbits_ = bits;
- return f;
- }
- // This is class used to aid in the context of h registers
- // in the assembler(s). It is only used by half-precision
- // instructions and utilities, so shouldn't suffer from
- // any ambiguity. Providing this constructor as implicit
- // allows for a more transparent solution to the end user.
- F16(double dvalue) { // NOLINT(runtime/explicit).
- rawbits_ = FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN);
- }
- uint16_t ToRawbits() { return rawbits_; }
- operator double() const {
- return FPToDouble(FPToFloat(rawbits_, kUseDefaultNaN), kUseDefaultNaN);
- }
-
- private:
- uint16_t rawbits_;
-};
-
-} // namespace aarch64
-} // namespace vixl
-
-#endif // VIXL_AARCH64_UTILS_AARCH64_H_
diff --git a/src/cpu-features.h b/src/cpu-features.h
index e5c68ab..853421b 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -61,6 +61,8 @@
/* Half-precision (FP16) support for FP and NEON, respectively. */ \
V(kFPHalf, "FPHalf", "fphp") \
V(kNEONHalf, "NEONHalf", "asimdhp") \
+ /* The RAS extension, including the ESB instruction. */ \
+ V(kRAS, "RAS", NULL) \
/* Data cache clean to the point of persistence: DC CVAP. */ \
V(kDCPoP, "DCPoP", "dcpop") \
/* Cryptographic support instructions. */ \
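The new kRAS entry plugs into the same feature-query machinery as its neighbours (its hwcap string is NULL because Linux exposes no hwcap for it). A minimal sketch, assuming the existing CPUFeatures constructors and Has() query from this header:

  CPUFeatures features(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kRAS);
  if (features.Has(CPUFeatures::kRAS)) {
    // Safe to rely on RAS-only behaviour, e.g. the ESB instruction.
  }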
diff --git a/src/globals-vixl.h b/src/globals-vixl.h
index 1a71c24..727d494 100644
--- a/src/globals-vixl.h
+++ b/src/globals-vixl.h
@@ -63,9 +63,6 @@
typedef uint8_t byte;
-// Type for half-precision (16 bit) floating point numbers.
-typedef uint16_t float16;
-
const int KBytes = 1024;
const int MBytes = 1024 * KBytes;
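With the bare float16 typedef gone, 16-bit patterns travel through the Float16 wrapper declared later in utils-vixl.h. A small before/after sketch of the conversion helpers:

  // Before: float16 was a plain uint16_t holding raw bits.
  // After: the bits are wrapped explicitly.
  Float16 one = RawbitsToFloat16(0x3c00);   // 1.0 in IEEE-754 half precision
  uint16_t bits = Float16ToRawbits(one);    // 0x3c00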
diff --git a/src/utils-vixl.cc b/src/utils-vixl.cc
index bfe8bf1..41b5586 100644
--- a/src/utils-vixl.cc
+++ b/src/utils-vixl.cc
@@ -30,12 +30,33 @@
namespace vixl {
-uint16_t Float16ToRawbits(float16 value) {
- uint16_t bits = 0;
- memcpy(&bits, &value, 2);
- return value;
+// The default NaN values (for FPCR.DN=1).
+const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
+const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
+const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+
+// Floating-point zero values.
+const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
+const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+
+// Floating-point infinity values.
+const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
+const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
+const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
+const double kFP64PositiveInfinity =
+ RawbitsToDouble(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+ RawbitsToDouble(UINT64_C(0xfff0000000000000));
+
+bool IsZero(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
+ return (bits == Float16ToRawbits(kFP16PositiveZero) ||
+ bits == Float16ToRawbits(kFP16NegativeZero));
}
+uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
+
uint32_t FloatToRawbits(float value) {
uint32_t bits = 0;
memcpy(&bits, &value, 4);
@@ -50,10 +71,10 @@
}
-float16 RawbitsToFloat16(uint16_t bits) {
- float16 value = 0x0;
- memcpy(&value, &bits, 2);
- return value;
+Float16 RawbitsToFloat16(uint16_t bits) {
+ Float16 f;
+ f.rawbits_ = bits;
+ return f;
}
@@ -71,6 +92,23 @@
}
+uint32_t Float16Sign(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(15, 15, rawbits);
+}
+
+
+uint32_t Float16Exp(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(14, 10, rawbits);
+}
+
+uint32_t Float16Mantissa(internal::SimFloat16 val) {
+ uint16_t rawbits = Float16ToRawbits(val);
+ return ExtractUnsignedBitfield32(9, 0, rawbits);
+}
+
+
uint32_t FloatSign(float val) {
uint32_t rawbits = FloatToRawbits(val);
return ExtractUnsignedBitfield32(31, 31, rawbits);
@@ -107,6 +145,14 @@
}
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa) {
+ uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
+ return RawbitsToFloat16(bits);
+}
+
+
float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
return RawbitsToFloat(bits);
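Float16Sign, Float16Exp, Float16Mantissa and Float16Pack mirror the existing float/double helpers for the 1/5/10-bit half-precision layout. A worked sketch (illustrative only):

  // -5.0 in FP16 is 0xc500: sign 1, biased exponent 17, mantissa 0x100.
  internal::SimFloat16 h = RawbitsToFloat16(0xc500);
  uint32_t sign = Float16Sign(h);          // 1
  uint32_t exp = Float16Exp(h);            // 17 (unbiased exponent 2)
  uint32_t mantissa = Float16Mantissa(h);  // 0x100
  internal::SimFloat16 repacked = Float16Pack(sign, exp, mantissa);  // 0xc500 again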
@@ -119,13 +165,14 @@
}
-int Float16Classify(float16 value) {
+int Float16Classify(Float16 value) {
+ uint16_t bits = Float16ToRawbits(value);
uint16_t exponent_max = (1 << 5) - 1;
uint16_t exponent_mask = exponent_max << 10;
uint16_t mantissa_mask = (1 << 10) - 1;
- uint16_t exponent = (value & exponent_mask) >> 10;
- uint16_t mantissa = value & mantissa_mask;
+ uint16_t exponent = (bits & exponent_mask) >> 10;
+ uint16_t mantissa = bits & mantissa_mask;
if (exponent == 0) {
if (mantissa == 0) {
return FP_ZERO;
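Float16Classify now takes the wrapper type rather than raw bits; the classification itself is unchanged. For illustration:

  Float16Classify(RawbitsToFloat16(0x3c00));  // FP_NORMAL    (1.0)
  Float16Classify(RawbitsToFloat16(0x0001));  // FP_SUBNORMAL (smallest subnormal)
  Float16Classify(RawbitsToFloat16(0x7c00));  // FP_INFINITE  (+infinity)
  Float16Classify(RawbitsToFloat16(0x7e00));  // FP_NAN       (default quiet NaN)
  Float16Classify(RawbitsToFloat16(0x8000));  // FP_ZERO      (-0.0)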
@@ -156,10 +203,353 @@
int BitCount(uint64_t value) { return CountSetBits(value); }
+// Float16 definitions.
+
+Float16::Float16(double dvalue) {
+ rawbits_ =
+ Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
+}
+
namespace internal {
+SimFloat16 SimFloat16::operator-() const {
+ return RawbitsToFloat16(rawbits_ ^ 0x8000);
+}
+
+// SimFloat16 definitions.
+SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
+ return static_cast<double>(*this) + static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
+ return static_cast<double>(*this) - static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
+ return static_cast<double>(*this) * static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
+ return static_cast<double>(*this) / static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator<(SimFloat16 rhs) const {
+ return static_cast<double>(*this) < static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator>(SimFloat16 rhs) const {
+ return static_cast<double>(*this) > static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator==(SimFloat16 rhs) const {
+ if (IsNaN(*this) || IsNaN(rhs)) {
+ return false;
+ } else if (IsZero(rhs) && IsZero(*this)) {
+ // +0 and -0 should be treated as equal.
+ return true;
+ }
+ return this->rawbits_ == rhs.rawbits_;
+}
+
+bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
+
+bool SimFloat16::operator==(double rhs) const {
+ return static_cast<double>(*this) == static_cast<double>(rhs);
+}
+
+SimFloat16::operator double() const {
+ return FPToDouble(*this, kIgnoreDefaultNaN);
+}
+
Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
} // namespace internal
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
+ uint16_t bits = Float16ToRawbits(value);
+ uint32_t sign = bits >> 15;
+ uint32_t exponent =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
+ kFloat16MantissaBits,
+ bits);
+ uint32_t mantissa =
+ ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
+
+ switch (Float16Classify(value)) {
+ case FP_ZERO:
+ return (sign == 0) ? 0.0f : -0.0f;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
+
+ case FP_SUBNORMAL: {
+ // Calculate shift required to put mantissa into the most-significant bits
+ // of the destination mantissa.
+ int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
+
+ // Shift mantissa and discard implicit '1'.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
+ mantissa &= (1 << kFloatMantissaBits) - 1;
+
+ // Adjust the exponent for the shift applied, and rebias.
+ exponent = exponent - shift + (-15 + 127);
+ break;
+ }
+
+ case FP_NAN:
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ exponent = (1 << kFloatExponentBits) - 1;
+
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+ mantissa |= 1 << 22; // Force a quiet NaN.
+ break;
+
+ case FP_NORMAL:
+ // Increase bits in mantissa, making low-order bits 0.
+ mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+
+ // Change exponent bias.
+ exponent += (-15 + 127);
+ break;
+
+ default:
+ VIXL_UNREACHABLE();
+ }
+ return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
+ mantissa);
+}
+
+
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+ USE(round_mode);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint64_t raw = DoubleToRawbits(value);
+
+ uint32_t sign = raw >> 63;
+ uint32_t exponent = (1 << 8) - 1;
+ uint32_t payload =
+ static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
+ payload |= (1 << 22); // Force a quiet NaN.
+
+ return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_INFINITE: {
+ // In a C++ cast, any value representable in the target type will be
+ // unchanged. This is always the case for +/-0.0 and infinities.
+ return static_cast<float>(value);
+ }
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-float as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+ uint64_t raw = DoubleToRawbits(value);
+ // Extract the IEEE-754 double components.
+ uint32_t sign = raw >> 63;
+ // Extract the exponent and remove the IEEE-754 encoding bias.
+ int32_t exponent =
+ static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
+ // Extract the mantissa and add the implicit '1' bit.
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+ if (std::fpclassify(value) == FP_NORMAL) {
+ mantissa |= (UINT64_C(1) << 52);
+ }
+ return FPRoundToFloat(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return static_cast<float>(value);
+}
+
+// TODO: We should consider implementing a full FPToDouble(Float16)
+// conversion function (for performance reasons).
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
+ // We can rely on implicit float to double conversion here.
+ return FPToFloat(value, DN, exception);
+}
+
+
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred entirely, except that the top
+ // bit is forced to '1', making the result a quiet NaN. The unused
+ // (low-order) payload bits are set to 0.
+ uint32_t raw = FloatToRawbits(value);
+
+ uint64_t sign = raw >> 31;
+ uint64_t exponent = (1 << 11) - 1;
+ uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
+ payload <<= (52 - 23); // The unused low-order bits should be 0.
+ payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
+
+ return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
+ }
+
+ case FP_ZERO:
+ case FP_NORMAL:
+ case FP_SUBNORMAL:
+ case FP_INFINITE: {
+ // All other inputs are preserved in a standard cast, because every value
+ // representable using an IEEE-754 float is also representable using an
+ // IEEE-754 double.
+ return static_cast<double>(value);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return static_cast<double>(value);
+}
+
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint32_t raw = FloatToRawbits(value);
+ int32_t sign = raw >> 31;
+ int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
+ uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN.
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert float-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (1 << 23);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception) {
+ // Only the FPTieEven rounding mode is implemented.
+ VIXL_ASSERT(round_mode == FPTieEven);
+ USE(round_mode);
+
+ uint64_t raw = DoubleToRawbits(value);
+ int32_t sign = raw >> 63;
+ int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
+ uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+
+ switch (std::fpclassify(value)) {
+ case FP_NAN: {
+ if (IsSignallingNaN(value)) {
+ if (exception != NULL) {
+ *exception = true;
+ }
+ }
+ if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+ // Convert NaNs as the processor would:
+ // - The sign is propagated.
+ // - The payload (mantissa) is transferred as much as possible, except
+ // that the top bit is forced to '1', making the result a quiet NaN.
+ uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+ : Float16ToRawbits(kFP16NegativeInfinity);
+ result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
+ result |= (1 << 9); // Force a quiet NaN.
+ return RawbitsToFloat16(result);
+ }
+
+ case FP_ZERO:
+ return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+ case FP_INFINITE:
+ return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+ case FP_NORMAL:
+ case FP_SUBNORMAL: {
+ // Convert double-to-half as the processor would, assuming that FPCR.FZ
+ // (flush-to-zero) is not set.
+
+ // Add the implicit '1' bit to the mantissa.
+ mantissa += (UINT64_C(1) << 52);
+ return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+ }
+ }
+
+ VIXL_UNREACHABLE();
+ return kFP16PositiveZero;
+}
+
} // namespace vixl
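Taken together, the conversions above behave like the processor with FPCR.DN selectable per call and signalling NaNs reported through the optional bool*. A round-trip sketch (illustrative only):

  bool exception = false;
  Float16 h = FPToFloat16(1.5f, FPTieEven, kIgnoreDefaultNaN, &exception);
  float f = FPToFloat(h, kUseDefaultNaN, &exception);  // 1.5f, exception still false

  // A signalling NaN input is replaced by the default NaN (FPCR.DN=1 behaviour)
  // and flagged via the out-parameter.
  Float16 n = FPToFloat16(RawbitsToFloat(0x7f800001), FPTieEven, kUseDefaultNaN, &exception);
  // Float16ToRawbits(n) == 0x7e00 (kFP16DefaultNaN), exception == true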
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index c4ba800..1c76fcb 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -29,6 +29,7 @@
#include <cmath>
#include <cstring>
+#include <limits>
#include <vector>
#include "compiler-intrinsics-vixl.h"
@@ -65,6 +66,11 @@
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
#endif
+template <typename T, size_t n>
+size_t ArrayLength(const T (&)[n]) {
+ return n;
+}
+
// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
@@ -222,8 +228,21 @@
}
+// Wrapper class for passing FP16 values through the assembler.
+// This is purely to aid with type checking/casting.
+class Float16 {
+ public:
+ explicit Float16(double dvalue);
+ Float16() : rawbits_(0x0) {}
+ friend uint16_t Float16ToRawbits(Float16 value);
+ friend Float16 RawbitsToFloat16(uint16_t bits);
+
+ protected:
+ uint16_t rawbits_;
+};
+
// Floating point representation.
-uint16_t Float16ToRawbits(float16 value);
+uint16_t Float16ToRawbits(Float16 value);
uint32_t FloatToRawbits(float value);
@@ -238,7 +257,7 @@
return DoubleToRawbits(value);
}
-float16 RawbitsToFloat16(uint16_t bits);
+Float16 RawbitsToFloat16(uint16_t bits);
float RawbitsToFloat(uint32_t bits);
VIXL_DEPRECATED("RawbitsToFloat",
@@ -252,6 +271,41 @@
return RawbitsToDouble(bits);
}
+namespace internal {
+
+// Internal simulation class used solely by the simulator to
+// provide an abstraction layer for any half-precision arithmetic.
+class SimFloat16 : public Float16 {
+ public:
+ // TODO: We should investigate making this constructor explicit.
+ // This is currently difficult to do due to a number of templated
+ // functions in the simulator which rely on returning double values.
+ SimFloat16(double dvalue) : Float16(dvalue) {} // NOLINT(runtime/explicit)
+ SimFloat16(Float16 f) { // NOLINT(runtime/explicit)
+ this->rawbits_ = Float16ToRawbits(f);
+ }
+ SimFloat16() : Float16() {}
+ SimFloat16 operator-() const;
+ SimFloat16 operator+(SimFloat16 rhs) const;
+ SimFloat16 operator-(SimFloat16 rhs) const;
+ SimFloat16 operator*(SimFloat16 rhs) const;
+ SimFloat16 operator/(SimFloat16 rhs) const;
+ bool operator<(SimFloat16 rhs) const;
+ bool operator>(SimFloat16 rhs) const;
+ bool operator==(SimFloat16 rhs) const;
+ bool operator!=(SimFloat16 rhs) const;
+ // This is necessary for conversions performed in (macro asm) Fmov.
+ bool operator==(double rhs) const;
+ operator double() const;
+};
+} // namespace internal
+
+uint32_t Float16Sign(internal::SimFloat16 value);
+
+uint32_t Float16Exp(internal::SimFloat16 value);
+
+uint32_t Float16Mantissa(internal::SimFloat16 value);
+
uint32_t FloatSign(float value);
VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
return FloatSign(value);
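SimFloat16 lets the simulator do FP16 arithmetic by promoting to double and rounding back through the Float16(double) constructor. A minimal sketch:

  internal::SimFloat16 a(1.0), b(0.5);
  internal::SimFloat16 sum = a + b;  // evaluated in double, rounded back to FP16
  bool equal = (sum == 1.5);         // true, via the operator==(double) overload
  internal::SimFloat16 neg = -sum;   // unary minus just flips the sign bit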
@@ -283,6 +337,10 @@
return DoubleMantissa(value);
}
+internal::SimFloat16 Float16Pack(uint16_t sign,
+ uint16_t exp,
+ uint16_t mantissa);
+
float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
VIXL_DEPRECATED("FloatPack",
inline float float_pack(uint32_t sign,
@@ -300,21 +358,33 @@
}
// An fpclassify() function for 16-bit half-precision floats.
-int Float16Classify(float16 value);
-VIXL_DEPRECATED("Float16Classify", inline int float16classify(float16 value)) {
- return Float16Classify(value);
+int Float16Classify(Float16 value);
+VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
+ return Float16Classify(RawbitsToFloat16(value));
}
+bool IsZero(Float16 value);
-// Check for float16 (uint16_t) NaNs.
-inline bool IsNaN(float16 value) { return Float16Classify(value) == FP_NAN; }
+inline bool IsNaN(float value) { return std::isnan(value); }
+
+inline bool IsNaN(double value) { return std::isnan(value); }
+
+inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }
+
+inline bool IsInf(float value) { return std::isinf(value); }
+
+inline bool IsInf(double value) { return std::isinf(value); }
+
+inline bool IsInf(Float16 value) {
+ return Float16Classify(value) == FP_INFINITE;
+}
// NaN tests.
inline bool IsSignallingNaN(double num) {
const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
uint64_t raw = DoubleToRawbits(num);
- if (std::isnan(num) && ((raw & kFP64QuietNaNMask) == 0)) {
+ if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
return true;
}
return false;
@@ -324,40 +394,48 @@
inline bool IsSignallingNaN(float num) {
const uint32_t kFP32QuietNaNMask = 0x00400000;
uint32_t raw = FloatToRawbits(num);
- if (std::isnan(num) && ((raw & kFP32QuietNaNMask) == 0)) {
+ if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
return true;
}
return false;
}
-inline bool IsSignallingNaN(float16 num) {
+inline bool IsSignallingNaN(Float16 num) {
const uint16_t kFP16QuietNaNMask = 0x0200;
- return IsNaN(num) && ((num & kFP16QuietNaNMask) == 0);
+ return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
}
template <typename T>
inline bool IsQuietNaN(T num) {
- return std::isnan(num) && !IsSignallingNaN(num);
+ return IsNaN(num) && !IsSignallingNaN(num);
}
// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
- VIXL_ASSERT(std::isnan(num));
+ VIXL_ASSERT(IsNaN(num));
return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
}
inline float ToQuietNaN(float num) {
const uint32_t kFP32QuietNaNMask = 0x00400000;
- VIXL_ASSERT(std::isnan(num));
+ VIXL_ASSERT(IsNaN(num));
return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
}
+inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
+ const uint16_t kFP16QuietNaNMask = 0x0200;
+ VIXL_ASSERT(IsNaN(num));
+ return internal::SimFloat16(
+ RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
+}
+
+
// Fused multiply-add.
inline double FusedMultiplyAdd(double op1, double op2, double a) {
return fma(op1, op2, a);
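The widened overload set lets generic code use IsNaN, IsSignallingNaN, IsQuietNaN and ToQuietNaN uniformly across double, float and FP16 (whose quiet bit is 0x0200). For example:

  internal::SimFloat16 snan = RawbitsToFloat16(0x7c01);  // signalling NaN
  IsSignallingNaN(snan);                                 // true (quiet bit clear)
  internal::SimFloat16 qnan = ToQuietNaN(snan);          // raw bits become 0x7e01
  IsQuietNaN(qnan);                                      // true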
@@ -898,6 +976,306 @@
} // namespace internal
+// The default NaN values (for FPCR.DN=1).
+extern const double kFP64DefaultNaN;
+extern const float kFP32DefaultNaN;
+extern const Float16 kFP16DefaultNaN;
+
+// Floating-point infinity values.
+extern const Float16 kFP16PositiveInfinity;
+extern const Float16 kFP16NegativeInfinity;
+extern const float kFP32PositiveInfinity;
+extern const float kFP32NegativeInfinity;
+extern const double kFP64PositiveInfinity;
+extern const double kFP64NegativeInfinity;
+
+// Floating-point zero values.
+extern const Float16 kFP16PositiveZero;
+extern const Float16 kFP16NegativeZero;
+
+// AArch64 floating-point specifics. These match IEEE-754.
+const unsigned kDoubleMantissaBits = 52;
+const unsigned kDoubleExponentBits = 11;
+const unsigned kFloatMantissaBits = 23;
+const unsigned kFloatExponentBits = 8;
+const unsigned kFloat16MantissaBits = 10;
+const unsigned kFloat16ExponentBits = 5;
+
+enum FPRounding {
+ // The first four values are encodable directly by FPCR<RMode>.
+ FPTieEven = 0x0,
+ FPPositiveInfinity = 0x1,
+ FPNegativeInfinity = 0x2,
+ FPZero = 0x3,
+
+ // The final rounding modes are only available when explicitly specified by
+ // the instruction (such as with fcvta). They cannot be set in FPCR.
+ FPTieAway,
+ FPRoundOdd
+};
+
+enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };
+
+// Assemble the specified IEEE-754 components into the target type and apply
+// appropriate rounding.
+// sign: 0 = positive, 1 = negative
+// exponent: Unbiased IEEE-754 exponent.
+// mantissa: The mantissa of the input. The top bit (which is not encoded for
+// normal IEEE-754 values) must not be omitted. This bit has the
+// value 'pow(2, exponent)'.
+//
+// The input value is assumed to be a normalized value. That is, the input may
+// not be infinity or NaN. If the source value is subnormal, it must be
+// normalized before calling this function such that the highest set bit in the
+// mantissa has the value 'pow(2, exponent)'.
+//
+// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
+// calling a templated FPRound.
+template <class T, int ebits, int mbits>
+T FPRound(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ VIXL_ASSERT((sign == 0) || (sign == 1));
+
+ // Only FPTieEven and FPRoundOdd rounding modes are implemented.
+ VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+
+ // Rounding can promote subnormals to normals, and normals to infinities. For
+ // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
+ // encodable as a float, but rounding based on the low-order mantissa bits
+ // could make it overflow. With ties-to-even rounding, this value would become
+ // an infinity.
+
+ // ---- Rounding Method ----
+ //
+ // The exponent is irrelevant in the rounding operation, so we treat the
+ // lowest-order bit that will fit into the result ('onebit') as having
+ // the value '1'. Similarly, the highest-order bit that won't fit into
+ // the result ('halfbit') has the value '0.5'. The 'point' sits between
+ // 'onebit' and 'halfbit':
+ //
+ // These bits fit into the result.
+ // |---------------------|
+ // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ // ||
+ // / |
+ // / halfbit
+ // onebit
+ //
+ // For subnormal outputs, the range of representable bits is smaller and
+ // the position of onebit and halfbit depends on the exponent of the
+ // input, but the method is otherwise similar.
+ //
+ // onebit(frac)
+ // |
+ // | halfbit(frac) halfbit(adjusted)
+ // | / /
+ // | | |
+ // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00
+ // 0b00.0... -> 0b00.0... -> 0b00
+ // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00
+ // 0b00.1... -> 0b00.1... -> 0b01
+ // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01
+ // 0b01.0... -> 0b01.0... -> 0b01
+ // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10
+ // 0b01.1... -> 0b01.1... -> 0b10
+ // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10
+ // 0b10.0... -> 0b10.0... -> 0b10
+ // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10
+ // 0b10.1... -> 0b10.1... -> 0b11
+ // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11
+ // ... / | / |
+ // / | / |
+ // / |
+ // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
+ //
+ // mantissa = (mantissa >> shift) + halfbit(adjusted);
+
+ static const int mantissa_offset = 0;
+ static const int exponent_offset = mantissa_offset + mbits;
+ static const int sign_offset = exponent_offset + ebits;
+ VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));
+
+ // Bail out early for zero inputs.
+ if (mantissa == 0) {
+ return static_cast<T>(sign << sign_offset);
+ }
+
+ // If all bits in the exponent are set, the value is infinite or NaN.
+ // This is true for all binary IEEE-754 formats.
+ static const int infinite_exponent = (1 << ebits) - 1;
+ static const int max_normal_exponent = infinite_exponent - 1;
+
+ // Apply the exponent bias to encode it for the result. Doing this early makes
+ // it easy to detect values that will be infinite or subnormal.
+ exponent += max_normal_exponent >> 1;
+
+ if (exponent > max_normal_exponent) {
+ // Overflow: the input is too large for the result type to represent.
+ if (round_mode == FPTieEven) {
+ // FPTieEven rounding mode handles overflows using infinities.
+ exponent = infinite_exponent;
+ mantissa = 0;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // FPRoundOdd rounding mode handles overflows using the largest magnitude
+ // normal number.
+ exponent = max_normal_exponent;
+ mantissa = (UINT64_C(1) << exponent_offset) - 1;
+ }
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ (mantissa << mantissa_offset));
+ }
+
+ // Calculate the shift required to move the top mantissa bit to the proper
+ // place in the destination type.
+ const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
+ int shift = highest_significant_bit - mbits;
+
+ if (exponent <= 0) {
+ // The output will be subnormal (before rounding).
+ // For subnormal outputs, the shift must be adjusted by the exponent. The +1
+ // is necessary because the exponent of a subnormal value (encoded as 0) is
+ // the same as the exponent of the smallest normal value (encoded as 1).
+ shift += -exponent + 1;
+
+ // Handle inputs that would produce a zero output.
+ //
+ // Shifts higher than highest_significant_bit+1 will always produce a zero
+ // result. A shift of exactly highest_significant_bit+1 might produce a
+ // non-zero result after rounding.
+ if (shift > (highest_significant_bit + 1)) {
+ if (round_mode == FPTieEven) {
+ // The result will always be +/-0.0.
+ return static_cast<T>(sign << sign_offset);
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ VIXL_ASSERT(mantissa != 0);
+ // For FPRoundOdd, if the mantissa is too small to represent and
+ // non-zero, return the next "odd" value.
+ return static_cast<T>((sign << sign_offset) | 1);
+ }
+ }
+
+ // Properly encode the exponent for a subnormal output.
+ exponent = 0;
+ } else {
+ // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
+ // normal values.
+ mantissa &= ~(UINT64_C(1) << highest_significant_bit);
+ }
+
+ // The casts below are only well-defined for unsigned integers.
+ VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
+ VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);
+
+ if (shift > 0) {
+ if (round_mode == FPTieEven) {
+ // We have to shift the mantissa to the right. Some precision is lost, so
+ // we need to apply rounding.
+ uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
+ uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
+ uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
+ uint64_t adjusted = mantissa - adjustment;
+ T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;
+
+ T result =
+ static_cast<T>((sign << sign_offset) | (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+
+ // A very large mantissa can overflow during rounding. If this happens,
+ // the exponent should be incremented and the mantissa set to 1.0
+ // (encoded as 0). Applying halfbit_adjusted after assembling the float
+ // has the nice side-effect that this case is handled for free.
+ //
+ // This also handles cases where a very large finite value overflows to
+ // infinity, or where a very large subnormal value overflows to become
+ // normal.
+ return result + halfbit_adjusted;
+ } else {
+ VIXL_ASSERT(round_mode == FPRoundOdd);
+ // If any bits at position halfbit or below are set, onebit (i.e. the
+ // bottom bit of the resulting mantissa) must be set.
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
+ if (fractional_bits != 0) {
+ mantissa |= UINT64_C(1) << shift;
+ }
+
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa >> shift) << mantissa_offset));
+ }
+ } else {
+ // We have to shift the mantissa to the left (or not at all). The input
+ // mantissa is exactly representable in the output mantissa, so apply no
+ // rounding correction.
+ return static_cast<T>((sign << sign_offset) |
+ (exponent << exponent_offset) |
+ ((mantissa << -shift) << mantissa_offset));
+ }
+}
+
+
+// See FPRound for a description of this function.
+inline double FPRoundToDouble(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint64_t bits =
+ FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToDouble(bits);
+}
+
+
+// See FPRound for a description of this function.
+inline Float16 FPRoundToFloat16(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ return RawbitsToFloat16(
+ FPRound<uint16_t,
+ kFloat16ExponentBits,
+ kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
+}
+
+
+// See FPRound for a description of this function.
+static inline float FPRoundToFloat(int64_t sign,
+ int64_t exponent,
+ uint64_t mantissa,
+ FPRounding round_mode) {
+ uint32_t bits =
+ FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
+ exponent,
+ mantissa,
+ round_mode);
+ return RawbitsToFloat(bits);
+}
+
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+float FPToFloat(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);
+
+Float16 FPToFloat16(float value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
+
+Float16 FPToFloat16(double value,
+ FPRounding round_mode,
+ UseDefaultNaN DN,
+ bool* exception = NULL);
} // namespace vixl
#endif // VIXL_UTILS_H
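As a closing illustration of the FPRound contract reproduced above (the top mantissa bit is explicit and carries the value pow(2, exponent)), a small worked example using the new half-precision entry point:

  // 0.5: sign 0, unbiased exponent -1, mantissa 1.0 with its top bit at bit 63.
  Float16 h = FPRoundToFloat16(0, -1, UINT64_C(1) << 63, FPTieEven);
  // Float16ToRawbits(h) == 0x3800, i.e. biased exponent 14, fraction 0.

  // The same components rounded to single precision give 0.5f (0x3f000000).
  float f = FPRoundToFloat(0, -1, UINT64_C(1) << 63, FPTieEven);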