Add support for CSSC instructions (#69)
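Add support for the CSSC instructions (abs, cnt, ctz, smax, smin, umax, umin)
to the assembler, macro assembler, disassembler, simulator, CPU feature
detection and CPU features auditor. Also detect MOPS from ID_AA64ISAR2, and
refactor the simulator's VisitDataProcessing1Source to dispatch on the
instruction form hash.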
diff --git a/src/aarch64/assembler-aarch64.cc b/src/aarch64/assembler-aarch64.cc
index df17813..604ec46 100644
--- a/src/aarch64/assembler-aarch64.cc
+++ b/src/aarch64/assembler-aarch64.cc
@@ -2429,6 +2429,52 @@
Emit(0x19c02400 | Rd(rd) | Rn(rn) | Rs(rs));
}
+void Assembler::abs(const Register& rd, const Register& rn) {
+  // ABS is only available with CSSC, and operates on two registers of the
+  // same size and type.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  const Instr base_encoding = 0x5ac02000;
+  Emit(base_encoding | SF(rd) | Rn(rn) | Rd(rd));
+}
+
+void Assembler::cnt(const Register& rd, const Register& rn) {
+  // CNT is only available with CSSC, and operates on two registers of the
+  // same size and type.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  const Instr base_encoding = 0x5ac01c00;
+  Emit(base_encoding | SF(rd) | Rn(rn) | Rd(rd));
+}
+
+void Assembler::ctz(const Register& rd, const Register& rn) {
+  // CTZ is only available with CSSC, and operates on two registers of the
+  // same size and type.
+  VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));
+  VIXL_ASSERT(rd.IsSameSizeAndType(rn));
+
+  const Instr base_encoding = 0x5ac01800;
+  Emit(base_encoding | SF(rd) | Rn(rn) | Rd(rd));
+}
+
+// Encodings of the CSSC min/max instructions, as
+// V(mnemonic, immediate-form opcode, register-form opcode, signed immediate).
+#define MINMAX(V)                        \
+  V(smax, 0x11c00000, 0x1ac06000, true)  \
+  V(smin, 0x11c80000, 0x1ac06800, true)  \
+  V(umax, 0x11c40000, 0x1ac06400, false) \
+  V(umin, 0x11cc0000, 0x1ac06c00, false)
+
+// Defines Assembler::{smax,smin,umax,umin}(rd, rn, op). `op` is either an
+// immediate, encoded in bits <17:10> (signed for smax/smin, unsigned for
+// umax/umin), or a plain register of the same size and type as rd. Like the
+// other CSSC instructions, these require CPUFeatures::kCSSC.
+#define VIXL_DEFINE_ASM_FUNC(FN, IMMOP, REGOP, SIGNED)                     \
+  void Assembler::FN(const Register& rd,                                   \
+                     const Register& rn,                                   \
+                     const Operand& op) {                                  \
+    VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC));                               \
+    VIXL_ASSERT(rd.IsSameSizeAndType(rn));                                 \
+    Instr i = SF(rd) | Rd(rd) | Rn(rn);                                    \
+    if (op.IsImmediate()) {                                                \
+      int64_t imm = op.GetImmediate();                                     \
+      i |= SIGNED ? ImmField<17, 10>(imm) : ImmUnsignedField<17, 10>(imm); \
+      Emit(IMMOP | i);                                                     \
+    } else {                                                               \
+      VIXL_ASSERT(op.IsPlainRegister());                                   \
+      VIXL_ASSERT(op.GetRegister().IsSameSizeAndType(rd));                 \
+      Emit(REGOP | i | Rm(op.GetRegister()));                              \
+    }                                                                      \
+  }
+MINMAX(VIXL_DEFINE_ASM_FUNC)
+#undef VIXL_DEFINE_ASM_FUNC
+#undef MINMAX
+
// NEON structure loads and stores.
Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
Instr addr_field = RnSP(addr.GetBaseRegister());
diff --git a/src/aarch64/assembler-aarch64.h b/src/aarch64/assembler-aarch64.h
index 0d0387f..fbf5a84 100644
--- a/src/aarch64/assembler-aarch64.h
+++ b/src/aarch64/assembler-aarch64.h
@@ -7050,6 +7050,27 @@
// Memory Set, non-temporal.
void setpn(const Register& rd, const Register& rn, const Register& rs);
+ // Absolute value [CSSC].
+ void abs(const Register& rd, const Register& rn);
+
+ // Count set bits [CSSC].
+ void cnt(const Register& rd, const Register& rn);
+
+ // Count Trailing Zeros [CSSC].
+ void ctz(const Register& rd, const Register& rn);
+
+ // Signed Maximum of rn and op (an immediate or a plain register) [CSSC].
+ void smax(const Register& rd, const Register& rn, const Operand& op);
+
+ // Signed Minimum of rn and op (an immediate or a plain register) [CSSC].
+ void smin(const Register& rd, const Register& rn, const Operand& op);
+
+ // Unsigned Maximum of rn and op (an immediate or a plain register) [CSSC].
+ void umax(const Register& rd, const Register& rn, const Operand& op);
+
+ // Unsigned Minimum of rn and op (an immediate or a plain register) [CSSC].
+ void umin(const Register& rd, const Register& rn, const Operand& op);
+
// Emit generic instructions.
// Emit raw instructions into the instruction stream.
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index fba0dfd..3b70cfc 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -81,6 +81,8 @@
const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
+const IDRegister::Field AA64ISAR2::kMOPS(16);
+const IDRegister::Field AA64ISAR2::kCSSC(52);
const IDRegister::Field AA64MMFR0::kECV(60);
@@ -195,6 +197,8 @@
CPUFeatures f;
if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
+ if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
+ if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
return f;
}
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index 7ec3a25..d5a5f8c 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -171,6 +171,8 @@
private:
static const Field kWFXT;
static const Field kRPRES;
+ static const Field kMOPS;
+ static const Field kCSSC;
};
class AA64MMFR0 : public IDRegister {
diff --git a/src/aarch64/cpu-features-auditor-aarch64.cc b/src/aarch64/cpu-features-auditor-aarch64.cc
index 1adb37c..e426932 100644
--- a/src/aarch64/cpu-features-auditor-aarch64.cc
+++ b/src/aarch64/cpu-features-auditor-aarch64.cc
@@ -1800,6 +1800,28 @@
{"setm_set_memcms"_h, CPUFeatures::kMOPS},
{"setpn_set_memcms"_h, CPUFeatures::kMOPS},
{"setp_set_memcms"_h, CPUFeatures::kMOPS},
+ {"abs_32_dp_1src"_h, CPUFeatures::kCSSC},
+ {"abs_64_dp_1src"_h, CPUFeatures::kCSSC},
+ {"cnt_32_dp_1src"_h, CPUFeatures::kCSSC},
+ {"cnt_64_dp_1src"_h, CPUFeatures::kCSSC},
+ {"ctz_32_dp_1src"_h, CPUFeatures::kCSSC},
+ {"ctz_64_dp_1src"_h, CPUFeatures::kCSSC},
+ {"smax_32_dp_2src"_h, CPUFeatures::kCSSC},
+ {"smax_64_dp_2src"_h, CPUFeatures::kCSSC},
+ {"smin_32_dp_2src"_h, CPUFeatures::kCSSC},
+ {"smin_64_dp_2src"_h, CPUFeatures::kCSSC},
+ {"umax_32_dp_2src"_h, CPUFeatures::kCSSC},
+ {"umax_64_dp_2src"_h, CPUFeatures::kCSSC},
+ {"umin_32_dp_2src"_h, CPUFeatures::kCSSC},
+ {"umin_64_dp_2src"_h, CPUFeatures::kCSSC},
+ {"smax_32_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"smax_64_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"smin_32_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"smin_64_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC},
+ {"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC},
};
if (features.count(form_hash) > 0) {
diff --git a/src/aarch64/disasm-aarch64.cc b/src/aarch64/disasm-aarch64.cc
index 24d20df..3592752 100644
--- a/src/aarch64/disasm-aarch64.cc
+++ b/src/aarch64/disasm-aarch64.cc
@@ -730,6 +730,28 @@
{"setm_set_memcms"_h, &Disassembler::DisassembleSet},
{"setpn_set_memcms"_h, &Disassembler::DisassembleSet},
{"setp_set_memcms"_h, &Disassembler::DisassembleSet},
+ {"abs_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"abs_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"cnt_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"cnt_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"ctz_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"ctz_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source},
+ {"smax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"smax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"smin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"smin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"umax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"umax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"umin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"umin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source},
+ {"smax_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"smax_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"smin_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"smin_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"umax_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"umax_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"umin_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
+ {"umin_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm},
};
return &form_to_visitor;
} // NOLINT(readability/fn_size)
@@ -1416,6 +1438,10 @@
Format(instr, mnemonic, form);
}
+void Disassembler::DisassembleMinMaxImm(const Instruction *instr) {
+  // Bit 18 is clear for smax/smin, whose immediate in bits <17:10> is signed,
+  // and set for umax/umin, whose immediate is unsigned.
+  const bool is_unsigned = (instr->ExtractBit(18) != 0);
+  const char *imm_format = is_unsigned ? "'u1710" : "'s1710";
+  FormatWithDecodedMnemonic(instr, "'Rd, 'Rn, #", imm_format);
+}
void Disassembler::VisitCompareBranch(const Instruction *instr) {
FormatWithDecodedMnemonic(instr, "'Rt, 'TImmCmpa");
diff --git a/src/aarch64/disasm-aarch64.h b/src/aarch64/disasm-aarch64.h
index 3345174..cc941bb 100644
--- a/src/aarch64/disasm-aarch64.h
+++ b/src/aarch64/disasm-aarch64.h
@@ -186,6 +186,7 @@
void DisassembleCpy(const Instruction* instr);
void DisassembleSet(const Instruction* instr);
+ void DisassembleMinMaxImm(const Instruction* instr);  // Immediate forms of smax, smin, umax and umin.
void DisassembleSVEShiftLeftImm(const Instruction* instr);
void DisassembleSVEShiftRightImm(const Instruction* instr);
diff --git a/src/aarch64/macro-assembler-aarch64.cc b/src/aarch64/macro-assembler-aarch64.cc
index a48931b..94e153a 100644
--- a/src/aarch64/macro-assembler-aarch64.cc
+++ b/src/aarch64/macro-assembler-aarch64.cc
@@ -1411,6 +1411,33 @@
Add(rd, rn, operand, SetFlags);
}
+// Macro-assembler wrappers for the CSSC min/max instructions, as
+// V(MacroAssembler name, Assembler name, predicate accepting the immediates
+// that are passed straight through to the Assembler).
+#define MINMAX(V)        \
+  V(Smax, smax, IsInt8)  \
+  V(Smin, smin, IsInt8)  \
+  V(Umax, umax, IsUint8) \
+  V(Umin, umin, IsUint8)
+
+// Defines MacroAssembler::{Smax,Smin,Umax,Umin}(rd, rn, op). An immediate
+// rejected by RANGE is first moved into a scratch register of the same size
+// as rd, and the register form is used instead; every other operand is
+// handed to the Assembler as a single instruction.
+#define VIXL_DEFINE_MASM_FUNC(MASM, ASM, RANGE)      \
+  void MacroAssembler::MASM(const Register& rd,      \
+                            const Register& rn,      \
+                            const Operand& op) {     \
+    VIXL_ASSERT(allow_macro_instructions_);          \
+    if (op.IsImmediate()) {                          \
+      int64_t imm = op.GetImmediate();               \
+      if (!RANGE(imm)) {                             \
+        UseScratchRegisterScope temps(this);         \
+        Register temp = temps.AcquireSameSizeAs(rd); \
+        Mov(temp, imm);                              \
+        MASM(rd, rn, temp);                          \
+        return;                                      \
+      }                                              \
+    }                                                \
+    SingleEmissionCheckScope guard(this);            \
+    ASM(rd, rn, op);                                 \
+  }
+MINMAX(VIXL_DEFINE_MASM_FUNC)
+#undef VIXL_DEFINE_MASM_FUNC
+#undef MINMAX
+
void MacroAssembler::St2g(const Register& rt, const MemOperand& addr) {
VIXL_ASSERT(allow_macro_instructions_);
SingleEmissionCheckScope guard(this);
diff --git a/src/aarch64/macro-assembler-aarch64.h b/src/aarch64/macro-assembler-aarch64.h
index f0663ee..78c9e9a 100644
--- a/src/aarch64/macro-assembler-aarch64.h
+++ b/src/aarch64/macro-assembler-aarch64.h
@@ -7777,6 +7777,29 @@
MOPS_LIST(DEFINE_MACRO_ASM_FUNC)
#undef DEFINE_MACRO_ASM_FUNC
+ void Abs(const Register& rd, const Register& rn) {  // Absolute value; emits a single abs.
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ abs(rd, rn);
+ }
+
+ void Cnt(const Register& rd, const Register& rn) {  // Count set bits; emits a single cnt.
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnt(rd, rn);
+ }
+
+ void Ctz(const Register& rd, const Register& rn) {  // Count trailing zeros; emits a single ctz.
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ ctz(rd, rn);
+ }
+
+ void Smax(const Register& rd, const Register& rn, const Operand& op);  // Signed maximum.
+ void Smin(const Register& rd, const Register& rn, const Operand& op);  // Signed minimum.
+ void Umax(const Register& rd, const Register& rn, const Operand& op);  // Unsigned maximum.
+ void Umin(const Register& rd, const Register& rn, const Operand& op);  // Unsigned minimum.
+
template <typename T>
Literal<T>* CreateLiteralDestroyedWithPool(T value) {
return new Literal<T>(value,
diff --git a/src/aarch64/simulator-aarch64.cc b/src/aarch64/simulator-aarch64.cc
index 275840a..7c7dc1c 100644
--- a/src/aarch64/simulator-aarch64.cc
+++ b/src/aarch64/simulator-aarch64.cc
@@ -468,6 +468,28 @@
{"seten_set_memcms"_h, &Simulator::SimulateSetE},
{"setge_set_memcms"_h, &Simulator::SimulateSetE},
{"setgen_set_memcms"_h, &Simulator::SimulateSetE},
+ {"abs_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"abs_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"cnt_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"cnt_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"ctz_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"ctz_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
+ {"smax_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+ {"smax_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+ {"smin_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+ {"smin_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
+ {"smax_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+ {"smax_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+ {"smin_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+ {"smin_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
+ {"umax_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umax_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umin_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umin_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umax_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
+ {"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
};
return &form_to_visitor;
}
@@ -5225,112 +5247,102 @@
}
-#define PAUTH_MODES_REGISTER_CONTEXT(V) \
- V(IA, kPACKeyIA, kInstructionPointer) \
- V(IB, kPACKeyIB, kInstructionPointer) \
- V(DA, kPACKeyDA, kDataPointer) \
- V(DB, kPACKeyDB, kDataPointer)
-
-#define PAUTH_MODES_ZERO_CONTEXT(V) \
- V(IZA, kPACKeyIA, kInstructionPointer) \
- V(IZB, kPACKeyIB, kInstructionPointer) \
- V(DZA, kPACKeyDA, kDataPointer) \
- V(DZB, kPACKeyDB, kDataPointer)
+#define PAUTH_MODES_REGISTER_CONTEXT(V) /* V(i or d, a or b, key, pointer type) */ \
+ V(i, a, kPACKeyIA, kInstructionPointer) \
+ V(i, b, kPACKeyIB, kInstructionPointer) \
+ V(d, a, kPACKeyDA, kDataPointer) \
+ V(d, b, kPACKeyDB, kDataPointer)
void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
unsigned dst = instr->GetRd();
unsigned src = instr->GetRn();
+ Reg31Mode r31_pac = Reg31IsStackPointer;  // Switched to the zero register by the "z" PAuth forms below.
- switch (instr->Mask(DataProcessing1SourceMask)) {
-#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \
- case PAC##SUFFIX: { \
- uint64_t mod = ReadXRegister(src, Reg31IsStackPointer); \
- uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AddPAC(ptr, mod, KEY, D)); \
- break; \
- } \
- case AUT##SUFFIX: { \
- uint64_t mod = ReadXRegister(src, Reg31IsStackPointer); \
- uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
- break; \
+ switch (form_hash_) {  // Each case label is the hash of an instruction form name.
+#define DEFINE_PAUTH_FUNCS(SUF0, SUF1, KEY, D) \
+ case "pac" #SUF0 "z" #SUF1 "_64z_dp_1src"_h: \
+ VIXL_ASSERT(src == kZeroRegCode); \
+ r31_pac = Reg31IsZeroRegister; \
+ VIXL_FALLTHROUGH(); \
+ case "pac" #SUF0 #SUF1 "_64p_dp_1src"_h: { \
+ uint64_t mod = ReadXRegister(src, r31_pac); \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AddPAC(ptr, mod, KEY, D)); \
+ break; \
+ } \
+ case "aut" #SUF0 "z" #SUF1 "_64z_dp_1src"_h: \
+ VIXL_ASSERT(src == kZeroRegCode); \
+ r31_pac = Reg31IsZeroRegister; \
+ VIXL_FALLTHROUGH(); \
+ case "aut" #SUF0 #SUF1 "_64p_dp_1src"_h: { \
+ uint64_t mod = ReadXRegister(src, r31_pac); \
+ uint64_t ptr = ReadXRegister(dst); \
+ WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
+ break; \
+ }
-
PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
#undef DEFINE_PAUTH_FUNCS
-#define DEFINE_PAUTH_FUNCS(SUFFIX, KEY, D) \
- case PAC##SUFFIX: { \
- if (src != kZeroRegCode) { \
- VIXL_UNIMPLEMENTED(); \
- } \
- uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AddPAC(ptr, 0x0, KEY, D)); \
- break; \
- } \
- case AUT##SUFFIX: { \
- if (src != kZeroRegCode) { \
- VIXL_UNIMPLEMENTED(); \
- } \
- uint64_t ptr = ReadXRegister(dst); \
- WriteXRegister(dst, AuthPAC(ptr, 0x0, KEY, D)); \
- break; \
- }
-
- PAUTH_MODES_ZERO_CONTEXT(DEFINE_PAUTH_FUNCS)
-#undef DEFINE_PAUTH_FUNCS
-
- case XPACI:
- if (src != kZeroRegCode) {
- VIXL_UNIMPLEMENTED();
- }
+ case "xpaci_64z_dp_1src"_h:
WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
break;
- case XPACD:
- if (src != kZeroRegCode) {
- VIXL_UNIMPLEMENTED();
- }
+ case "xpacd_64z_dp_1src"_h:
WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
break;
- case RBIT_w:
+ case "rbit_32_dp_1src"_h:
WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
break;
- case RBIT_x:
+ case "rbit_64_dp_1src"_h:
WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
break;
- case REV16_w:
+ case "rev16_32_dp_1src"_h:
WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
break;
- case REV16_x:
+ case "rev16_64_dp_1src"_h:
WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
break;
- case REV_w:
+ case "rev_32_dp_1src"_h:
WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
break;
- case REV32_x:
+ case "rev32_64_dp_1src"_h:
WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
break;
- case REV_x:
+ case "rev_64_dp_1src"_h:
WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
break;
- case CLZ_w:
+ case "clz_32_dp_1src"_h:
WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
break;
- case CLZ_x:
+ case "clz_64_dp_1src"_h:
WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
break;
- case CLS_w:
+ case "cls_32_dp_1src"_h:
WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
break;
- case CLS_x:
+ case "cls_64_dp_1src"_h:
WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
break;
- default:
- VIXL_UNIMPLEMENTED();
+ case "abs_32_dp_1src"_h:
+ WriteWRegister(dst, Abs(ReadWRegister(src)));  // Abs of the minimum value is the minimum value.
+ break;
+ case "abs_64_dp_1src"_h:
+ WriteXRegister(dst, Abs(ReadXRegister(src)));
+ break;
+ case "cnt_32_dp_1src"_h:
+ WriteWRegister(dst, CountSetBits(ReadWRegister(src)));
+ break;
+ case "cnt_64_dp_1src"_h:
+ WriteXRegister(dst, CountSetBits(ReadXRegister(src)));
+ break;
+ case "ctz_32_dp_1src"_h:
+ WriteWRegister(dst, CountTrailingZeros(ReadWRegister(src)));
+ break;
+ case "ctz_64_dp_1src"_h:
+ WriteXRegister(dst, CountTrailingZeros(ReadXRegister(src)));
+ break;
}
}
-
uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
VIXL_ASSERT((n > 32) && (n <= 64));
for (unsigned i = (n - 1); i >= 32; i--) {
@@ -5507,6 +5519,81 @@
WriteRegister(reg_size, instr->GetRd(), result);
}
+void Simulator::SimulateSignedMinMax(const Instruction* instr) {
+  const int dst = instr->GetRd();
+
+  // First operand, viewed as a W and as an X register.
+  const int32_t w_lhs = ReadWRegister(instr->GetRn());
+  const int64_t x_lhs = ReadXRegister(instr->GetRn());
+
+  // Second operand: Rm for the register forms, or the sign-extended field in
+  // bits <17:10> for the immediate forms.
+  const int32_t w_reg = ReadWRegister(instr->GetRm());
+  const int64_t x_reg = ReadXRegister(instr->GetRm());
+  const int32_t imm8 = instr->ExtractSignedBits(17, 10);
+
+  switch (form_hash_) {
+    case "smax_32_dp_2src"_h:
+      WriteWRegister(dst, std::max(w_lhs, w_reg));
+      break;
+    case "smax_32_minmax_imm"_h:
+      WriteWRegister(dst, std::max(w_lhs, imm8));
+      break;
+    case "smax_64_dp_2src"_h:
+      WriteXRegister(dst, std::max(x_lhs, x_reg));
+      break;
+    case "smax_64_minmax_imm"_h:
+      WriteXRegister(dst, std::max<int64_t>(x_lhs, imm8));
+      break;
+    case "smin_32_dp_2src"_h:
+      WriteWRegister(dst, std::min(w_lhs, w_reg));
+      break;
+    case "smin_32_minmax_imm"_h:
+      WriteWRegister(dst, std::min(w_lhs, imm8));
+      break;
+    case "smin_64_dp_2src"_h:
+      WriteXRegister(dst, std::min(x_lhs, x_reg));
+      break;
+    case "smin_64_minmax_imm"_h:
+      WriteXRegister(dst, std::min<int64_t>(x_lhs, imm8));
+      break;
+  }
+}
+
+void Simulator::SimulateUnsignedMinMax(const Instruction* instr) {
+  bool is_imm = false;
+  bool is_32bit = false;
+  bool is_max = false;
+
+  // Classify the form. Any other form leaves the registers untouched.
+  switch (form_hash_) {
+    case "umax_32u_minmax_imm"_h:
+      is_imm = true;
+      VIXL_FALLTHROUGH();
+    case "umax_32_dp_2src"_h:
+      is_32bit = true;
+      is_max = true;
+      break;
+    case "umax_64u_minmax_imm"_h:
+      is_imm = true;
+      VIXL_FALLTHROUGH();
+    case "umax_64_dp_2src"_h:
+      is_max = true;
+      break;
+    case "umin_32u_minmax_imm"_h:
+      is_imm = true;
+      VIXL_FALLTHROUGH();
+    case "umin_32_dp_2src"_h:
+      is_32bit = true;
+      break;
+    case "umin_64u_minmax_imm"_h:
+      is_imm = true;
+      break;
+    case "umin_64_dp_2src"_h:
+      break;
+    default:
+      return;
+  }
+
+  // The second operand is the unsigned field in bits <17:10> for the
+  // immediate forms, and Rm otherwise.
+  uint64_t lhs = ReadXRegister(instr->GetRn());
+  uint64_t rhs = ReadXRegister(instr->GetRm());
+  if (is_imm) {
+    const uint32_t imm8 = instr->ExtractBits(17, 10);
+    rhs = imm8;
+  }
+
+  // The 32-bit forms compare only the low words; the result is written
+  // through the X register, so its upper half is zero.
+  if (is_32bit) {
+    lhs &= 0xffff'ffff;
+    rhs &= 0xffff'ffff;
+  }
+
+  const int dst = instr->GetRd();
+  WriteXRegister(dst, is_max ? std::max(lhs, rhs) : std::min(lhs, rhs));
+}
void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
diff --git a/src/aarch64/simulator-aarch64.h b/src/aarch64/simulator-aarch64.h
index 1321c4b..c5cc894 100644
--- a/src/aarch64/simulator-aarch64.h
+++ b/src/aarch64/simulator-aarch64.h
@@ -1397,6 +1397,8 @@
void SimulateSetE(const Instruction* instr);
void SimulateSetGP(const Instruction* instr);
void SimulateSetGM(const Instruction* instr);
+ void SimulateSignedMinMax(const Instruction* instr);  // smax and smin, register and immediate forms.
+ void SimulateUnsignedMinMax(const Instruction* instr);  // umax and umin, register and immediate forms.
// Integer register accessors.
diff --git a/src/cpu-features.h b/src/cpu-features.h
index 962ad52..97eb661 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -200,7 +200,8 @@
V(kWFXT, "WFXT", "wfxt") \
/* Extended BFloat16 instructions */ \
V(kEBF16, "EBF16", "ebf16") \
- V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16")
+ V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \
+ V(kCSSC, "CSSC", "cssc")
// clang-format on
diff --git a/src/utils-vixl.h b/src/utils-vixl.h
index b6c8455..7af9483 100644
--- a/src/utils-vixl.h
+++ b/src/utils-vixl.h
@@ -291,6 +291,17 @@
return ~value + 1;
}
+// Absolute value for signed integers, with a defined result even when the
+// true magnitude is not representable. Specifically, Abs(MIN_INT) is MIN_INT.
+template <typename T>
+T Abs(T val) {
+  // TODO: this static assertion is for signed integer inputs, as that's the
+  // only type tested. However, the code should work for all numeric inputs.
+  // Remove the assertion and this comment when more tests are available.
+  VIXL_STATIC_ASSERT(std::is_signed<T>::value && std::is_integral<T>::value);
+  // Negate only the negative values whose negation fits in T. Zero, positive
+  // values and the minimum value are returned unchanged.
+  const bool negatable = (val < 0) && (val >= -std::numeric_limits<T>::max());
+  if (negatable) return -val;
+  return val;
+}
+
// Convert unsigned to signed numbers in a well-defined way (using two's
// complement representations).
inline int64_t RawbitsToInt64(uint64_t bits) {
diff --git a/test/aarch64/test-assembler-aarch64.cc b/test/aarch64/test-assembler-aarch64.cc
index 293af0a..58f4fd1 100644
--- a/test/aarch64/test-assembler-aarch64.cc
+++ b/test/aarch64/test-assembler-aarch64.cc
@@ -14377,6 +14377,276 @@
}
}
+TEST(cssc_abs) {  // Abs on the W and X views of values around the signed limits.
+ SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+ START();
+ __ Mov(x0, -1);
+ __ Mov(x1, 1);
+ __ Mov(x2, 0);
+ __ Mov(x3, 0x7fff'ffff);
+ __ Mov(x4, 0x8000'0000);
+ __ Mov(x5, 0x8000'0001);
+ __ Mov(x6, 0x7fff'ffff'ffff'ffff);
+ __ Mov(x7, 0x8000'0000'0000'0000);
+ __ Mov(x8, 0x8000'0000'0000'0001);
+
+ __ Abs(w10, w0);
+ __ Abs(x11, x0);
+ __ Abs(w12, w1);
+ __ Abs(x13, x1);
+ __ Abs(w14, w2);
+ __ Abs(x15, x2);
+
+ __ Abs(w19, w3);
+ __ Abs(x20, x3);
+ __ Abs(w21, w4);  // w4 holds the minimum 32-bit value, which is expected back unchanged.
+ __ Abs(x22, x4);
+ __ Abs(w23, w5);
+ __ Abs(x24, x5);
+ __ Abs(w25, w6);
+ __ Abs(x26, x6);
+ __ Abs(w27, w7);
+ __ Abs(x28, x7);  // x7 holds the minimum 64-bit value, which is expected back unchanged.
+ __ Abs(w29, w8);
+ __ Abs(x30, x8);
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_64(1, x10);
+ ASSERT_EQUAL_64(1, x11);
+ ASSERT_EQUAL_64(1, x12);
+ ASSERT_EQUAL_64(1, x13);
+ ASSERT_EQUAL_64(0, x14);
+ ASSERT_EQUAL_64(0, x15);
+ ASSERT_EQUAL_64(0x7fff'ffff, x19);
+ ASSERT_EQUAL_64(0x7fff'ffff, x20);
+ ASSERT_EQUAL_64(0x8000'0000, x21);
+ ASSERT_EQUAL_64(0x8000'0000, x22);
+ ASSERT_EQUAL_64(0x7fff'ffff, x23);
+ ASSERT_EQUAL_64(0x8000'0001, x24);
+ ASSERT_EQUAL_64(1, x25);
+ ASSERT_EQUAL_64(0x7fff'ffff'ffff'ffff, x26);
+ ASSERT_EQUAL_64(0, x27);
+ ASSERT_EQUAL_64(0x8000'0000'0000'0000, x28);
+ ASSERT_EQUAL_64(1, x29);
+ ASSERT_EQUAL_64(0x7fff'ffff'ffff'ffff, x30);
+ }
+}
+
+TEST(cssc_cnt) {  // Cnt is expected to return the number of set bits in the W or X source.
+ SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+ START();
+ __ Mov(x0, -1);
+ __ Mov(x1, 1);
+ __ Mov(x2, 0);
+ __ Mov(x3, 0x7fff'ffff);
+ __ Mov(x4, 0x8000'0000);
+ __ Mov(x5, 0x8000'0001);
+ __ Mov(x6, 0x7fff'ffff'ffff'ffff);
+ __ Mov(x7, 0x4242'4242'aaaa'aaaa);
+
+ __ Cnt(w10, w0);
+ __ Cnt(x11, x0);
+ __ Cnt(w12, w1);
+ __ Cnt(x13, x1);
+ __ Cnt(w14, w2);
+ __ Cnt(x15, x2);
+ __ Cnt(w19, w3);
+ __ Cnt(x20, x3);
+ __ Cnt(w21, w4);
+ __ Cnt(x22, x4);
+ __ Cnt(w23, w5);
+ __ Cnt(x24, x5);
+ __ Cnt(w25, w6);
+ __ Cnt(x26, x6);
+ __ Cnt(w27, w7);  // The low word of x7 has 16 bits set; the high word adds 8 more.
+ __ Cnt(x28, x7);
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_64(32, x10);
+ ASSERT_EQUAL_64(64, x11);
+ ASSERT_EQUAL_64(1, x12);
+ ASSERT_EQUAL_64(1, x13);
+ ASSERT_EQUAL_64(0, x14);
+ ASSERT_EQUAL_64(0, x15);
+ ASSERT_EQUAL_64(31, x19);
+ ASSERT_EQUAL_64(31, x20);
+ ASSERT_EQUAL_64(1, x21);
+ ASSERT_EQUAL_64(1, x22);
+ ASSERT_EQUAL_64(2, x23);
+ ASSERT_EQUAL_64(2, x24);
+ ASSERT_EQUAL_64(32, x25);
+ ASSERT_EQUAL_64(63, x26);
+ ASSERT_EQUAL_64(16, x27);
+ ASSERT_EQUAL_64(24, x28);
+ }
+}
+
+TEST(cssc_ctz) {  // Ctz is expected to return the index of the lowest set bit of the W or X source.
+ SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+ START();
+ __ Mov(x0, -1);
+ __ Mov(x1, 1);
+ __ Mov(x2, 2);
+ __ Mov(x3, 0x7fff'ff00);
+ __ Mov(x4, 0x8000'4000);
+ __ Mov(x5, 0x4000'0001);
+ __ Mov(x6, 0x0000'0001'0000'0000);
+ __ Mov(x7, 0x4200'0000'0000'0000);
+
+ __ Ctz(w10, w0);
+ __ Ctz(x11, x0);
+ __ Ctz(w12, w1);
+ __ Ctz(x13, x1);
+ __ Ctz(w14, w2);
+ __ Ctz(x15, x2);
+ __ Ctz(w19, w3);
+ __ Ctz(x20, x3);
+ __ Ctz(w21, w4);
+ __ Ctz(x22, x4);
+ __ Ctz(w23, w5);
+ __ Ctz(x24, x5);
+ __ Ctz(w25, w6);  // The low word of x6 is zero, so the expected W result is 32.
+ __ Ctz(x26, x6);
+ __ Ctz(w27, w7);  // Likewise for x7, whose low word is also zero.
+ __ Ctz(x28, x7);
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+
+ ASSERT_EQUAL_64(0, x10);
+ ASSERT_EQUAL_64(0, x11);
+ ASSERT_EQUAL_64(0, x12);
+ ASSERT_EQUAL_64(0, x13);
+ ASSERT_EQUAL_64(1, x14);
+ ASSERT_EQUAL_64(1, x15);
+ ASSERT_EQUAL_64(8, x19);
+ ASSERT_EQUAL_64(8, x20);
+ ASSERT_EQUAL_64(14, x21);
+ ASSERT_EQUAL_64(14, x22);
+ ASSERT_EQUAL_64(0, x23);
+ ASSERT_EQUAL_64(0, x24);
+ ASSERT_EQUAL_64(32, x25);
+ ASSERT_EQUAL_64(32, x26);
+ ASSERT_EQUAL_64(32, x27);
+ ASSERT_EQUAL_64(57, x28);
+ }
+}
+
+using MinMaxOp = void (MacroAssembler::*)(const Register&,  // Pointer to Smax, Smin, Umax or Umin.
+ const Register&,
+ const Operand&);
+
+static void MinMaxHelper(MinMaxOp op,  // Runs op on (a, b) as W and as X registers and checks both results.
+ bool is_signed,
+ uint64_t a,
+ uint64_t b,
+ uint32_t wexp,
+ uint64_t xexp) {
+ SETUP_WITH_FEATURES(CPUFeatures::kCSSC);
+
+ START();
+ __ Mov(x0, a);
+ __ Mov(x1, b);
+ if ((is_signed && IsInt8(b)) || (!is_signed && IsUint8(b))) {  // b fits the 8-bit range: pass it as an immediate.
+ (masm.*op)(w10, w0, b);
+ (masm.*op)(x11, x0, b);
+ } else {  // Otherwise pass b in a register.
+ (masm.*op)(w10, w0, w1);
+ (masm.*op)(x11, x0, x1);
+ }
+ END();
+
+ if (CAN_RUN()) {
+ RUN();
+ ASSERT_EQUAL_64(wexp, x10);
+ ASSERT_EQUAL_64(xexp, x11);
+ }
+}
+
+TEST(cssc_umin) {  // MinMaxHelper arguments: op, is_signed, a, b, expected W result, expected X result.
+ MinMaxOp op = &MacroAssembler::Umin;
+ uint32_t s32min = 0x8000'0000;
+ uint32_t s32max = 0x7fff'ffff;
+ uint64_t s64min = 0x8000'0000'0000'0000;
+ uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+ MinMaxHelper(op, false, 0, 0, 0, 0);
+ MinMaxHelper(op, false, 128, 255, 128, 128);
+ MinMaxHelper(op, false, 0, 0xffff'ffff'ffff'ffff, 0, 0);
+ MinMaxHelper(op, false, s32max, s32min, s32max, s32max);
+ MinMaxHelper(op, false, s32min, s32max, s32max, s32max);
+ MinMaxHelper(op, false, s64max, s32min, s32min, s32min);
+ MinMaxHelper(op, false, s64min, s64max, 0, s64max);
+}
+
+TEST(cssc_umax) {  // MinMaxHelper arguments: op, is_signed, a, b, expected W result, expected X result.
+ MinMaxOp op = &MacroAssembler::Umax;
+ uint32_t s32min = 0x8000'0000;
+ uint32_t s32max = 0x7fff'ffff;
+ uint64_t s64min = 0x8000'0000'0000'0000;
+ uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+ MinMaxHelper(op, false, 0, 0, 0, 0);
+ MinMaxHelper(op, false, 128, 255, 255, 255);
+ MinMaxHelper(op,
+ false,
+ 0,
+ 0xffff'ffff'ffff'ffff,
+ 0xffff'ffff,
+ 0xffff'ffff'ffff'ffff);
+ MinMaxHelper(op, false, s32max, s32min, s32min, s32min);
+ MinMaxHelper(op, false, s32min, s32max, s32min, s32min);
+ MinMaxHelper(op, false, s64max, s32min, 0xffff'ffff, s64max);
+ MinMaxHelper(op, false, s64min, s64max, 0xffff'ffff, s64min);
+}
+
+TEST(cssc_smin) {  // MinMaxHelper arguments: op, is_signed, a, b, expected W result, expected X result.
+ MinMaxOp op = &MacroAssembler::Smin;
+ uint32_t s32min = 0x8000'0000;
+ uint32_t s32max = 0x7fff'ffff;
+ uint64_t s64min = 0x8000'0000'0000'0000;
+ uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+ MinMaxHelper(op, true, 0, 0, 0, 0);
+ MinMaxHelper(op, true, 128, 255, 128, 128);
+ MinMaxHelper(op,
+ true,
+ 0,
+ 0xffff'ffff'ffff'ffff,
+ 0xffff'ffff,
+ 0xffff'ffff'ffff'ffff);
+ MinMaxHelper(op, true, s32max, s32min, s32min, s32max);
+ MinMaxHelper(op, true, s32min, s32max, s32min, s32max);
+ MinMaxHelper(op, true, s64max, s32min, s32min, s32min);
+ MinMaxHelper(op, true, s64min, s64max, 0xffff'ffff, s64min);
+}
+
+TEST(cssc_smax) {  // MinMaxHelper arguments: op, is_signed, a, b, expected W result, expected X result.
+ MinMaxOp op = &MacroAssembler::Smax;
+ uint32_t s32min = 0x8000'0000;
+ uint32_t s32max = 0x7fff'ffff;
+ uint64_t s64min = 0x8000'0000'0000'0000;
+ uint64_t s64max = 0x7fff'ffff'ffff'ffff;
+
+ MinMaxHelper(op, true, 0, 0, 0, 0);
+ MinMaxHelper(op, true, 128, 255, 255, 255);
+ MinMaxHelper(op, true, 0, 0xffff'ffff'ffff'ffff, 0, 0);
+ MinMaxHelper(op, true, s32max, s32min, s32max, s32min);
+ MinMaxHelper(op, true, s32min, s32max, s32max, s32min);
+ MinMaxHelper(op, true, s64max, s32min, 0xffff'ffff, s64max);
+ MinMaxHelper(op, true, s64min, s64max, 0, s64max);
+}
+
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
// Test the pseudo-instructions that control CPUFeatures dynamically in the
// Simulator. These are used by the test infrastructure itself, but in a fairly
diff --git a/test/aarch64/test-disasm-aarch64.cc b/test/aarch64/test-disasm-aarch64.cc
index 1de3cab..99cf4ec 100644
--- a/test/aarch64/test-disasm-aarch64.cc
+++ b/test/aarch64/test-disasm-aarch64.cc
@@ -3300,6 +3300,64 @@
CLEANUP();
}
+TEST(cssc) {  // Disassembly of the CSSC macros: one-source, register, immediate, then out-of-range immediates.
+ SETUP();
+
+ COMPARE_MACRO(Abs(w0, w22), "abs w0, w22");
+ COMPARE_MACRO(Abs(x0, x23), "abs x0, x23");
+ COMPARE_MACRO(Abs(wzr, wzr), "abs wzr, wzr");
+ COMPARE_MACRO(Cnt(w21, w30), "cnt w21, w30");
+ COMPARE_MACRO(Cnt(x19, x9), "cnt x19, x9");
+ COMPARE_MACRO(Cnt(xzr, x30), "cnt xzr, x30");
+ COMPARE_MACRO(Ctz(w3, w5), "ctz w3, w5");
+ COMPARE_MACRO(Ctz(x3, x28), "ctz x3, x28");
+ COMPARE_MACRO(Ctz(w0, wzr), "ctz w0, wzr");
+
+ COMPARE_MACRO(Smax(w5, w9, w10), "smax w5, w9, w10");
+ COMPARE_MACRO(Smax(x6, x8, x9), "smax x6, x8, x9");
+ COMPARE_MACRO(Smin(w11, w8, w17), "smin w11, w8, w17");
+ COMPARE_MACRO(Smin(x12, x10, x20), "smin x12, x10, x20");
+ COMPARE_MACRO(Umax(w5, w9, w10), "umax w5, w9, w10");
+ COMPARE_MACRO(Umax(x6, x8, x9), "umax x6, x8, x9");
+ COMPARE_MACRO(Umin(w11, w8, w17), "umin w11, w8, w17");
+ COMPARE_MACRO(Umin(x12, x10, x20), "umin x12, x10, x20");
+
+ COMPARE_MACRO(Smax(w5, w9, 127), "smax w5, w9, #127");
+ COMPARE_MACRO(Smax(x6, x8, -128), "smax x6, x8, #-128");
+ COMPARE_MACRO(Smin(w19, w20, -1), "smin w19, w20, #-1");
+ COMPARE_MACRO(Smin(x30, xzr, 0), "smin x30, xzr, #0");
+ COMPARE_MACRO(Umax(w5, w9, 255), "umax w5, w9, #255");
+ COMPARE_MACRO(Umax(x6, x8, 128), "umax x6, x8, #128");
+ COMPARE_MACRO(Umin(wzr, w20, 1), "umin wzr, w20, #1");
+ COMPARE_MACRO(Umin(x30, xzr, 0), "umin x30, xzr, #0");
+
+ COMPARE_MACRO(Smax(w5, w6, 128),
+ "mov w16, #0x80\n"
+ "smax w5, w6, w16");
+ COMPARE_MACRO(Smax(x10, x11, -129),
+ "mov x16, #0xffffffffffffff7f\n"
+ "smax x10, x11, x16");
+ COMPARE_MACRO(Smin(w5, w6, 128),
+ "mov w16, #0x80\n"
+ "smin w5, w6, w16");
+ COMPARE_MACRO(Smin(x10, x11, -129),
+ "mov x16, #0xffffffffffffff7f\n"
+ "smin x10, x11, x16");
+ COMPARE_MACRO(Umax(w5, w6, 256),
+ "mov w16, #0x100\n"
+ "umax w5, w6, w16");
+ COMPARE_MACRO(Umax(x10, x11, 0x4242),
+ "mov x16, #0x4242\n"
+ "umax x10, x11, x16");
+ COMPARE_MACRO(Umin(w5, w6, 256),
+ "mov w16, #0x100\n"
+ "umin w5, w6, w16");
+ COMPARE_MACRO(Umin(x10, x11, 0x4242),
+ "mov x16, #0x4242\n"
+ "umin x10, x11, x16");
+ CLEANUP();
+}
+
TEST(architecture_features) {
SETUP();