cpu-features: Update OS queryable hwcaps (#43)
* cpu-features: Support hwcap for FEAT_MTE3
* cpu-features: Support hwcap for FEAT_SME
* cpu-aarch64: Allow specifying IDRegister field size
Some SME feature fields in the ID registers are a single bit wide, so a
configurable width is needed to decode them. The width defaults to 4 bits,
so fields declared without an explicit width are decoded as before.
* cpu-aarch64: Add skeleton for AA64SMFR0_EL1
This register contains the bulk of the SME extension bitfields.
* cpu-features: Support hwcap for FEAT_SME_I16I64
* cpu-features: Support hwcap for FEAT_SME_F64F64
* cpu-features: Support hwcap for FEAT_SME_I8I32
* cpu-features: Support hwcap for FEAT_SME_F16F32
* cpu-features: Support hwcap for FEAT_SME_B16F32
* cpu-features: Support hwcap for FEAT_SME_F32F32
* cpu-features: Support hwcap for FEAT_SME_FA64
* cpu-aarch64: Handle hwcap auxvals separately
AT_HWCAP2 now reports features in bit 31 and bit 32. Bit 32 cannot be
represented when AT_HWCAP2 is shifted into the upper 32 bits of a single
64-bit value, so split the feature table into two arrays, one per auxval,
and match each auxval against its own array.
* cpu-features: Support hwcap for FEAT_WFxT
* cpu-features: Support hwcap for FEAT_EBF16
diff --git a/src/aarch64/cpu-aarch64.cc b/src/aarch64/cpu-aarch64.cc
index ae51992..3f7e1e0 100644
--- a/src/aarch64/cpu-aarch64.cc
+++ b/src/aarch64/cpu-aarch64.cc
@@ -48,6 +48,7 @@
const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
+const IDRegister::Field AA64PFR1::kSME(24);
const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
@@ -78,6 +79,7 @@
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);
+const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64MMFR0::kECV(60);
@@ -97,6 +99,14 @@
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);
+const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);  // Field(lsb, width): bit 32, 1 bit wide.
+const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);  // Bit 34, 1 bit wide.
+const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);  // Bit 35, 1 bit wide.
+const IDRegister::Field AA64SMFR0::kSMEi8i32(36);  // lsb 36, default width (kMaxWidthInBits).
+const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);  // Bit 48, 1 bit wide.
+const IDRegister::Field AA64SMFR0::kSMEi16i64(52);  // lsb 52, default width (kMaxWidthInBits).
+const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);  // Bit 63, 1 bit wide.
+
CPUFeatures AA64PFR0::GetCPUFeatures() const {
CPUFeatures f;
if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
@@ -119,6 +129,8 @@
if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
+ if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
+ if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
return f;
}
@@ -155,6 +167,7 @@
if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
+ if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);
@@ -180,6 +193,7 @@
CPUFeatures AA64ISAR2::GetCPUFeatures() const {  // Translates this register's kWFXT and kRPRES fields into feature flags.
CPUFeatures f;
+ if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);  // NOTE(review): threshold is 2, not 1 - confirm against the field's architectural encoding.
if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
return f;  // Empty set if neither field meets its threshold.
}
@@ -220,6 +234,18 @@
return f;
}
+CPUFeatures AA64SMFR0::GetCPUFeatures() const {  // Translates each SME field of this register into its feature flag.
+ CPUFeatures f;
+ if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);  // Declared 1 bit wide.
+ if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);  // Declared 1 bit wide.
+ if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);  // Declared 1 bit wide.
+ if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);  // 15 is the largest value of a default 4-bit unsigned field.
+ if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);  // Declared 1 bit wide.
+ if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);  // 15 is the largest value of a default 4-bit unsigned field.
+ if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);  // Declared 1 bit wide.
+ return f;  // Empty set if no field meets its threshold.
+}
+
int IDRegister::Get(IDRegister::Field field) const {
int msb = field.GetMsb();
int lsb = field.GetLsb();
@@ -252,7 +278,7 @@
// Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
// than explicit bits, but explicit bits allow us to identify features that
// the toolchain doesn't know about.
- static const CPUFeatures::Feature kFeatureBits[] =
+ static const CPUFeatures::Feature kFeatureBitsLow[] =
{// Bits 0-7
CPUFeatures::kFP,
CPUFeatures::kNEON,
@@ -288,8 +314,11 @@
CPUFeatures::kSSBSControl,
CPUFeatures::kSB,
CPUFeatures::kPAuth,
- CPUFeatures::kPAuthGeneric,
- // Bits 32-39
+ CPUFeatures::kPAuthGeneric};
+ VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);
+
+ static const CPUFeatures::Feature kFeatureBitsHigh[] =
+ {// Bits 0-7
CPUFeatures::kDCCVADP,
CPUFeatures::kSVE2,
CPUFeatures::kSVEAES,
@@ -298,7 +327,7 @@
CPUFeatures::kSVESHA3,
CPUFeatures::kSVESM4,
CPUFeatures::kAXFlag,
- // Bits 40-47
+ // Bits 8-15
CPUFeatures::kFrintToFixedSizedInt,
CPUFeatures::kSVEI8MM,
CPUFeatures::kSVEF32MM,
@@ -307,24 +336,42 @@
CPUFeatures::kI8MM,
CPUFeatures::kBF16,
CPUFeatures::kDGH,
- // Bits 48+
+ // Bits 16-23
CPUFeatures::kRNG,
CPUFeatures::kBTI,
CPUFeatures::kMTE,
CPUFeatures::kECV,
CPUFeatures::kAFP,
- CPUFeatures::kRPRES};
+ CPUFeatures::kRPRES,
+ CPUFeatures::kMTE3,
+ CPUFeatures::kSME,
+ // Bits 24-31
+ CPUFeatures::kSMEi16i64,
+ CPUFeatures::kSMEf64f64,
+ CPUFeatures::kSMEi8i32,
+ CPUFeatures::kSMEf16f32,
+ CPUFeatures::kSMEb16f32,
+ CPUFeatures::kSMEf32f32,
+ CPUFeatures::kSMEfa64,
+ CPUFeatures::kWFXT,
+ // Bits 32-39
+ CPUFeatures::kEBF16};
+ VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);
- uint64_t hwcap_low32 = getauxval(AT_HWCAP);
- uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
- VIXL_ASSERT(IsUint32(hwcap_low32));
- VIXL_ASSERT(IsUint32(hwcap_high32));
- uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);
+ auto combine_features = [&features](uint64_t hwcap,  // Adds to `features` every table entry whose bit is set in `hwcap`.
+ const CPUFeatures::Feature* feature_array,
+ size_t features_size) {
+ for (size_t i = 0; i < features_size; i++) {  // Table index i corresponds to bit i of hwcap.
+ if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);  // Shift is only defined for i < 64; both tables are static-asserted shorter than that.
+ }
+ };
- VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
- for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
- if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
- }
+ uint64_t hwcap_low = getauxval(AT_HWCAP);
+ uint64_t hwcap_high = getauxval(AT_HWCAP2);
+
+ combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
+ combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));
+
// MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support
if (features.Has(CPUFeatures::kMTE)) {
features.Combine(CPUFeatures::kMTEInstructions);
diff --git a/src/aarch64/cpu-aarch64.h b/src/aarch64/cpu-aarch64.h
index 892f48f..b83482b 100644
--- a/src/aarch64/cpu-aarch64.h
+++ b/src/aarch64/cpu-aarch64.h
@@ -56,24 +56,24 @@
public:
enum Type { kUnsigned, kSigned };
+ static const int kMaxWidthInBits = 4;
+
// This needs to be constexpr so that fields have "constant initialisation".
// This avoids initialisation order problems when these values are used to
// (dynamically) initialise static variables, etc.
- explicit constexpr Field(int lsb, Type type = kUnsigned)
- : lsb_(lsb), type_(type) {}
+ // Constructs a field whose lowest bit is `lsb`, spanning `bitWidth` bits
+ // (kMaxWidthInBits unless given) and interpreted as `type`.
+ explicit constexpr Field(int lsb,
+ int bitWidth = kMaxWidthInBits,
+ Type type = kUnsigned)
+ : lsb_(lsb), bitWidth_(bitWidth), type_(type) {}
+ // Keeps the pre-existing (lsb, type) call form working. `Type` is an
+ // unscoped enum, so without this overload Field(lsb, kSigned) would still
+ // compile but bind kSigned (== 1) to `bitWidth`, silently producing a
+ // 1-bit unsigned field. An exact match on `Type` outranks the enum-to-int
+ // promotion, so such calls select this constructor.
+ explicit constexpr Field(int lsb, Type type)
+ : Field(lsb, kMaxWidthInBits, type) {}
- static const int kMaxWidthInBits = 4;
-
- int GetWidthInBits() const {
- // All current ID fields have four bits.
- return kMaxWidthInBits;
- }
+ int GetWidthInBits() const { return bitWidth_; }
int GetLsb() const { return lsb_; }
int GetMsb() const { return lsb_ + GetWidthInBits() - 1; }
Type GetType() const { return type_; }
private:
int lsb_;
+ int bitWidth_;
Type type_;
};
@@ -113,6 +113,7 @@
static const Field kBT;
static const Field kSSBS;
static const Field kMTE;
+ static const Field kSME;
};
class AA64ISAR0 : public IDRegister {
@@ -167,6 +168,7 @@
CPUFeatures GetCPUFeatures() const;
private:
+ static const Field kWFXT;
static const Field kRPRES;
};
@@ -219,6 +221,22 @@
static const Field kF64MM;
};
+class AA64SMFR0 : public IDRegister {  // ID register wrapper whose fields describe SME sub-features.
+ public:
+ explicit AA64SMFR0(uint64_t value) : IDRegister(value) {}  // `value` is the raw 64-bit register contents.
+
+ CPUFeatures GetCPUFeatures() const;  // Returns the features implied by the fields below.
+
+ private:
+ static const Field kSMEf32f32;  // Field positions and widths are defined in cpu-aarch64.cc.
+ static const Field kSMEb16f32;
+ static const Field kSMEf16f32;
+ static const Field kSMEi8i32;
+ static const Field kSMEf64f64;
+ static const Field kSMEi16i64;
+ static const Field kSMEfa64;
+};
+
class CPU {
public:
// Initialise CPU support.
@@ -285,6 +303,7 @@
V(AA64MMFR1, "ID_AA64MMFR1_EL1") \
/* These registers are RES0 in the baseline Arm8.0. We can always safely */ \
/* read them, but some compilers don't accept the symbolic names. */ \
+ V(AA64SMFR0, "S3_0_C0_C4_5") \
V(AA64ISAR2, "S3_0_C0_C6_2") \
V(AA64MMFR2, "S3_0_C0_C7_2") \
V(AA64ZFR0, "S3_0_C0_C4_4")
diff --git a/src/cpu-features.h b/src/cpu-features.h
index ebd0578..9006fcb 100644
--- a/src/cpu-features.h
+++ b/src/cpu-features.h
@@ -166,6 +166,7 @@
/* Memory Tagging Extension. */ \
V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
V(kMTE, "MTE", NULL) \
+ V(kMTE3, "MTE (asymmetric)", "mte3") \
/* PAuth extensions. */ \
V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
@@ -183,7 +184,20 @@
/* Enhanced Counter Virtualization */ \
V(kECV, "ECV", "ecv") \
/* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
- V(kRPRES, "RPRES", "rpres")
+ V(kRPRES, "RPRES", "rpres") \
+ /* Scalable Matrix Extension (SME) */ \
+ V(kSME, "SME", "sme") \
+ V(kSMEi16i64, "SME (i16i64)", "smei16i64") \
+ V(kSMEf64f64, "SME (f64f64)", "smef64f64") \
+ V(kSMEi8i32, "SME (i8i32)", "smei8i32") \
+ V(kSMEf16f32, "SME (f16f32)", "smef16f32") \
+ V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \
+ V(kSMEf32f32, "SME (f32f32)", "smef32f32") \
+ V(kSMEfa64, "SME (fa64)", "smefa64") \
+ /* WFET and WFIT instruction support */ \
+ V(kWFXT, "WFXT", "wfxt") \
+ /* Extended BFloat16 instructions */ \
+ V(kEBF16, "EBF16", "ebf16")
// clang-format on