Dep: Update vixl to 662828c
This commit is contained in:
@@ -27,6 +27,7 @@
|
||||
#ifndef VIXL_CPU_FEATURES_H
|
||||
#define VIXL_CPU_FEATURES_H
|
||||
|
||||
#include <bitset>
|
||||
#include <ostream>
|
||||
|
||||
#include "globals-vixl.h"
|
||||
@@ -34,16 +35,65 @@
|
||||
namespace vixl {
|
||||
|
||||
|
||||
// VIXL aims to handle and detect all architectural features that are likely to
|
||||
// influence code-generation decisions at EL0 (user-space).
|
||||
//
|
||||
// - There may be multiple VIXL feature flags for a given architectural
|
||||
// extension. This occurs where the extension allow components to be
|
||||
// implemented independently, or where kernel support is needed, and is likely
|
||||
// to be fragmented.
|
||||
//
|
||||
// For example, Pointer Authentication (kPAuth*) has a separate feature flag
|
||||
// for access to PACGA, and to indicate that the QARMA algorithm is
|
||||
// implemented.
|
||||
//
|
||||
// - Conversely, some extensions have configuration options that do not affect
|
||||
// EL0, so these are presented as a single VIXL feature.
|
||||
//
|
||||
// For example, the RAS extension (kRAS) has several variants, but the only
|
||||
// feature relevant to VIXL is the addition of the ESB instruction so we only
|
||||
// need a single flag.
|
||||
//
|
||||
// - VIXL offers separate flags for separate features even if they're
|
||||
// architecturally linked.
|
||||
//
|
||||
// For example, the architecture requires kFPHalf and kNEONHalf to be equal,
|
||||
// but they have separate hardware ID register fields so VIXL presents them as
|
||||
// separate features.
|
||||
//
|
||||
// - VIXL can detect every feature for which it can generate code.
|
||||
//
|
||||
// - VIXL can detect some features for which it cannot generate code.
|
||||
//
|
||||
// The CPUFeatures::Feature enum — derived from the macro list below — is
|
||||
// frequently extended. New features may be added to the list at any point, and
|
||||
// no assumptions should be made about the numerical values assigned to each
|
||||
// enum constant. The symbolic names can be considered to be stable.
|
||||
//
|
||||
// The debug descriptions are used only for debug output. The 'cpuinfo' strings
|
||||
// are informative; VIXL does not use /proc/cpuinfo for feature detection.
|
||||
|
||||
// clang-format off
|
||||
#define VIXL_CPU_FEATURE_LIST(V) \
|
||||
/* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \
|
||||
/* registers, so that the detailed feature registers can be read */ \
|
||||
/* directly. */ \
|
||||
\
|
||||
/* Constant name Debug description Linux 'cpuinfo' string. */ \
|
||||
V(kIDRegisterEmulation, "ID register emulation", "cpuid") \
|
||||
\
|
||||
V(kFP, "FP", "fp") \
|
||||
V(kNEON, "NEON", "asimd") \
|
||||
V(kCRC32, "CRC32", "crc32") \
|
||||
V(kDGH, "DGH", "dgh") \
|
||||
/* Speculation control features. */ \
|
||||
V(kCSV2, "CSV2", NULL) \
|
||||
V(kSCXTNUM, "SCXTNUM", NULL) \
|
||||
V(kCSV3, "CSV3", NULL) \
|
||||
V(kSB, "SB", "sb") \
|
||||
V(kSPECRES, "SPECRES", NULL) \
|
||||
V(kSSBS, "SSBS", NULL) \
|
||||
V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \
|
||||
/* Cryptographic support instructions. */ \
|
||||
V(kAES, "AES", "aes") \
|
||||
V(kSHA1, "SHA1", "sha1") \
|
||||
@@ -56,34 +106,102 @@ namespace vixl {
|
||||
V(kLORegions, "LORegions", NULL) \
|
||||
/* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \
|
||||
V(kRDM, "RDM", "asimdrdm") \
|
||||
/* Scalable Vector Extension. */ \
|
||||
V(kSVE, "SVE", "sve") \
|
||||
V(kSVEF64MM, "SVE F64MM", "svef64mm") \
|
||||
V(kSVEF32MM, "SVE F32MM", "svef32mm") \
|
||||
V(kSVEI8MM, "SVE I8MM", "svei8imm") \
|
||||
V(kSVEBF16, "SVE BFloat16", "svebf16") \
|
||||
/* SDOT and UDOT support (in NEON). */ \
|
||||
V(kDotProduct, "DotProduct", "asimddp") \
|
||||
/* Int8 matrix multiplication (in NEON). */ \
|
||||
V(kI8MM, "NEON I8MM", "i8mm") \
|
||||
/* Half-precision (FP16) support for FP and NEON, respectively. */ \
|
||||
V(kFPHalf, "FPHalf", "fphp") \
|
||||
V(kNEONHalf, "NEONHalf", "asimdhp") \
|
||||
/* BFloat16 support (in both FP and NEON.) */ \
|
||||
V(kBF16, "FP/NEON BFloat 16", "bf16") \
|
||||
/* The RAS extension, including the ESB instruction. */ \
|
||||
V(kRAS, "RAS", NULL) \
|
||||
/* Data cache clean to the point of persistence: DC CVAP. */ \
|
||||
V(kDCPoP, "DCPoP", "dcpop") \
|
||||
/* Data cache clean to the point of deep persistence: DC CVADP. */ \
|
||||
V(kDCCVADP, "DCCVADP", "dcpodp") \
|
||||
/* Cryptographic support instructions. */ \
|
||||
V(kSHA3, "SHA3", "sha3") \
|
||||
V(kSHA512, "SHA512", "sha512") \
|
||||
V(kSM3, "SM3", "sm3") \
|
||||
V(kSM4, "SM4", "sm4") \
|
||||
/* Pointer authentication for addresses. */ \
|
||||
V(kPAuth, "PAuth", NULL) \
|
||||
V(kPAuth, "PAuth", "paca") \
|
||||
/* Pointer authentication for addresses uses QARMA. */ \
|
||||
V(kPAuthQARMA, "PAuthQARMA", NULL) \
|
||||
/* Generic authentication (using the PACGA instruction). */ \
|
||||
V(kPAuthGeneric, "PAuthGeneric", NULL) \
|
||||
V(kPAuthGeneric, "PAuthGeneric", "pacg") \
|
||||
/* Generic authentication uses QARMA. */ \
|
||||
V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \
|
||||
/* JavaScript-style FP <-> integer conversion instruction: FJCVTZS. */ \
|
||||
/* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \
|
||||
V(kJSCVT, "JSCVT", "jscvt") \
|
||||
/* Complex number support for NEON: FCMLA and FCADD. */ \
|
||||
V(kFcma, "Fcma", "fcma") \
|
||||
/* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
|
||||
V(kRCpc, "RCpc", "lrcpc") \
|
||||
/* Complex number support for NEON: FCMLA and FCADD. */ \
|
||||
V(kFcma, "Fcma", "fcma")
|
||||
V(kRCpcImm, "RCpc (imm)", "ilrcpc") \
|
||||
/* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \
|
||||
V(kFlagM, "FlagM", "flagm") \
|
||||
/* Unaligned single-copy atomicity. */ \
|
||||
V(kUSCAT, "USCAT", "uscat") \
|
||||
/* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \
|
||||
V(kFHM, "FHM", "asimdfhm") \
|
||||
/* Data-independent timing (for selected instructions). */ \
|
||||
V(kDIT, "DIT", "dit") \
|
||||
/* Branch target identification. */ \
|
||||
V(kBTI, "BTI", "bti") \
|
||||
/* Flag manipulation instructions: {AX,XA}FLAG */ \
|
||||
V(kAXFlag, "AXFlag", "flagm2") \
|
||||
/* Random number generation extension, */ \
|
||||
V(kRNG, "RNG", "rng") \
|
||||
/* Floating-point round to {32,64}-bit integer. */ \
|
||||
V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \
|
||||
/* Memory Tagging Extension. */ \
|
||||
V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \
|
||||
V(kMTE, "MTE", NULL) \
|
||||
V(kMTE3, "MTE (asymmetric)", "mte3") \
|
||||
/* PAuth extensions. */ \
|
||||
V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \
|
||||
V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \
|
||||
V(kPAuthFPAC, "PAuth FPAC", NULL) \
|
||||
V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \
|
||||
/* Scalable Vector Extension 2. */ \
|
||||
V(kSVE2, "SVE2", "sve2") \
|
||||
V(kSVESM4, "SVE SM4", "svesm4") \
|
||||
V(kSVESHA3, "SVE SHA3", "svesha3") \
|
||||
V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \
|
||||
V(kSVEAES, "SVE AES", "sveaes") \
|
||||
V(kSVEPmull128, "SVE Pmull128", "svepmull") \
|
||||
/* Alternate floating-point behavior */ \
|
||||
V(kAFP, "AFP", "afp") \
|
||||
/* Enhanced Counter Virtualization */ \
|
||||
V(kECV, "ECV", "ecv") \
|
||||
/* Increased precision of Reciprocal Estimate and Square Root Estimate */ \
|
||||
V(kRPRES, "RPRES", "rpres") \
|
||||
/* Memory operation instructions, for memcpy, memset */ \
|
||||
V(kMOPS, "Memory ops", NULL) \
|
||||
/* Scalable Matrix Extension (SME) */ \
|
||||
V(kSME, "SME", "sme") \
|
||||
V(kSMEi16i64, "SME (i16i64)", "smei16i64") \
|
||||
V(kSMEf64f64, "SME (f64f64)", "smef64f64") \
|
||||
V(kSMEi8i32, "SME (i8i32)", "smei8i32") \
|
||||
V(kSMEf16f32, "SME (f16f32)", "smef16f32") \
|
||||
V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \
|
||||
V(kSMEf32f32, "SME (f32f32)", "smef32f32") \
|
||||
V(kSMEfa64, "SME (fa64)", "smefa64") \
|
||||
/* WFET and WFIT instruction support */ \
|
||||
V(kWFXT, "WFXT", "wfxt") \
|
||||
/* Extended BFloat16 instructions */ \
|
||||
V(kEBF16, "EBF16", "ebf16") \
|
||||
V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \
|
||||
V(kCSSC, "CSSC", "cssc")
|
||||
// clang-format on
|
||||
|
||||
|
||||
@@ -176,13 +294,13 @@ class CPUFeatures {
|
||||
// clang-format on
|
||||
|
||||
// By default, construct with no features enabled.
|
||||
CPUFeatures() : features_(0) {}
|
||||
CPUFeatures() : features_{} {}
|
||||
|
||||
// Construct with some features already enabled.
|
||||
CPUFeatures(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone);
|
||||
template <typename T, typename... U>
|
||||
CPUFeatures(T first, U... others) : features_{} {
|
||||
Combine(first, others...);
|
||||
}
|
||||
|
||||
// Construct with all features enabled. This can be used to disable feature
|
||||
// checking: `Has(...)` returns true regardless of the argument.
|
||||
@@ -198,51 +316,80 @@ class CPUFeatures {
|
||||
return CPUFeatures(kFP, kNEON, kCRC32);
|
||||
}
|
||||
|
||||
// Construct a new CPUFeatures object using ID registers. This assumes that
|
||||
// kIDRegisterEmulation is present.
|
||||
static CPUFeatures InferFromIDRegisters();
|
||||
|
||||
enum QueryIDRegistersOption {
|
||||
kDontQueryIDRegisters,
|
||||
kQueryIDRegistersIfAvailable
|
||||
};
|
||||
|
||||
// Construct a new CPUFeatures object based on what the OS reports.
|
||||
static CPUFeatures InferFromOS();
|
||||
static CPUFeatures InferFromOS(
|
||||
QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
|
||||
|
||||
// Combine another CPUFeatures object into this one. Features that already
|
||||
// exist in this set are left unchanged.
|
||||
void Combine(const CPUFeatures& other);
|
||||
|
||||
// Combine specific features into this set. Features that already exist in
|
||||
// this set are left unchanged.
|
||||
void Combine(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone);
|
||||
// Combine a specific feature into this set. If it already exists in the set,
|
||||
// the set is left unchanged.
|
||||
void Combine(Feature feature);
|
||||
|
||||
// Combine multiple features (or feature sets) into this set.
|
||||
template <typename T, typename... U>
|
||||
void Combine(T first, U... others) {
|
||||
Combine(first);
|
||||
Combine(others...);
|
||||
}
|
||||
|
||||
// Remove features in another CPUFeatures object from this one.
|
||||
void Remove(const CPUFeatures& other);
|
||||
|
||||
// Remove specific features from this set.
|
||||
void Remove(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone);
|
||||
// Remove a specific feature from this set. This has no effect if the feature
|
||||
// doesn't exist in the set.
|
||||
void Remove(Feature feature0);
|
||||
|
||||
// Chaining helpers for convenient construction.
|
||||
CPUFeatures With(const CPUFeatures& other) const;
|
||||
CPUFeatures With(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone) const;
|
||||
CPUFeatures Without(const CPUFeatures& other) const;
|
||||
CPUFeatures Without(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone) const;
|
||||
// Remove multiple features (or feature sets) from this set.
|
||||
template <typename T, typename... U>
|
||||
void Remove(T first, U... others) {
|
||||
Remove(first);
|
||||
Remove(others...);
|
||||
}
|
||||
|
||||
// Query features.
|
||||
// Note that an empty query (like `Has(kNone)`) always returns true.
|
||||
// Chaining helpers for convenient construction by combining other CPUFeatures
|
||||
// or individual Features.
|
||||
template <typename... T>
|
||||
CPUFeatures With(T... others) const {
|
||||
CPUFeatures f(*this);
|
||||
f.Combine(others...);
|
||||
return f;
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
CPUFeatures Without(T... others) const {
|
||||
CPUFeatures f(*this);
|
||||
f.Remove(others...);
|
||||
return f;
|
||||
}
|
||||
|
||||
// Test whether the `other` feature set is equal to or a subset of this one.
|
||||
bool Has(const CPUFeatures& other) const;
|
||||
bool Has(Feature feature0,
|
||||
Feature feature1 = kNone,
|
||||
Feature feature2 = kNone,
|
||||
Feature feature3 = kNone) const;
|
||||
|
||||
// Test whether a single feature exists in this set.
|
||||
// Note that `Has(kNone)` always returns true.
|
||||
bool Has(Feature feature) const;
|
||||
|
||||
// Test whether all of the specified features exist in this set.
|
||||
template <typename T, typename... U>
|
||||
bool Has(T first, U... others) const {
|
||||
return Has(first) && Has(others...);
|
||||
}
|
||||
|
||||
// Return the number of enabled features.
|
||||
size_t Count() const;
|
||||
bool HasNoFeatures() const { return Count() == 0; }
|
||||
|
||||
// Check for equivalence.
|
||||
bool operator==(const CPUFeatures& other) const {
|
||||
@@ -256,9 +403,8 @@ class CPUFeatures {
|
||||
const_iterator end() const;
|
||||
|
||||
private:
|
||||
// Each bit represents a feature. This field will be replaced as needed if
|
||||
// features are added.
|
||||
uint64_t features_;
|
||||
// Each bit represents a feature. This set will be extended as needed.
|
||||
std::bitset<kNumberOfFeatures> features_;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os,
|
||||
const vixl::CPUFeatures& features);
|
||||
@@ -281,8 +427,8 @@ class CPUFeaturesConstIterator {
|
||||
bool operator!=(const CPUFeaturesConstIterator& other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
CPUFeatures::Feature operator++();
|
||||
CPUFeatures::Feature operator++(int);
|
||||
CPUFeaturesConstIterator& operator++();
|
||||
CPUFeaturesConstIterator operator++(int);
|
||||
|
||||
CPUFeatures::Feature operator*() const {
|
||||
VIXL_ASSERT(IsValid());
|
||||
@@ -301,8 +447,10 @@ class CPUFeaturesConstIterator {
|
||||
CPUFeatures::Feature feature_;
|
||||
|
||||
bool IsValid() const {
|
||||
return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
|
||||
cpu_features_->Has(feature_);
|
||||
if (cpu_features_ == NULL) {
|
||||
return feature_ == CPUFeatures::kNone;
|
||||
}
|
||||
return cpu_features_->Has(feature_);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -325,21 +473,17 @@ class CPUFeaturesScope {
|
||||
// Start a CPUFeaturesScope on any object that implements
|
||||
// `CPUFeatures* GetCPUFeatures()`.
|
||||
template <typename T>
|
||||
explicit CPUFeaturesScope(T* cpu_features_wrapper,
|
||||
CPUFeatures::Feature feature0 = CPUFeatures::kNone,
|
||||
CPUFeatures::Feature feature1 = CPUFeatures::kNone,
|
||||
CPUFeatures::Feature feature2 = CPUFeatures::kNone,
|
||||
CPUFeatures::Feature feature3 = CPUFeatures::kNone)
|
||||
explicit CPUFeaturesScope(T* cpu_features_wrapper)
|
||||
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
|
||||
old_features_(*cpu_features_) {
|
||||
cpu_features_->Combine(feature0, feature1, feature2, feature3);
|
||||
}
|
||||
old_features_(*cpu_features_) {}
|
||||
|
||||
template <typename T>
|
||||
CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
|
||||
// Start a CPUFeaturesScope on any object that implements
|
||||
// `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
|
||||
template <typename T, typename U, typename... V>
|
||||
CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
|
||||
: cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
|
||||
old_features_(*cpu_features_) {
|
||||
cpu_features_->Combine(other);
|
||||
cpu_features_->Combine(first, features...);
|
||||
}
|
||||
|
||||
~CPUFeaturesScope() { *cpu_features_ = old_features_; }
|
||||
|
||||
Reference in New Issue
Block a user