Dep: Update vixl to 662828c

2024-06-14 17:27:12 +10:00
parent d45e218da7
commit f0c2832d03
66 changed files with 65453 additions and 10345 deletions
--- a/dep/vixl/include/vixl/cpu-features.h
+++ b/dep/vixl/include/vixl/cpu-features.h
@@ -27,6 +27,7 @@
 #ifndef VIXL_CPU_FEATURES_H
 #define VIXL_CPU_FEATURES_H

+#include <bitset>
 #include <ostream>

 #include "globals-vixl.h"
@@ -34,16 +35,65 @@
 namespace vixl {


+// VIXL aims to handle and detect all architectural features that are likely to
+// influence code-generation decisions at EL0 (user-space).
+//
+// - There may be multiple VIXL feature flags for a given architectural
+//   extension. This occurs where the extension allow components to be
+//   implemented independently, or where kernel support is needed, and is likely
+//   to be fragmented.
+//
+//   For example, Pointer Authentication (kPAuth*) has a separate feature flag
+//   for access to PACGA, and to indicate that the QARMA algorithm is
+//   implemented.
+//
+// - Conversely, some extensions have configuration options that do not affect
+//   EL0, so these are presented as a single VIXL feature.
+//
+//   For example, the RAS extension (kRAS) has several variants, but the only
+//   feature relevant to VIXL is the addition of the ESB instruction so we only
+//   need a single flag.
+//
+// - VIXL offers separate flags for separate features even if they're
+//   architecturally linked.
+//
+//   For example, the architecture requires kFPHalf and kNEONHalf to be equal,
+//   but they have separate hardware ID register fields so VIXL presents them as
+//   separate features.
+//
+// - VIXL can detect every feature for which it can generate code.
+//
+// - VIXL can detect some features for which it cannot generate code.
+//
+// The CPUFeatures::Feature enum — derived from the macro list below — is
+// frequently extended. New features may be added to the list at any point, and
+// no assumptions should be made about the numerical values assigned to each
+// enum constant. The symbolic names can be considered to be stable.
+//
+// The debug descriptions are used only for debug output. The 'cpuinfo' strings
+// are informative; VIXL does not use /proc/cpuinfo for feature detection.
+
 // clang-format off
 #define VIXL_CPU_FEATURE_LIST(V)                                               \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
  /* registers, so that the detailed feature registers can be read          */ \
  /* directly.                                                              */ \
+                                                                               \
+  /* Constant name        Debug description         Linux 'cpuinfo' string. */ \
  V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
                                                                               \
  V(kFP,                  "FP",                     "fp")                      \
  V(kNEON,                "NEON",                   "asimd")                   \
  V(kCRC32,               "CRC32",                  "crc32")                   \
+  V(kDGH,                 "DGH",                    "dgh")                     \
+  /* Speculation control features.                                          */ \
+  V(kCSV2,                "CSV2",                   NULL)                      \
+  V(kSCXTNUM,             "SCXTNUM",                NULL)                      \
+  V(kCSV3,                "CSV3",                   NULL)                      \
+  V(kSB,                  "SB",                     "sb")                      \
+  V(kSPECRES,             "SPECRES",                NULL)                      \
+  V(kSSBS,                "SSBS",                   NULL)                      \
+  V(kSSBSControl,         "SSBS (PSTATE control)",  "ssbs")                    \
  /* Cryptographic support instructions.                                    */ \
  V(kAES,                 "AES",                    "aes")                     \
  V(kSHA1,                "SHA1",                   "sha1")                    \
@@ -56,34 +106,102 @@ namespace vixl {
  V(kLORegions,           "LORegions",              NULL)                      \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
  V(kRDM,                 "RDM",                    "asimdrdm")                \
+  /* Scalable Vector Extension.                                             */ \
+  V(kSVE,                 "SVE",                    "sve")                     \
+  V(kSVEF64MM,            "SVE F64MM",              "svef64mm")                \
+  V(kSVEF32MM,            "SVE F32MM",              "svef32mm")                \
+  V(kSVEI8MM,             "SVE I8MM",               "svei8imm")                \
+  V(kSVEBF16,             "SVE BFloat16",           "svebf16")                 \
  /* SDOT and UDOT support (in NEON).                                       */ \
  V(kDotProduct,          "DotProduct",             "asimddp")                 \
+  /* Int8 matrix multiplication (in NEON).                                  */ \
+  V(kI8MM,                "NEON I8MM",              "i8mm")                    \
  /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
  V(kFPHalf,              "FPHalf",                 "fphp")                    \
  V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
+  /* BFloat16 support (in both FP and NEON.)                                */ \
+  V(kBF16,                "FP/NEON BFloat 16",      "bf16")                    \
  /* The RAS extension, including the ESB instruction.                      */ \
  V(kRAS,                 "RAS",                    NULL)                      \
  /* Data cache clean to the point of persistence: DC CVAP.                 */ \
  V(kDCPoP,               "DCPoP",                  "dcpop")                   \
+  /* Data cache clean to the point of deep persistence: DC CVADP.           */ \
+  V(kDCCVADP,             "DCCVADP",                "dcpodp")                  \
  /* Cryptographic support instructions.                                    */ \
  V(kSHA3,                "SHA3",                   "sha3")                    \
  V(kSHA512,              "SHA512",                 "sha512")                  \
  V(kSM3,                 "SM3",                    "sm3")                     \
  V(kSM4,                 "SM4",                    "sm4")                     \
  /* Pointer authentication for addresses.                                  */ \
-  V(kPAuth,               "PAuth",                  NULL)                      \
+  V(kPAuth,               "PAuth",                  "paca")                    \
  /* Pointer authentication for addresses uses QARMA.                       */ \
  V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
  /* Generic authentication (using the PACGA instruction).                  */ \
-  V(kPAuthGeneric,        "PAuthGeneric",           NULL)                      \
+  V(kPAuthGeneric,        "PAuthGeneric",           "pacg")                    \
  /* Generic authentication uses QARMA.                                     */ \
  V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
-  /* JavaScript-style FP <-> integer conversion instruction: FJCVTZS.       */ \
+  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS.        */ \
  V(kJSCVT,               "JSCVT",                  "jscvt")                   \
+  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
+  V(kFcma,                "Fcma",                   "fcma")                    \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc,                "RCpc",                   "lrcpc")                   \
-  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
-  V(kFcma,                "Fcma",                   "fcma")
+  V(kRCpcImm,             "RCpc (imm)",             "ilrcpc")                  \
+  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF.               */ \
+  V(kFlagM,               "FlagM",                  "flagm")                   \
+  /* Unaligned single-copy atomicity.                                       */ \
+  V(kUSCAT,               "USCAT",                  "uscat")                   \
+  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}.         */ \
+  V(kFHM,                 "FHM",                    "asimdfhm")                \
+  /* Data-independent timing (for selected instructions).                   */ \
+  V(kDIT,                 "DIT",                    "dit")                     \
+  /* Branch target identification.                                          */ \
+  V(kBTI,                 "BTI",                    "bti")                     \
+  /* Flag manipulation instructions: {AX,XA}FLAG                            */ \
+  V(kAXFlag,              "AXFlag",                 "flagm2")                  \
+  /* Random number generation extension,                                    */ \
+  V(kRNG,                 "RNG",                    "rng")                     \
+  /* Floating-point round to {32,64}-bit integer.                           */ \
+  V(kFrintToFixedSizedInt,"Frint (bounded)",        "frint")                   \
+  /* Memory Tagging Extension.                                              */ \
+  V(kMTEInstructions,     "MTE (EL0 instructions)", NULL)                      \
+  V(kMTE,                 "MTE",                    NULL)                      \
+  V(kMTE3,                "MTE (asymmetric)",       "mte3")                    \
+  /* PAuth extensions.                                                      */ \
+  V(kPAuthEnhancedPAC,    "PAuth EnhancedPAC",      NULL)                      \
+  V(kPAuthEnhancedPAC2,   "PAuth EnhancedPAC2",     NULL)                      \
+  V(kPAuthFPAC,           "PAuth FPAC",             NULL)                      \
+  V(kPAuthFPACCombined,   "PAuth FPACCombined",     NULL)                      \
+  /* Scalable Vector Extension 2.                                           */ \
+  V(kSVE2,                "SVE2",                   "sve2")                    \
+  V(kSVESM4,              "SVE SM4",                "svesm4")                  \
+  V(kSVESHA3,             "SVE SHA3",               "svesha3")                 \
+  V(kSVEBitPerm,          "SVE BitPerm",            "svebitperm")              \
+  V(kSVEAES,              "SVE AES",                "sveaes")                  \
+  V(kSVEPmull128,         "SVE Pmull128",           "svepmull")                \
+  /* Alternate floating-point behavior                                      */ \
+  V(kAFP,                 "AFP",                    "afp")                     \
+  /* Enhanced Counter Virtualization                                        */ \
+  V(kECV,                 "ECV",                    "ecv")                     \
+  /* Increased precision of Reciprocal Estimate and Square Root Estimate    */ \
+  V(kRPRES,               "RPRES",                  "rpres")                   \
+  /* Memory operation instructions, for memcpy, memset                      */ \
+  V(kMOPS,                "Memory ops",             NULL)                      \
+  /* Scalable Matrix Extension (SME)                                        */ \
+  V(kSME,                 "SME",                    "sme")                     \
+  V(kSMEi16i64,           "SME (i16i64)",           "smei16i64")               \
+  V(kSMEf64f64,           "SME (f64f64)",           "smef64f64")               \
+  V(kSMEi8i32,            "SME (i8i32)",            "smei8i32")                \
+  V(kSMEf16f32,           "SME (f16f32)",           "smef16f32")               \
+  V(kSMEb16f32,           "SME (b16f32)",           "smeb16f32")               \
+  V(kSMEf32f32,           "SME (f32f32)",           "smef32f32")               \
+  V(kSMEfa64,             "SME (fa64)",             "smefa64")                 \
+  /* WFET and WFIT instruction support                                      */ \
+  V(kWFXT,                "WFXT",                   "wfxt")                    \
+  /* Extended BFloat16 instructions                                         */ \
+  V(kEBF16,               "EBF16",                  "ebf16")                   \
+  V(kSVE_EBF16,           "EBF16 (SVE)",            "sveebf16")                \
+  V(kCSSC,                "CSSC",                   "cssc")
 // clang-format on


@@ -176,13 +294,13 @@ class CPUFeatures {
  // clang-format on

  // By default, construct with no features enabled.
-  CPUFeatures() : features_(0) {}
+  CPUFeatures() : features_{} {}

  // Construct with some features already enabled.
-  CPUFeatures(Feature feature0,
-              Feature feature1 = kNone,
-              Feature feature2 = kNone,
-              Feature feature3 = kNone);
+  template <typename T, typename... U>
+  CPUFeatures(T first, U... others) : features_{} {
+    Combine(first, others...);
+  }

  // Construct with all features enabled. This can be used to disable feature
  // checking: `Has(...)` returns true regardless of the argument.
@@ -198,51 +316,80 @@ class CPUFeatures {
    return CPUFeatures(kFP, kNEON, kCRC32);
  }

+  // Construct a new CPUFeatures object using ID registers. This assumes that
+  // kIDRegisterEmulation is present.
+  static CPUFeatures InferFromIDRegisters();
+
+  enum QueryIDRegistersOption {
+    kDontQueryIDRegisters,
+    kQueryIDRegistersIfAvailable
+  };
+
  // Construct a new CPUFeatures object based on what the OS reports.
-  static CPUFeatures InferFromOS();
+  static CPUFeatures InferFromOS(
+      QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);

  // Combine another CPUFeatures object into this one. Features that already
  // exist in this set are left unchanged.
  void Combine(const CPUFeatures& other);

-  // Combine specific features into this set. Features that already exist in
-  // this set are left unchanged.
-  void Combine(Feature feature0,
-               Feature feature1 = kNone,
-               Feature feature2 = kNone,
-               Feature feature3 = kNone);
+  // Combine a specific feature into this set. If it already exists in the set,
+  // the set is left unchanged.
+  void Combine(Feature feature);
+
+  // Combine multiple features (or feature sets) into this set.
+  template <typename T, typename... U>
+  void Combine(T first, U... others) {
+    Combine(first);
+    Combine(others...);
+  }

  // Remove features in another CPUFeatures object from this one.
  void Remove(const CPUFeatures& other);

-  // Remove specific features from this set.
-  void Remove(Feature feature0,
-              Feature feature1 = kNone,
-              Feature feature2 = kNone,
-              Feature feature3 = kNone);
+  // Remove a specific feature from this set. This has no effect if the feature
+  // doesn't exist in the set.
+  void Remove(Feature feature0);

-  // Chaining helpers for convenient construction.
-  CPUFeatures With(const CPUFeatures& other) const;
-  CPUFeatures With(Feature feature0,
-                   Feature feature1 = kNone,
-                   Feature feature2 = kNone,
-                   Feature feature3 = kNone) const;
-  CPUFeatures Without(const CPUFeatures& other) const;
-  CPUFeatures Without(Feature feature0,
-                      Feature feature1 = kNone,
-                      Feature feature2 = kNone,
-                      Feature feature3 = kNone) const;
+  // Remove multiple features (or feature sets) from this set.
+  template <typename T, typename... U>
+  void Remove(T first, U... others) {
+    Remove(first);
+    Remove(others...);
+  }

-  // Query features.
-  // Note that an empty query (like `Has(kNone)`) always returns true.
+  // Chaining helpers for convenient construction by combining other CPUFeatures
+  // or individual Features.
+  template <typename... T>
+  CPUFeatures With(T... others) const {
+    CPUFeatures f(*this);
+    f.Combine(others...);
+    return f;
+  }
+
+  template <typename... T>
+  CPUFeatures Without(T... others) const {
+    CPUFeatures f(*this);
+    f.Remove(others...);
+    return f;
+  }
+
+  // Test whether the `other` feature set is equal to or a subset of this one.
  bool Has(const CPUFeatures& other) const;
-  bool Has(Feature feature0,
-           Feature feature1 = kNone,
-           Feature feature2 = kNone,
-           Feature feature3 = kNone) const;
+
+  // Test whether a single feature exists in this set.
+  // Note that `Has(kNone)` always returns true.
+  bool Has(Feature feature) const;
+
+  // Test whether all of the specified features exist in this set.
+  template <typename T, typename... U>
+  bool Has(T first, U... others) const {
+    return Has(first) && Has(others...);
+  }

  // Return the number of enabled features.
  size_t Count() const;
+  bool HasNoFeatures() const { return Count() == 0; }

  // Check for equivalence.
  bool operator==(const CPUFeatures& other) const {
@@ -256,9 +403,8 @@ class CPUFeatures {
  const_iterator end() const;

 private:
-  // Each bit represents a feature. This field will be replaced as needed if
-  // features are added.
-  uint64_t features_;
+  // Each bit represents a feature. This set will be extended as needed.
+  std::bitset<kNumberOfFeatures> features_;

  friend std::ostream& operator<<(std::ostream& os,
                                  const vixl::CPUFeatures& features);
@@ -281,8 +427,8 @@ class CPUFeaturesConstIterator {
  bool operator!=(const CPUFeaturesConstIterator& other) const {
    return !(*this == other);
  }
-  CPUFeatures::Feature operator++();
-  CPUFeatures::Feature operator++(int);
+  CPUFeaturesConstIterator& operator++();
+  CPUFeaturesConstIterator operator++(int);

  CPUFeatures::Feature operator*() const {
    VIXL_ASSERT(IsValid());
@@ -301,8 +447,10 @@ class CPUFeaturesConstIterator {
  CPUFeatures::Feature feature_;

  bool IsValid() const {
-    return ((cpu_features_ == NULL) && (feature_ == CPUFeatures::kNone)) ||
-           cpu_features_->Has(feature_);
+    if (cpu_features_ == NULL) {
+      return feature_ == CPUFeatures::kNone;
+    }
+    return cpu_features_->Has(feature_);
  }
 };

@@ -325,21 +473,17 @@ class CPUFeaturesScope {
  // Start a CPUFeaturesScope on any object that implements
  // `CPUFeatures* GetCPUFeatures()`.
  template <typename T>
-  explicit CPUFeaturesScope(T* cpu_features_wrapper,
-                            CPUFeatures::Feature feature0 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature1 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature2 = CPUFeatures::kNone,
-                            CPUFeatures::Feature feature3 = CPUFeatures::kNone)
+  explicit CPUFeaturesScope(T* cpu_features_wrapper)
      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
-        old_features_(*cpu_features_) {
-    cpu_features_->Combine(feature0, feature1, feature2, feature3);
-  }
+        old_features_(*cpu_features_) {}

-  template <typename T>
-  CPUFeaturesScope(T* cpu_features_wrapper, const CPUFeatures& other)
+  // Start a CPUFeaturesScope on any object that implements
+  // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
+  template <typename T, typename U, typename... V>
+  CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
      : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
        old_features_(*cpu_features_) {
-    cpu_features_->Combine(other);
+    cpu_features_->Combine(first, features...);
  }

  ~CPUFeaturesScope() { *cpu_features_ = old_features_; }