diff mbox series

[FFmpeg-devel,3/5] aarch64: Add CPU feature flags for SVE and SVE2

Message ID 20240917121419.610349-3-martin@martin.st
State Accepted
Headers show
Series [FFmpeg-devel,1/5] aarch64: Detect I8MM on Windows via SVE-I8MM | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö Sept. 17, 2024, 12:14 p.m. UTC
Add code for detecting these features on Linux and Windows.
---
 libavutil/aarch64/cpu.c   | 20 ++++++++++++++++++++
 libavutil/aarch64/cpu.h   |  2 ++
 libavutil/cpu.c           |  2 ++
 libavutil/cpu.h           |  2 ++
 libavutil/tests/cpu.c     |  2 ++
 tests/checkasm/checkasm.c |  2 ++
 6 files changed, 30 insertions(+)
diff mbox series

Patch

diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index fe24b1da4d..e82c0f19ab 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -25,6 +25,8 @@ 
 #include <sys/auxv.h>
 
 #define HWCAP_AARCH64_ASIMDDP (1 << 20)
+#define HWCAP_AARCH64_SVE     (1 << 22)
+#define HWCAP2_AARCH64_SVE2   (1 << 1)
 #define HWCAP2_AARCH64_I8MM   (1 << 13)
 
 static int detect_flags(void)
@@ -36,6 +38,10 @@  static int detect_flags(void)
 
     if (hwcap & HWCAP_AARCH64_ASIMDDP)
         flags |= AV_CPU_FLAG_DOTPROD;
+    if (hwcap & HWCAP_AARCH64_SVE)
+        flags |= AV_CPU_FLAG_SVE;
+    if (hwcap2 & HWCAP2_AARCH64_SVE2)
+        flags |= AV_CPU_FLAG_SVE2;
     if (hwcap2 & HWCAP2_AARCH64_I8MM)
         flags |= AV_CPU_FLAG_I8MM;
 
@@ -119,6 +125,14 @@  static int detect_flags(void)
      * regular I8MM is available. */
     if (IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE))
         flags |= AV_CPU_FLAG_I8MM;
+#endif
+#ifdef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
+    if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE))
+        flags |= AV_CPU_FLAG_SVE;
+#endif
+#ifdef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
+    if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE))
+        flags |= AV_CPU_FLAG_SVE2;
 #endif
     return flags;
 }
@@ -142,6 +156,12 @@  int ff_get_cpu_flags_aarch64(void)
 #ifdef __ARM_FEATURE_MATMUL_INT8
     flags |= AV_CPU_FLAG_I8MM;
 #endif
+#ifdef __ARM_FEATURE_SVE
+    flags |= AV_CPU_FLAG_SVE;
+#endif
+#ifdef __ARM_FEATURE_SVE2
+    flags |= AV_CPU_FLAG_SVE2;
+#endif
 
     flags |= detect_flags();
 
diff --git a/libavutil/aarch64/cpu.h b/libavutil/aarch64/cpu.h
index 64d703be37..df7becca30 100644
--- a/libavutil/aarch64/cpu.h
+++ b/libavutil/aarch64/cpu.h
@@ -27,5 +27,7 @@ 
 #define have_vfp(flags)  CPUEXT(flags, VFP)
 #define have_dotprod(flags) CPUEXT(flags, DOTPROD)
 #define have_i8mm(flags)    CPUEXT(flags, I8MM)
+#define have_sve(flags)     CPUEXT(flags, SVE)
+#define have_sve2(flags)    CPUEXT(flags, SVE2)
 
 #endif /* AVUTIL_AARCH64_CPU_H */
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index df00bd541f..e16ebc0d38 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -180,6 +180,8 @@  int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP      },    .unit = "flags" },
         { "dotprod",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_DOTPROD  },    .unit = "flags" },
         { "i8mm",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_I8MM     },    .unit = "flags" },
+        { "sve",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE      },    .unit = "flags" },
+        { "sve2",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SVE2     },    .unit = "flags" },
 #elif ARCH_MIPS
         { "mmi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI      },    .unit = "flags" },
         { "msa",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA      },    .unit = "flags" },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index ba6c234e04..6b6e50f07a 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -72,6 +72,8 @@ 
 #define AV_CPU_FLAG_VFP_VM       (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPUs implementations
 #define AV_CPU_FLAG_DOTPROD      (1 << 8)
 #define AV_CPU_FLAG_I8MM         (1 << 9)
+#define AV_CPU_FLAG_SVE          (1 <<10)
+#define AV_CPU_FLAG_SVE2         (1 <<11)
 #define AV_CPU_FLAG_SETEND       (1 <<16)
 
 #define AV_CPU_FLAG_MMI          (1 << 0)
diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c
index 0a459c1d9e..679b538f0f 100644
--- a/libavutil/tests/cpu.c
+++ b/libavutil/tests/cpu.c
@@ -40,6 +40,8 @@  static const struct {
     { AV_CPU_FLAG_VFP,       "vfp"        },
     { AV_CPU_FLAG_DOTPROD,   "dotprod"    },
     { AV_CPU_FLAG_I8MM,      "i8mm"       },
+    { AV_CPU_FLAG_SVE,       "sve"        },
+    { AV_CPU_FLAG_SVE2,      "sve2"       },
 #elif ARCH_ARM
     { AV_CPU_FLAG_ARMV5TE,   "armv5te"    },
     { AV_CPU_FLAG_ARMV6,     "armv6"      },
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 73a998ae3a..c932e028a5 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -305,6 +305,8 @@  static const struct {
     { "NEON",     "neon",     AV_CPU_FLAG_NEON },
     { "DOTPROD",  "dotprod",  AV_CPU_FLAG_DOTPROD },
     { "I8MM",     "i8mm",     AV_CPU_FLAG_I8MM },
+    { "SVE",      "sve",      AV_CPU_FLAG_SVE },
+    { "SVE2",     "sve2",     AV_CPU_FLAG_SVE2 },
 #elif ARCH_ARM
     { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
     { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },