diff mbox series

[FFmpeg-devel,2/5] configure: Add detection of assembler support for SVE/SVE2

Message ID 20240917121419.610349-2-martin@martin.st
State Accepted
Headers show
Series [FFmpeg-devel,1/5] aarch64: Detect I8MM on Windows via SVE-I8MM | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö Sept. 17, 2024, 12:14 p.m. UTC
It turns out that recent versions of MS armasm64 do support some
SVE instructions, but not all of them. Test for one of the
instructions that it currently doesn't support.

---

Just as a disclaimer, I'm not currently actively planning on writing
SVE/SVE2 optimizations. However, related projects such as x264 and
dav1d do have a few functions using these extensions, so we might just
as well add the framework support for these features in ffmpeg,
as functions needing this support will come sooner or later
anyway.

In the related projects, there's no real use of longer vectors
(as there's very little such HW available anyway), but SVE gives
widening loads (used in a couple of places in x264) and 16 bit dot
products (used in dav1d), which can be useful with 128 bit vectors.
---
 configure               | 14 +++++++++++++-
 ffbuild/arch.mak        |  2 ++
 libavutil/aarch64/asm.S | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

Comments

Martin Storsjö Sept. 26, 2024, 10:25 a.m. UTC | #1
On Tue, 17 Sep 2024, Martin Storsjö wrote:

> It turns out that recent versions of MS armasm64 does support some
> SVE instructions, but not all of them. Test for one of the
> instructions that it currently doesn't support.
>
> ---
>
> Just as disclaimer, I'm not currently actively planning on writing
> SVE/SVE2 optimizations. However, related projects such as x264 and
> dav1d do have a few functions using these extensions, so we might just
> as well add the framework support for these features in ffmpeg
> anyway, as functions needing this support will come sooner or later
> anyway.
>
> In the related projects, there's no really use of longer vectors
> (as there's very little such HW available anyway), but SVE gives
> widening loads (used in a couple places in x264) and 16 bit dot
> products (used in dav1d), which can be useful with 128 bit vectors.
> ---
> configure               | 14 +++++++++++++-
> ffbuild/arch.mak        |  2 ++
> libavutil/aarch64/asm.S | 18 ++++++++++++++++++
> 3 files changed, 33 insertions(+), 1 deletion(-)

Planning on pushing this set later today.

// Martin
diff mbox series

Patch

diff --git a/configure b/configure
index da36419f2d..d05c4a5a51 100755
--- a/configure
+++ b/configure
@@ -466,6 +466,8 @@  Optimization options (experts only):
   --disable-neon           disable NEON optimizations
   --disable-dotprod        disable DOTPROD optimizations
   --disable-i8mm           disable I8MM optimizations
+  --disable-sve            disable SVE optimizations
+  --disable-sve2           disable SVE2 optimizations
   --disable-inline-asm     disable use of inline assembly
   --disable-x86asm         disable use of standalone x86 assembly
   --disable-mipsdsp        disable MIPS DSP ASE R1 optimizations
@@ -2163,6 +2165,8 @@  ARCH_EXT_LIST_ARM="
     vfp
     vfpv3
     setend
+    sve
+    sve2
 "
 
 ARCH_EXT_LIST_MIPS="
@@ -2435,6 +2439,8 @@  TOOLCHAIN_FEATURES="
     as_arch_directive
     as_archext_dotprod_directive
     as_archext_i8mm_directive
+    as_archext_sve_directive
+    as_archext_sve2_directive
     as_dn_directive
     as_fpu_directive
     as_func
@@ -2755,6 +2761,8 @@  vfpv3_deps="vfp"
 setend_deps="arm"
 dotprod_deps="aarch64 neon"
 i8mm_deps="aarch64 neon"
+sve_deps="aarch64 neon"
+sve2_deps="aarch64 neon sve"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -6223,9 +6231,11 @@  if enabled aarch64; then
     # internal assembler in clang 3.3 does not support this instruction
     enabled neon && check_insn neon 'ext   v0.8B, v0.8B, v1.8B, #1'
 
-    archext_list="dotprod i8mm"
+    archext_list="dotprod i8mm sve sve2"
     enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
     enabled i8mm    && check_archext_insn i8mm    'usdot v0.4s, v0.16b, v0.16b'
+    enabled sve     && check_archext_insn sve     'whilelt p0.s, x0, x1'
+    enabled sve2    && check_archext_insn sve2    'sqrdmulh z0.s, z0.s, z0.s'
 
     # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
     # assembly support the feature out of the box. Skip this for the features
@@ -7913,6 +7923,8 @@  if enabled aarch64; then
     echo "NEON enabled              ${neon-no}"
     echo "DOTPROD enabled           ${dotprod-no}"
     echo "I8MM enabled              ${i8mm-no}"
+    echo "SVE enabled               ${sve-no}"
+    echo "SVE2 enabled              ${sve2-no}"
 fi
 if enabled arm; then
     echo "ARMv5TE enabled           ${armv5te-no}"
diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak
index 3fc40e5e5d..af71aacfd2 100644
--- a/ffbuild/arch.mak
+++ b/ffbuild/arch.mak
@@ -3,6 +3,8 @@  OBJS-$(HAVE_ARMV6)   += $(ARMV6-OBJS)   $(ARMV6-OBJS-yes)
 OBJS-$(HAVE_ARMV8)   += $(ARMV8-OBJS)   $(ARMV8-OBJS-yes)
 OBJS-$(HAVE_VFP)     += $(VFP-OBJS)     $(VFP-OBJS-yes)
 OBJS-$(HAVE_NEON)    += $(NEON-OBJS)    $(NEON-OBJS-yes)
+OBJS-$(HAVE_SVE)     += $(SVE-OBJS)     $(SVE-OBJS-yes)
+OBJS-$(HAVE_SVE2)    += $(SVE2-OBJS)    $(SVE2-OBJS-yes)
 
 OBJS-$(HAVE_MIPSFPU)   += $(MIPSFPU-OBJS)    $(MIPSFPU-OBJS-yes)
 OBJS-$(HAVE_MIPSDSP)   += $(MIPSDSP-OBJS)    $(MIPSDSP-OBJS-yes)
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index 1840f9fb01..50ce7d4dfd 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -56,8 +56,26 @@ 
 #define DISABLE_I8MM
 #endif
 
+#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
+#define ENABLE_SVE  .arch_extension sve
+#define DISABLE_SVE .arch_extension nosve
+#else
+#define ENABLE_SVE
+#define DISABLE_SVE
+#endif
+
+#if HAVE_AS_ARCHEXT_SVE2_DIRECTIVE
+#define ENABLE_SVE2  .arch_extension sve2
+#define DISABLE_SVE2 .arch_extension nosve2
+#else
+#define ENABLE_SVE2
+#define DISABLE_SVE2
+#endif
+
 DISABLE_DOTPROD
 DISABLE_I8MM
+DISABLE_SVE
+DISABLE_SVE2
 
 
 /* Support macros for