diff mbox series

[FFmpeg-devel,v2,1/5] configure: aarch64: Support assembling the dotprod and i8mm arch extensions

Message ID 20230530123043.52940-1-martin@martin.st
State Accepted
Commit fb1b88af77cd39034cef4b6d08af79496cd75ed8
Headers show
Series [FFmpeg-devel,v2,1/5] configure: aarch64: Support assembling the dotprod and i8mm arch extensions | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö May 30, 2023, 12:30 p.m. UTC
These are available since ARMv8.4-a and ARMv8.6-a respectively,
but can also be available optionally since ARMv8.2-a.

Check if ".arch armv8.2-a" and ".arch_extension {dotprod,i8mm}" are
supported, and check if the instructions can be assembled.

Current clang versions fail to support the dotprod and i8mm
features in the .arch_extension directive, but do support them
if enabled with -march=armv8.4-a on the command line. (Curiously,
lowering the arch level with ".arch armv8.2-a" doesn't make the
extensions unavailable if they were enabled with -march; if that
changes, Clang should also learn to support these extensions via
.arch_extension for them to remain usable here.)
---
Simplified the detection logic somewhat; check if ".arch armv8.2-a"
and ".arch_extension {dotprod,i8mm}" are available, then check if
the instruction can be assembled. This way, we check exactly the same
thing as we are going to assemble in the end, so there shouldn't be
any risk of build breakage due to testing and building subtly different
things.
---
 configure               | 81 ++++++++++++++++++++++++++++++++++++++++-
 libavutil/aarch64/asm.S | 11 ++++++
 2 files changed, 91 insertions(+), 1 deletion(-)

Comments

Martin Storsjö June 6, 2023, 10:25 a.m. UTC | #1
On Tue, 30 May 2023, Martin Storsjö wrote:

> Current clang versions fail to support the dotprod and i8mm
> features in the .arch_extension directive, but do support them
> if enabled with -march=armv8.4-a on the command line. (Curiously,
> lowering the arch level with ".arch armv8.2-a" doesn't make the
> extensions unavailable if they were enabled with -march; if that
> changes, Clang should also learn to support these extensions via
> .arch_extension for them to remain usable here.)

FWIW, since today, Clang does support enabling both of these extensions 
via the .arch_extension directive, see 
https://github.com/llvm/llvm-project/commit/4b8d9abca7d0280878fb12de331e688ee85d7cd8 
and 
https://github.com/llvm/llvm-project/commit/4b8d9abca7d0280878fb12de331e688ee85d7cd8.

It turns out that it is possible to enable these extensions with older 
Clang via assembly too, but due to a bug, it would require using e.g. 
".arch armv8.6-a+crc" (it requires using a "+<ext>" for any random 
unrelated extension). I won't try to support using that in our assembly, 
as the proper mechanism should be supported going forward.

As there was no further opposition, I'll push this patchset now with the 
last modifications that were suggested.

// Martin
diff mbox series

Patch

diff --git a/configure b/configure
index 495493aa0e..50eb27ba0e 100755
--- a/configure
+++ b/configure
@@ -454,6 +454,8 @@  Optimization options (experts only):
   --disable-armv6t2        disable armv6t2 optimizations
   --disable-vfp            disable VFP optimizations
   --disable-neon           disable NEON optimizations
+  --disable-dotprod        disable DOTPROD optimizations
+  --disable-i8mm           disable I8MM optimizations
   --disable-inline-asm     disable use of inline assembly
   --disable-x86asm         disable use of standalone x86 assembly
   --disable-mipsdsp        disable MIPS DSP ASE R1 optimizations
@@ -1154,6 +1156,43 @@  check_insn(){
     check_as ${1}_external "$2"
 }
 
+check_arch_level(){
+    log check_arch_level "$@"
+    level="$1"
+    check_as tested_arch_level ".arch $level"
+    enabled tested_arch_level && as_arch_level="$level"
+}
+
+check_archext_insn(){
+    log check_archext_insn "$@"
+    feature="$1"
+    instr="$2"
+    # Check if the assembly is accepted in inline assembly.
+    check_inline_asm ${feature}_inline "\"$instr\""
+    # We don't check if the instruction is supported out of the box by the
+    # external assembler (we don't try to set ${feature}_external) as we don't
+    # need to use these instructions in non-runtime detected codepaths.
+
+    disable $feature
+
+    enabled as_arch_directive && arch_directive=".arch $as_arch_level" || arch_directive=""
+
+    # Test if the assembler supports the .arch_extension $feature directive.
+    arch_extension_directive=".arch_extension $feature"
+    test_as <<EOF && enable as_archext_${feature}_directive || arch_extension_directive=""
+$arch_directive
+$arch_extension_directive
+EOF
+
+    # Test if we can assemble the instruction after potential .arch and
+    # .arch_extension directives.
+    test_as <<EOF && enable ${feature}
+$arch_directive
+$arch_extension_directive
+$instr
+EOF
+}
+
 check_x86asm(){
     log check_x86asm "$@"
     name=$1
@@ -2059,6 +2098,8 @@  ARCH_EXT_LIST_ARM="
     armv6
     armv6t2
     armv8
+    dotprod
+    i8mm
     neon
     vfp
     vfpv3
@@ -2322,6 +2363,8 @@  SYSTEM_LIBRARIES="
 
 TOOLCHAIN_FEATURES="
     as_arch_directive
+    as_archext_dotprod_directive
+    as_archext_i8mm_directive
     as_dn_directive
     as_fpu_directive
     as_func
@@ -2622,6 +2665,8 @@  intrinsics_neon_deps="neon"
 vfp_deps_any="aarch64 arm"
 vfpv3_deps="vfp"
 setend_deps="arm"
+dotprod_deps="aarch64 neon"
+i8mm_deps="aarch64 neon"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -5988,12 +6033,27 @@  check_inline_asm inline_asm_labels '"1:\n"'
 check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
 
 if enabled aarch64; then
+    as_arch_level="armv8-a"
+    check_as as_arch_directive ".arch $as_arch_level"
+    enabled as_arch_directive && check_arch_level armv8.2-a
+
     enabled armv8 && check_insn armv8 'prfm   pldl1strm, [x0]'
     # internal assembler in clang 3.3 does not support this instruction
     enabled neon && check_insn neon 'ext   v0.8B, v0.8B, v1.8B, #1'
     enabled vfp  && check_insn vfp  'fmadd d0,    d0,    d1,    d2'
 
-    map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
+    archext_list="dotprod i8mm"
+    enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
+    enabled i8mm    && check_archext_insn i8mm    'usdot v0.4s, v0.16b, v0.16b'
+
+    # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
+    # assembly support the feature out of the box. Skip this for the features
+    # checked with check_archext_insn above, as that function takes care of
+    # updating all the variables as necessary.
+    for v in $ARCH_EXT_LIST_ARM; do
+        is_in $v $archext_list && continue
+        enabled_any ${v}_external ${v}_inline || disable $v
+    done
 
 elif enabled alpha; then
 
@@ -6022,6 +6082,12 @@  EOF
         warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
     fi
 
+    # Test for various instruction sets, testing support both in inline and
+    # external assembly. This sets the ${v}_inline or ${v}_external flags
+    # if the instruction can be used unconditionally in either inline or
+    # external assembly. This means that if the ${v}_external feature is set,
+    # that feature can be used unconditionally in various support macros
+    # anywhere in external assembly, in any function.
     enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
     enabled armv6   && check_insn armv6   'sadd16 r0, r0, r0'
     enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
@@ -6030,6 +6096,14 @@  EOF
     enabled vfpv3   && check_insn vfpv3   'vmov.f32 s0, #1.0'
     enabled setend  && check_insn setend  'setend be'
 
+    # If neither inline nor external assembly can use the feature by default,
+    # disable the main unsuffixed feature (e.g. HAVE_NEON).
+    #
+    # For targets that support runtime CPU feature detection, don't disable
+    # the main feature flag - there we assume that all supported toolchains
+    # can assemble code for all instruction set features (e.g. NEON) with
+    # suitable assembly flags (such as ".fpu neon"); we don't check
+    # specifically that they really do.
     [ $target_os = linux ] || [ $target_os = android ] ||
         map 'enabled_any ${v}_external ${v}_inline || disable $v' \
             $ARCH_EXT_LIST_ARM
@@ -7610,6 +7684,8 @@  fi
 if enabled aarch64; then
     echo "NEON enabled              ${neon-no}"
     echo "VFP enabled               ${vfp-no}"
+    echo "DOTPROD enabled           ${dotprod-no}"
+    echo "I8MM enabled              ${i8mm-no}"
 fi
 if enabled arm; then
     echo "ARMv5TE enabled           ${armv5te-no}"
@@ -7900,6 +7976,9 @@  test -n "$assert_level" &&
 test -n "$malloc_prefix" &&
     echo "#define MALLOC_PREFIX $malloc_prefix" >>$TMPH
 
+enabled aarch64 &&
+    echo "#define AS_ARCH_LEVEL $as_arch_level" >>$TMPH
+
 if enabled x86asm; then
     append config_files $TMPASM
     cat > $TMPASM <<EOF
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index a7782415d7..8589cf74fc 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,6 +36,17 @@ 
 #   define __has_feature(x) 0
 #endif
 
+#if HAVE_AS_ARCH_DIRECTIVE
+        .arch           AS_ARCH_LEVEL
+#endif
+
+#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
+        .arch_extension dotprod
+#endif
+#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
+        .arch_extension i8mm
+#endif
+
 
 /* Support macros for
  *   - Armv8.3-A Pointer Authentication and