diff mbox series

[FFmpeg-devel,v2,3/5] aarch64: Add Linux runtime cpu feature detection using getauxval(AT_HWCAP)

Message ID 20230530123043.52940-3-martin@martin.st
State New
Headers show
Series [FFmpeg-devel,v2,1/5] configure: aarch64: Support assembling the dotprod and i8mm arch extensions | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Martin Storsjö May 30, 2023, 12:30 p.m. UTC
Based partially on code by Janne Grunau.

---
Updated to use both the direct HWCAP* macros and HWCAP_CPUID. A
not unreasonably old distribution like Ubuntu 20.04 does have
HWCAP_CPUID but not HWCAP2_I8MM in the distribution provided headers.

Alternatively I guess we could carry our own fallback hardcoded values
for the HWCAP* values we use and skip HWCAP_CPUID.
---
 configure               |  2 ++
 libavutil/aarch64/cpu.c | 63 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

Comments

Rémi Denis-Courmont May 31, 2023, 4:54 p.m. UTC | #1
Le tiistaina 30. toukokuuta 2023, 15.30.41 EEST Martin Storsjö a écrit :
> Based partially on code by Janne Grunau.
> 
> ---
> Updated to use both the direct HWCAP* macros and HWCAP_CPUID. A
> not unreasonably old distribution like Ubuntu 20.04 does have
> HWCAP_CPUID but not HWCAP2_I8MM in the distribution provided headers.
> 
> Alternatively I guess we could carry our own fallback hardcoded values
> for the HWCAP* values we use and skip HWCAP_CPUID.
> ---
>  configure               |  2 ++
>  libavutil/aarch64/cpu.c | 63 +++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 65 insertions(+)
> 
> diff --git a/configure b/configure
> index 50eb27ba0e..b39de74de5 100755
> --- a/configure
> +++ b/configure
> @@ -2209,6 +2209,7 @@ HAVE_LIST_PUB="
> 
>  HEADERS_LIST="
>      arpa_inet_h
> +    asm_hwcap_h
>      asm_types_h
>      cdio_paranoia_h
>      cdio_paranoia_paranoia_h
> @@ -6432,6 +6433,7 @@ check_headers io.h
>  enabled libdrm &&
>      check_headers linux/dma-buf.h
> 
> +check_headers asm/hwcap.h
>  check_headers linux/perf_event.h
>  check_headers libcrystalhd/libcrystalhd_if.h
>  check_headers malloc.h
> diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
> index 0c76f5ad15..4563959ffd 100644
> --- a/libavutil/aarch64/cpu.c
> +++ b/libavutil/aarch64/cpu.c
> @@ -20,6 +20,67 @@
>  #include "libavutil/cpu_internal.h"
>  #include "config.h"
> 
> +#if (defined(__linux__) || defined(__ANDROID__)) && HAVE_GETAUXVAL && HAVE_ASM_HWCAP_H
> +#include <stdint.h>
> +#include <asm/hwcap.h>
> +#include <sys/auxv.h>
> +
> +#define get_cpu_feature_reg(reg, val) \
> +        __asm__("mrs %0, " #reg : "=r" (val))
> +
> +static int detect_flags(void)
> +{
> +    int flags = 0;
> +    unsigned long hwcap, hwcap2;
> +
> +    // Check for support using direct individual HWCAPs
> +    hwcap = getauxval(AT_HWCAP);
> +#ifdef HWCAP_ASIMDDP
> +    if (hwcap & HWCAP_ASIMDDP)
> +        flags |= AV_CPU_FLAG_DOTPROD;
> +#endif
> +
> +#ifdef AT_HWCAP2
> +    hwcap2 = getauxval(AT_HWCAP2);
> +#ifdef HWCAP2_I8MM
> +    if (hwcap2 & HWCAP2_I8MM)
> +        flags |= AV_CPU_FLAG_I8MM;
> +#endif
> +#endif
> +
> +    // Silence warnings if none of the hwcaps to check are known.
> +    (void)hwcap;
> +    (void)hwcap2;
> +
> +#if defined(HWCAP_CPUID)
> +    // The HWCAP_* defines for individual extensions may become available late, as
> +    // they require updates to userland headers. As a fallback, see if we can access
> +    // the CPUID registers (trapped via the kernel).
> +    // See https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html

I don't actually care which method is used and whether to hard-code the
missing constants or not. But doing both methods is weird. If you are going to
trigger the TID3 traps anyway, there is no point checking the auxiliary
vectors beforehand, AFAICT.

You *could* check the auxiliary vectors as a run-time fallback if HWCAP_CPUID
is *not* set, but that only really makes sense for HWCAP_FP and HWCAP_ASIMD, not for
HWCAP_ASIMDDP (Linux 4.15) and HWCAP2_I8MM (Linux 5.6) which are more recent
than HWCAP_CPUID (Linux 4.11). And then, that would be only in the corner case
that FP and/or AdvSIMD were explicitly disabled since they are on by default
for all AArch64 targets.
Martin Storsjö May 31, 2023, 7:37 p.m. UTC | #2
On Wed, 31 May 2023, Rémi Denis-Courmont wrote:

> Le tiistaina 30. toukokuuta 2023, 15.30.41 EEST Martin Storsjö a écrit :
>> Based partially on code by Janne Grunau.
>> 
>> ---
>> Updated to use both the direct HWCAP* macros and HWCAP_CPUID. A
>> not unreasonably old distribution like Ubuntu 20.04 does have
>> HWCAP_CPUID but not HWCAP2_I8MM in the distribution provided headers.
>> 
>> Alternatively I guess we could carry our own fallback hardcoded values
>> for the HWCAP* values we use and skip HWCAP_CPUID.
>> ---
>>  configure               |  2 ++
>>  libavutil/aarch64/cpu.c | 63 +++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 65 insertions(+)
>> 
>> diff --git a/configure b/configure
>> index 50eb27ba0e..b39de74de5 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2209,6 +2209,7 @@ HAVE_LIST_PUB="
>>
>>  HEADERS_LIST="
>>      arpa_inet_h
>> +    asm_hwcap_h
>>      asm_types_h
>>      cdio_paranoia_h
>>      cdio_paranoia_paranoia_h
>> @@ -6432,6 +6433,7 @@ check_headers io.h
>>  enabled libdrm &&
>>      check_headers linux/dma-buf.h
>> 
>> +check_headers asm/hwcap.h
>>  check_headers linux/perf_event.h
>>  check_headers libcrystalhd/libcrystalhd_if.h
>>  check_headers malloc.h
>> diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
>> index 0c76f5ad15..4563959ffd 100644
>> --- a/libavutil/aarch64/cpu.c
>> +++ b/libavutil/aarch64/cpu.c
>> @@ -20,6 +20,67 @@
>>  #include "libavutil/cpu_internal.h"
>>  #include "config.h"
>> 
>> +#if (defined(__linux__) || defined(__ANDROID__)) && HAVE_GETAUXVAL && HAVE_ASM_HWCAP_H
>> +#include <stdint.h>
>> +#include <asm/hwcap.h>
>> +#include <sys/auxv.h>
>> +
>> +#define get_cpu_feature_reg(reg, val) \
>> +        __asm__("mrs %0, " #reg : "=r" (val))
>> +
>> +static int detect_flags(void)
>> +{
>> +    int flags = 0;
>> +    unsigned long hwcap, hwcap2;
>> +
>> +    // Check for support using direct individual HWCAPs
>> +    hwcap = getauxval(AT_HWCAP);
>> +#ifdef HWCAP_ASIMDDP
>> +    if (hwcap & HWCAP_ASIMDDP)
>> +        flags |= AV_CPU_FLAG_DOTPROD;
>> +#endif
>> +
>> +#ifdef AT_HWCAP2
>> +    hwcap2 = getauxval(AT_HWCAP2);
>> +#ifdef HWCAP2_I8MM
>> +    if (hwcap2 & HWCAP2_I8MM)
>> +        flags |= AV_CPU_FLAG_I8MM;
>> +#endif
>> +#endif
>> +
>> +    // Silence warnings if none of the hwcaps to check are known.
>> +    (void)hwcap;
>> +    (void)hwcap2;
>> +
>> +#if defined(HWCAP_CPUID)
>> +    // The HWCAP_* defines for individual extensions may become available late, as
>> +    // they require updates to userland headers. As a fallback, see if we can access
>> +    // the CPUID registers (trapped via the kernel).
>> +    // See https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html
>
> I don't actually care which method is used and whether to hard-code the
> missing constants or not. But doing both methods is weird. If you are going to
> trigger the TID3 traps anyway, there is no point checking the auxiliary
> vectors beforehand, AFAICT.

Yeah, that's true.

> You *could* check the auxiliary vectors as a run-time fallback if HWCAP_CPUID
> is *not* set, but that only really makes sense for HWCAP_FP and HWCAP_ASIMD, not for
> HWCAP_ASIMDDP (Linux 4.15) and HWCAP2_I8MM (Linux 5.6) which are more recent
> than HWCAP_CPUID (Linux 4.11). And then, that would be only in the corner case
> that FP and/or AdvSIMD were explicitly disabled since they are on by default
> for all AArch64 targets.

Yeah - I guess there's no potential configuration where a kernel does know 
about HWCAP_CPUID and newer HWCAPs but has decided to set HWCAP_CPUID to 0 
and not handle the trapping?

I considered falling back on the trapping CPUID codepath only if the 
individual HWCAPs weren't detected/supported, but that soon becomes quite 
a mess if we're adding more than a couple extensions.

So I guess after all that it's simplest to just go with CPUID, possibly 
with a code comment that we could go with individual HWCAPs at some point 
in the future if we want to simplify things and don't care about older 
systems/toolchains.

// Martin
diff mbox series

Patch

diff --git a/configure b/configure
index 50eb27ba0e..b39de74de5 100755
--- a/configure
+++ b/configure
@@ -2209,6 +2209,7 @@  HAVE_LIST_PUB="
 
 HEADERS_LIST="
     arpa_inet_h
+    asm_hwcap_h
     asm_types_h
     cdio_paranoia_h
     cdio_paranoia_paranoia_h
@@ -6432,6 +6433,7 @@  check_headers io.h
 enabled libdrm &&
     check_headers linux/dma-buf.h
 
+check_headers asm/hwcap.h
 check_headers linux/perf_event.h
 check_headers libcrystalhd/libcrystalhd_if.h
 check_headers malloc.h
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 0c76f5ad15..4563959ffd 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -20,6 +20,67 @@ 
 #include "libavutil/cpu_internal.h"
 #include "config.h"
 
+#if (defined(__linux__) || defined(__ANDROID__)) && HAVE_GETAUXVAL && HAVE_ASM_HWCAP_H
+#include <stdint.h>
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#define get_cpu_feature_reg(reg, val) \
+        __asm__("mrs %0, " #reg : "=r" (val))
+
+static int detect_flags(void)
+{
+    // Collects AV_CPU_FLAG_* bits for the extensions found via getauxval()/CPUID.
+    int flags = 0;
+    unsigned long hwcap, hwcap2 = 0; // hwcap2 stays 0 when AT_HWCAP2 is undefined
+    // Check for support using direct individual HWCAPs
+    hwcap = getauxval(AT_HWCAP);
+#ifdef HWCAP_ASIMDDP
+    if (hwcap & HWCAP_ASIMDDP)
+        flags |= AV_CPU_FLAG_DOTPROD;
+#endif
+
+#ifdef AT_HWCAP2
+    hwcap2 = getauxval(AT_HWCAP2);
+#ifdef HWCAP2_I8MM
+    if (hwcap2 & HWCAP2_I8MM)
+        flags |= AV_CPU_FLAG_I8MM;
+#endif
+#endif
+
+    // Silence warnings if none of the hwcaps to check are known.
+    (void)hwcap;
+    (void)hwcap2;
+
+#if defined(HWCAP_CPUID)
+    // The HWCAP_* defines for individual extensions may become available late, as
+    // they require updates to userland headers. As a fallback, see if we can access
+    // the CPUID registers (trapped via the kernel).
+    // See https://www.kernel.org/doc/html/latest/arm64/cpu-feature-registers.html
+    if (hwcap & HWCAP_CPUID) {
+        uint64_t tmp;
+
+        get_cpu_feature_reg(ID_AA64ISAR0_EL1, tmp);
+        if (((tmp >> 44) & 0xf) == 0x1) // bits [47:44] of ID_AA64ISAR0_EL1
+            flags |= AV_CPU_FLAG_DOTPROD;
+        get_cpu_feature_reg(ID_AA64ISAR1_EL1, tmp);
+        if (((tmp >> 52) & 0xf) == 0x1) // bits [55:52] of ID_AA64ISAR1_EL1
+            flags |= AV_CPU_FLAG_I8MM;
+    }
+#endif
+
+    return flags;
+}
+
+#else
+
+static int detect_flags(void)
+{
+    return 0;
+}
+
+#endif
+
 int ff_get_cpu_flags_aarch64(void)
 {
     int flags = AV_CPU_FLAG_ARMV8 * HAVE_ARMV8 |
@@ -33,6 +94,8 @@  int ff_get_cpu_flags_aarch64(void)
     flags |= AV_CPU_FLAG_I8MM;
 #endif
 
+    flags |= detect_flags();
+
     return flags;
 }