diff mbox series

[FFmpeg-devel,05/26] lavu/cpu: CPU flags for the RISC-V Vector extension

Message ID 20220920144013.4959-5-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,01/26] lavu/cpu: detect RISC-V base extensions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 20, 2022, 2:39 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

RVV defines a total of 12 different extensions, including:

- 5 different instruction subsets:
  - Zve32x: 8-, 16- and 32-bit integers,
  - Zve32f: Zve32x plus single precision floats,
  - Zve64x: Zve32x plus 64-bit integers,
  - Zve64f: Zve32f plus Zve64x,
  - Zve64d: Zve64f plus double precision floats.

- 6 different vector lengths:
  - Zvl32b (embedded only),
  - Zvl64b (embedded only),
  - Zvl128b,
  - Zvl256b,
  - Zvl512b,
  - Zvl1024b,

- and the V extension proper: equivalent to Zve64f and Zvl128b.

In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (ZVE32X), floats (ZV32F),
64-bit ints (ZV64X) and doubles (ZVE64D).

Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
---
 libavutil/cpu.c           |  4 ++++
 libavutil/cpu.h           |  4 ++++
 libavutil/riscv/cpu.c     | 46 ++++++++++++++++++++++++++++++++++++++-
 tests/checkasm/checkasm.c | 10 ++++++---
 4 files changed, 60 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 78e92a1bf6..58ae4858b4 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -187,6 +187,10 @@  int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "rvi",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI      },    .unit = "flags" },
         { "rvf",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVF      },    .unit = "flags" },
         { "rvd",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVD      },    .unit = "flags" },
+        { "rvve32",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE32X},    .unit = "flags" },
+        { "rvvf",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE32F},    .unit = "flags" },
+        { "rvve64",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE64X},    .unit = "flags" },
+        { "rvv",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RV_ZVE64D},    .unit = "flags" },
 #endif
         { NULL },
     };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9aae2ccc7a..00698e30ef 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -82,6 +82,10 @@ 
 #define AV_CPU_FLAG_RVI          (1 << 0) ///< I (full GPR bank)
 #define AV_CPU_FLAG_RVF          (1 << 1) ///< F (single precision FP)
 #define AV_CPU_FLAG_RVD          (1 << 2) ///< D (double precision FP)
+#define AV_CPU_FLAG_RV_ZVE32X    (1 << 3) ///< Vectors of 8/16/32-bit int's */
+#define AV_CPU_FLAG_RV_ZVE32F    (1 << 4) ///< Vectors of float's */
+#define AV_CPU_FLAG_RV_ZVE64X    (1 << 5) ///< Vectors of 64-bit int's */
+#define AV_CPU_FLAG_RV_ZVE64D    (1 << 6) ///< Vectors of double's
 
 /**
  * Return the flags which specify extensions supported by the CPU.
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
index fec1f7822a..6f862635b3 100644
--- a/libavutil/riscv/cpu.c
+++ b/libavutil/riscv/cpu.c
@@ -30,7 +30,32 @@ 
 
 int ff_force_cpu_flags_riscv(int flags)
 {
-    if ((flags & AV_CPU_FLAG_RVD) && !(flags & AV_CPU_FLAG_RVF)) {
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE64X)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE64X");
+        flags |= AV_CPU_FLAG_RV_ZVE64X;
+    }
+
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RV_ZVE32F)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE32F");
+        flags |= AV_CPU_FLAG_RV_ZVE32F;
+    }
+
+    if ((flags & (AV_CPU_FLAG_RV_ZVE64X | AV_CPU_FLAG_RV_ZVE32F))
+        && !(flags & AV_CPU_FLAG_RV_ZVE32X)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n",
+               "_ZVE32X");
+        flags |= AV_CPU_FLAG_RV_ZVE32X;
+    }
+
+    if ((flags & AV_CPU_FLAG_RV_ZVE64D) && !(flags & AV_CPU_FLAG_RVD)) {
+        av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "D");
+        flags |= AV_CPU_FLAG_RVD;
+    }
+
+    if ((flags & (AV_CPU_FLAG_RVD | AV_CPU_FLAG_RV_ZVE32F))
+        && !(flags & AV_CPU_FLAG_RVF)) {
         av_log(NULL, AV_LOG_WARNING, "RV%s implied by specified flags\n", "F");
         flags |= AV_CPU_FLAG_RVF;
     }
@@ -50,6 +75,11 @@  int ff_get_cpu_flags_riscv(void)
         ret |= AV_CPU_FLAG_RVF;
     if (hwcap & HWCAP_RV('D'))
         ret |= AV_CPU_FLAG_RVD;
+
+    /* The V extension implies all Zve* functional subsets */
+    if (hwcap & HWCAP_RV('V'))
+        ret |= AV_CPU_FLAG_RV_ZVE32X | AV_CPU_FLAG_RV_ZVE64X
+             | AV_CPU_FLAG_RV_ZVE32F | AV_CPU_FLAG_RV_ZVE64D;
 #endif
 
 #ifdef __riscv_i
@@ -60,6 +90,20 @@  int ff_get_cpu_flags_riscv(void)
 #if (__riscv_flen >= 64)
     ret |= AV_CPU_FLAG_RVD;
 #endif
+#endif
+
+    /* If RV-V is enabled statically at compile-time, check the details. */
+#ifdef __riscv_vectors
+    ret |= AV_CPU_FLAG_RV_ZVE32X;
+#if __riscv_v_elen >= 64
+    ret |= AV_CPU_FLAG_RV_ZVE64X;
+#endif
+#if __riscv_v_elen_fp >= 32
+    ret |= AV_CPU_FLAG_RV_ZVE32F;
+#if __riscv_v_elen_fp >= 64
+    ret |= AV_CPU_FLAG_RV_ZVE64F;
+#endif
+#endif
 #endif
 
     return ret;
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 7730b14d98..c4352f1a16 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -230,9 +230,13 @@  static const struct {
     { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
     { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
 #elif ARCH_RISCV
-    { "RVI",      "rvi",      AV_CPU_FLAG_RVI },
-    { "RVF",      "rvf",      AV_CPU_FLAG_RVF },
-    { "RVD",      "rvd",      AV_CPU_FLAG_RVD },
+    { "RVI",        "rvi",       AV_CPU_FLAG_RVI },
+    { "RVF",        "rvf",       AV_CPU_FLAG_RVF },
+    { "RVD",        "rvd",       AV_CPU_FLAG_RVD },
+    { "RV_Zve32x",  "rv_zve32x", AV_CPU_FLAG_RV_ZVE32X },
+    { "RV_Zve32f",  "rv_zve32f", AV_CPU_FLAG_RV_ZVE32F },
+    { "RV_Zve64x",  "rv_zve64x", AV_CPU_FLAG_RV_ZVE64X },
+    { "RV_Zve64d",  "rv_zve64d", AV_CPU_FLAG_RV_ZVE64D },
 #elif ARCH_MIPS
     { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
     { "MSA",      "msa",      AV_CPU_FLAG_MSA },