diff mbox series

[FFmpeg-devel,01/10] riscv: add CPU flags for the RISC-V Vector extension

Message ID 20220904135503.116704-1-remi@remlab.net
State New
Headers show
Series RISC-V V floating point DSP | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 4, 2022, 1:54 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

RVV defines a total of 12 different extensions: V, Zvl32b, Zvl64b,
Zvl128b, Zvl256b, Zvl512b, Zvl1024b, Zve32x, Zve32f, Zve64x, Zve64f and
Zve64d.

At this stage, we don't expose the vector length extensions Zvl*, as
the vector length is most commonly determined at run-time depending on
the element size and the effective multipler. There are anyways no
other run-time mechanisms defiend to determine the actual maximum
vector length than to invoke VSETVL.

Zve64f is equivalent to Zve32f plus Zve64x, so it is exposed as a
convenience flag, but not tracked internally. Likewise V is the
equivalent of Zve64d plus Zvl128b.

Technically, Zve32f and Zve64x are both implied by Zve64d and both
imply Zve32x, leaving only 5 possibilities (including no vector
support), but we keep 4 separate bits for easy run-time checks as on
other instruction set architectures.
---
 libavutil/cpu.c          | 14 ++++++++++
 libavutil/cpu.h          |  6 +++++
 libavutil/cpu_internal.h |  1 +
 libavutil/riscv/Makefile |  1 +
 libavutil/riscv/cpu.c    | 57 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 79 insertions(+)
 create mode 100644 libavutil/riscv/Makefile
 create mode 100644 libavutil/riscv/cpu.c
diff mbox series

Patch

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 0035e927a5..83bf513cf2 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -62,6 +62,8 @@  static int get_cpu_flags(void)
     return ff_get_cpu_flags_arm();
 #elif ARCH_PPC
     return ff_get_cpu_flags_ppc();
+#elif ARCH_RISCV
+    return ff_get_cpu_flags_riscv();
 #elif ARCH_X86
     return ff_get_cpu_flags_x86();
 #elif ARCH_LOONGARCH
@@ -178,6 +180,18 @@  int av_parse_cpu_caps(unsigned *flags, const char *s)
 #elif ARCH_LOONGARCH
         { "lsx",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX      },    .unit = "flags" },
         { "lasx",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX     },    .unit = "flags" },
+#elif ARCH_RISCV
+#define AV_CPU_FLAG_ZVE32F_M (AV_CPU_FLAG_ZVE32F | AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE64X_M (AV_CPU_FLAG_ZVE64X | AV_CPU_FLAG_ZVE32X)
+#define AV_CPU_FLAG_ZVE64D_M (AV_CPU_FLAG_ZVE64D | AV_CPU_FLAG_ZVE64F_M)
+#define AV_CPU_FLAG_ZVE64F_M (AV_CPU_FLAG_ZVE32F_M | AV_CPU_FLAG_ZVE64X_M)
+#define AV_CPU_FLAG_VECTORS  AV_CPU_FLAG_ZVE64D_M
+        { "vectors",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VECTORS  },    .unit = "flags" },
+        { "zve32x",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32X   },    .unit = "flags" },
+        { "zve32f",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE32F_M },    .unit = "flags" },
+        { "zve64x",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64X_M },    .unit = "flags" },
+        { "zve64f",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64F_M },    .unit = "flags" },
+        { "zve64d",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ZVE64D_M },    .unit = "flags" },
 #endif
         { NULL },
     };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 9711e574c5..44836e50d6 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -78,6 +78,12 @@ 
 #define AV_CPU_FLAG_LSX          (1 << 0)
 #define AV_CPU_FLAG_LASX         (1 << 1)
 
+// RISC-V Vector extension
+#define AV_CPU_FLAG_ZVE32X       (1 << 0) /* 8-, 16-, 32-bit integers */
+#define AV_CPU_FLAG_ZVE32F       (1 << 1) /* single precision scalars */
+#define AV_CPU_FLAG_ZVE64X       (1 << 2) /* 64-bit integers */
+#define AV_CPU_FLAG_ZVE64D       (1 << 3) /* double precision scalars */
+
 /**
  * Return the flags which specify extensions supported by the CPU.
  * The returned value is affected by av_force_cpu_flags() if that was used
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 650d47fc96..634f28bac4 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -48,6 +48,7 @@  int ff_get_cpu_flags_mips(void);
 int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
+int ff_get_cpu_flags_riscv(void);
 int ff_get_cpu_flags_x86(void);
 int ff_get_cpu_flags_loongarch(void);
 
diff --git a/libavutil/riscv/Makefile b/libavutil/riscv/Makefile
new file mode 100644
index 0000000000..1f818043dc
--- /dev/null
+++ b/libavutil/riscv/Makefile
@@ -0,0 +1 @@ 
+OBJS += riscv/cpu.o
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c
new file mode 100644
index 0000000000..9e4cce5e8b
--- /dev/null
+++ b/libavutil/riscv/cpu.c
@@ -0,0 +1,57 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
+
+#if HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+
+#define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
+
+int ff_get_cpu_flags_riscv(void)
+{
+    int ret = 0;
+
+    /* If RV-V is enabled statically at compile-time, check the details. */
+#ifdef __riscv_vectors
+    ret |= AV_CPU_FLAG_ZVE32X;
+#if __riscv_v_elen >= 64
+    ret |= AV_CPU_FLAG_ZVE64X;
+#endif
+#if __riscv_v_elen_fp >= 32
+    ret |= AV_CPU_FLAG_ZVE32F;
+#if __riscv_v_elen_fp >= 64
+    ret |= AV_CPU_FLAG_ZVE64F;
+#endif
+#endif
+#endif
+
+#if HAVE_GETAUXVAL
+    const unsigned long hwcap = getauxval(AT_HWCAP);
+
+    /* The V extension implies all subsets */
+    if (hwcap & HWCAP_RV('V'))
+        ret |= AV_CPU_FLAG_ZVE32X | AV_CPU_FLAG_ZVE64X
+             | AV_CPU_FLAG_ZVE32F | AV_CPU_FLAG_ZVE64D;
+#endif
+
+    return ret;
+}