diff mbox series

[FFmpeg-devel,3/3] lavc/ac3dsp: add R-V Zvbb extract_exponents

Message ID 20240507191208.61372-3-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/3] riscv: add Zvbb vector bit manipulation extension | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont May 7, 2024, 7:12 p.m. UTC
---
 libavcodec/riscv/Makefile      |  1 +
 libavcodec/riscv/ac3dsp_init.c |  5 ++++
 libavcodec/riscv/ac3dsp_rvvb.S | 43 ++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 libavcodec/riscv/ac3dsp_rvvb.S
diff mbox series

Patch

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 11d47f9a57..bd24f18cb9 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -5,6 +5,7 @@  RVV-OBJS-$(CONFIG_AAC_ENCODER) += riscv/aacencdsp_rvv.o
 OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_init.o
 RV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvb.o
 RVV-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvv.o
+RVVB-OBJS-$(CONFIG_AC3DSP) += riscv/ac3dsp_rvvb.o
 OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_init.o
 RVV-OBJS-$(CONFIG_ALAC_DECODER) += riscv/alacdsp_rvv.o
 OBJS-$(CONFIG_AUDIODSP) += riscv/audiodsp_init.o
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index 8cfa69055a..f66b6cac57 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -29,6 +29,7 @@ 
 void ff_ac3_exponent_min_rvb(uint8_t *exp, int, int);
 void ff_ac3_exponent_min_rvv(uint8_t *exp, int, int);
 void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
+void ff_extract_exponents_rvvb(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
 void ff_sum_square_butterfly_int32_rvv(int64_t *, const int32_t *,
                                        const int32_t *, int);
@@ -48,6 +49,10 @@  av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
     if (flags & AV_CPU_FLAG_RVB_ADDR) {
         if (flags & AV_CPU_FLAG_RVB_BASIC)
             c->extract_exponents = ff_extract_exponents_rvb;
+# if HAVE_RV_ZVBB
+        if (flags & AV_CPU_FLAG_RV_ZVBB)
+            c->extract_exponents = ff_extract_exponents_rvvb;
+# endif
         if (flags & AV_CPU_FLAG_RVV_F32) {
             c->float_to_fixed24 = ff_float_to_fixed24_rvv;
             c->sum_square_butterfly_float = ff_sum_square_butterfly_float_rvv;
diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S
new file mode 100644
index 0000000000..64766b56be
--- /dev/null
+++ b/libavcodec/riscv/ac3dsp_rvvb.S
@@ -0,0 +1,43 @@ 
+/*
+ * Copyright © 2023 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_extract_exponents_rvvb, zve32x, zvbb /* a0 = exp (8-bit out), a1 = coef (32-bit in), a2 = nb_coefs; stores exp[i] = clz(|coef[i]|) - 8 */
+1:
+        vsetvli     t0, a2, e32, m8, ta, ma /* t0 = number of coefficients handled in this pass (32-bit elements, LMUL 8) */
+        vle32.v     v8, (a1)                /* load t0 coefficients */
+        sub         a2, a2, t0              /* a2 = coefficients still to do after this pass */
+        vneg.v      v16, v8                 /* v16 = -coef */
+        sh2add      a1, t0, a1              /* a1 += 4 * t0, i.e. skip the loaded coefficients; NOTE(review): sh2add is Zba, which is not in the func extension list - confirm it is enabled by the build */
+        vmax.vv     v8, v8, v16             /* signed max(coef, -coef) = |coef| */
+        vclz.v      v8, v8                  /* leading-zero count of each 32-bit |coef| (Zvbb) */
+        vsetvli     zero, zero, e16, m4, ta, ma /* keep the same element count, switch to 16-bit elements */
+        vncvt.x.x.w v4, v8                  /* narrow the counts from 32 to 16 bits */
+        vsetvli     zero, zero, e8, m2, ta, ma  /* keep the same element count, switch to 8-bit elements */
+        vncvt.x.x.w v2, v4                  /* narrow the counts from 16 to 8 bits */
+        vadd.vi     v2, v2, 24 - 32         /* exponent = clz - 8 (a count of 32 yields 24) */
+        vse8.v      v2, (a0)                /* store t0 exponent bytes */
+        add         a0, a0, t0              /* advance the output pointer by t0 bytes */
+        bnez        a2, 1b                  /* loop while coefficients remain */
+
+        ret
+endfunc