diff mbox series

[FFmpeg-devel,21/26] lavc/audiodsp: RISC-V V scalarproduct_int16

Message ID 20220920144013.4959-21-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,01/26] lavu/cpu: detect RISC-V base extensions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 20, 2022, 2:40 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/audiodsp_init.c |  2 ++
 libavcodec/riscv/audiodsp_rvv.S  | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index ddd561484f..6f38b7bc83 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -29,6 +29,7 @@  void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float
 void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);
+int32_t ff_scalarproduct_int16_rvv(const int16_t *v1, const int16_t *v2, int len);
 
 av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
 {
@@ -38,6 +39,7 @@  av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
         c->vector_clipf = ff_vector_clipf_rvf;
 #if HAVE_RVV
     if (flags & AV_CPU_FLAG_RV_ZVE32X) {
+        c->scalarproduct_int16 = ff_scalarproduct_int16_rvv;
         c->vector_clip_int32 = ff_vector_clip_int32_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE32F)
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index e5a09f3b19..852ae1dc1f 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -20,6 +20,26 @@ 
 
 #include "libavutil/riscv/asm.S"
 
+func ff_scalarproduct_int16_rvv, zve32x /* a0 = v1, a1 = v2, a2 = len; returns the 32-bit sum of v1[i] * v2[i] in a0 */
+        vsetivli    zero, 1, e32, m1, ta, ma /* force vl = 1 at SEW = 32: do not depend on the caller's vl, and clear all 32 accumulator bits */
+        vmv.s.x     v8, zero                 /* v8[0] = 0: running 32-bit sum */
+1:
+        vsetvli     t0, a2, e16, m1, ta, ma  /* t0 = number of 16-bit elements handled in this iteration */
+        vle16.v     v16, (a0)                /* load t0 elements from v1 */
+        slli        t1, t0, 1                /* t1 = bytes consumed from each input (2 * t0) */
+        vle16.v     v24, (a1)                /* load t0 elements from v2 */
+        sub         a2, a2, t0               /* elements still to process */
+        vwmul.vv    v0, v16, v24             /* widening signed multiply: 32-bit products in the v0 group */
+        add         a0, a0, t1               /* advance v1 */
+        vsetvli     zero, t0, e32, m2, ta, ma /* same element count, now treating the products as 32-bit elements */
+        vredsum.vs  v8, v0, v8               /* v8[0] += sum of the products */
+        add         a1, a1, t1               /* advance v2 */
+        bnez        a2, 1b
+
+        vmv.x.s     a0, v8                   /* return v8[0] (read at SEW = 32, as set inside the loop) */
+        ret
+endfunc
+
 func ff_vector_clip_int32_rvv, zve32x
 1:
         vsetvli t0, a4, e32, m1, ta, ma