diff mbox series

[FFmpeg-devel,26/26] lavc/aacpsdsp: RISC-V V mul_pair_single

Message ID 20220920144013.4959-26-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,01/26] lavu/cpu: detect RISC-V base extensions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 20, 2022, 2:40 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  6 +++++-
 libavcodec/riscv/aacpsdsp_rvv.S  | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 525fc9aa38..90c9c501c3 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -25,13 +25,17 @@ 
 #include "libavcodec/aacpsdsp.h"
 
 void ff_ps_add_squares_rvv(float *dst, const float (*src)[2], int n);
+void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
+                               int n);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RV_ZVE32F)
+    if (flags & AV_CPU_FLAG_RV_ZVE32F) { /* both RVV routines are built for zve32f */
         c->add_squares = ff_ps_add_squares_rvv;
+        c->mul_pair_single = ff_ps_mul_pair_single_rvv; /* override only when the CPU flag is set */
+    }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index cedaab0cf0..1c174cd110 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -37,3 +37,22 @@  func ff_ps_add_squares_rvv, zve32f
 
         ret
 endfunc
+
+func ff_ps_mul_pair_single_rvv, zve32f // dst[i][k] = src0[i][k] * src1[i] for k = 0..1
+1:                                     // per the C prototype and standard calling convention: a0 = dst, a1 = src0, a2 = src1, a3 = n
+        vsetvli     t0, a3, e32, m1, ta, ma // t0 = vl: 32-bit elements handled this pass (at most a3)
+        slli        t1, t0, 3              // t1 = t0 * 8: bytes covered in the float-pair arrays
+        vlseg2e32.v v24, (a1)              // de-interleave src0 pairs: first floats -> v24, second floats -> v25
+        slli        t2, t0, 2              // t2 = t0 * 4: bytes covered in the single-float array
+        vle32.v     v16, (a2)              // v16 = t0 floats from src1
+        sub         a3, a3, t0             // elements still to process
+        vfmul.vv    v24, v24, v16          // scale the first float of each pair
+        add         a1, a1, t1             // advance src0
+        vfmul.vv    v25, v25, v16          // scale the second float of each pair
+        add         a2, a2, t2             // advance src1
+        vsseg2e32.v v24, (a0)              // re-interleave v24/v25 and store the pairs to dst
+        add         a0, a0, t1             // advance dst
+        bnez        a3, 1b                 // repeat until no elements remain
+
+        ret
+endfunc