diff mbox series

[FFmpeg-devel,27/31] lavc/aacpsdsp: RISC-V V mul_pair_single

Message ID 20220926145251.56351-27-remi@remlab.net
State Accepted
Commit e180326a0b72bbbdee51810592b16178a48797f3
Headers show
Series initial RISC-V CPU extensions | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 26, 2022, 2:52 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  6 +++++-
 libavcodec/riscv/aacpsdsp_rvv.S  | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 83f6d9b16b..21fd5b8470 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -25,13 +25,17 @@ 
 #include "libavcodec/aacpsdsp.h"
 
 void ff_ps_add_squares_rvv(float *dst, const float (*src)[2], int n);
+void ff_ps_mul_pair_single_rvv(float (*dst)[2], float (*src0)[2], float *src1,
+                               int n);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RVV_F32)
+    if (flags & AV_CPU_FLAG_RVV_F32) { // braces: two function pointers are now set under this one check
         c->add_squares = ff_ps_add_squares_rvv;
+        c->mul_pair_single = ff_ps_mul_pair_single_rvv; // install the RVV routine added in aacpsdsp_rvv.S
+    }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index b516063ea7..70b7b72218 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -35,3 +35,20 @@  func ff_ps_add_squares_rvv, zve32f
 
         ret
 endfunc
+
+func ff_ps_mul_pair_single_rvv, zve32f // dst[i][k] = src0[i][k] * src1[i] for k = 0,1; per the C prototype a0 = dst, a1 = src0, a2 = src1, a3 = n (standard calling convention assumed)
+1:
+        vsetvli     t0, a3, e32, m1, ta, ma // t0 = how many 32-bit elements this pass handles, out of the a3 still remaining
+        vlseg2e32.v v24, (a1) // 2-field segment load: first float of each src0 pair -> v24, second float -> v25
+        sub         a3, a3, t0 // a3 = elements left after this pass
+        vle32.v     v16, (a2) // t0 multipliers from src1
+        sh3add      a1, t0, a1 // a1 += 8 * t0, i.e. past the pairs just loaded; NOTE(review): sh2add/sh3add are Zba instructions, yet only zve32f is named on the func line and the C init only tests AV_CPU_FLAG_RVV_F32 - confirm Zba is guaranteed
+        vfmul.vv    v24, v24, v16 // scale the first floats
+        sh2add      a2, t0, a2 // a2 += 4 * t0
+        vfmul.vv    v25, v25, v16 // scale the second floats by the same multipliers
+        vsseg2e32.v v24, (a0) // 2-field segment store: re-interleave v24/v25 into dst
+        sh3add      a0, t0, a0 // a0 += 8 * t0
+        bnez        a3, 1b // loop until no elements remain
+
+        ret
+endfunc