diff mbox series

[FFmpeg-devel,30/31] lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint

Message ID 20220925142619.67917-30-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,01/31] lavu/cpu: detect RISC-V base extensions | expand

Commit Message

Rémi Denis-Courmont Sept. 25, 2022, 2:26 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/aacpsdsp_init.c |  3 +++
 libavcodec/riscv/aacpsdsp_rvv.S  | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 76f55502ee..20b1a12741 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -31,6 +31,8 @@  void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2], ptrdiff_t, int n);
 void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
                                       int i, int len);
+void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
+                                      int i, int len);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
@@ -39,6 +41,7 @@  av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 
     if (flags & AV_CPU_FLAG_RV_ZVE32X) {
         c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+        c->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_rvv;
 
         if (flags & AV_CPU_FLAG_RV_ZVE32F) {
             c->add_squares = ff_ps_add_squares_rvv;
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index c9cc15e73d..0cbe4c1d3c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -184,3 +184,38 @@  func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
 3:
         ret
 endfunc
+
+func ff_ps_hybrid_synthesis_deint_rvv, zve32x /* a0 = out, a1 = in, a2 = i, a3 = len (per the C prototype): for each index from i up to 63, split len 32-bit pairs of in[] into the two planes of out[] */
+        slli        t1, a2, 5 + 1 + 2 /* t1 = i * 256 = byte offset of in[i] (32 * 2 * 4 bytes per entry) */
+        sh2add      a0, a2, a0 /* a0 = out + 4 * i, i.e. &out[0][0][i]; NOTE(review): sh2add/sh3add are Zba mnemonics while func only names zve32x - confirm they are provided elsewhere */
+        add         a1, a1, t1 /* a1 = &in[i] */
+        addi        a2, a2, -64 /* a2 = i - 64: negative counter that is incremented up to zero */
+        li          t1, 38 * 64 * 4 /* byte size of one out[] plane */
+        li          t6, 64 * 4 /* byte stride between out[x][n][i] and out[x][n + 1][i] */
+        add         a4, a0, t1 /* a4 = &out[1][0][i] */
+        beqz        a2, 3f /* nothing to do when i == 64; NOTE(review): i > 64 is not caught here - presumably callers never pass it, confirm */
+1:      /* outer loop: one pass per remaining index i */
+        mv          t0, a0 /* t0 = write cursor in plane 0 */
+        mv          t1, a1 /* t1 = read cursor in in[i] */
+        mv          t3, a3 /* t3 = elements left to process (len) */
+        mv          t4, a4 /* t4 = write cursor in plane 1 */
+        addi        a2, a2, 1 /* count this index */
+2:      /* inner loop: one vector-length chunk of len per pass */
+        vsetvli     t5, t3, e32, m1, ta, ma /* t5 = vl = number of 32-bit elements handled in this pass */
+        vlseg2e32.v v16, (t1) /* load vl pairs: first fields into v16, second fields into v17 */
+        sub         t3, t3, t5 /* remaining -= vl */
+        vsse32.v    v16, (t0), t6 /* strided store of first fields into plane 0 */
+        mul         t2, t5, t6 /* t2 = vl * 256 = bytes covered in each output plane */
+        vsse32.v    v17, (t4), t6 /* strided store of second fields into plane 1 */
+        sh3add      t1, t5, t1 /* read cursor += vl * 8 (two 4-byte fields per pair) */
+        add         t0, t0, t2 /* advance plane 0 cursor */
+        add         t4, t4, t2 /* advance plane 1 cursor */
+        bnez        t3, 2b /* loop until all len elements are done */

+        add         a0, a0, 4 /* plane 0 base: next index (one 32-bit element further) */
+        add         a1, a1, 32 * 2 * 4 /* input base: next in[] entry (256 bytes) */
+        add         a4, a4, 4 /* plane 1 base: next index */
+        bnez        a2, 1b /* repeat until the counter reaches zero, i.e. index 64 */
+3:
+        ret
+endfunc