diff mbox series

[FFmpeg-devel] lavc/opusdsp: R-V V deemphasis function

Message ID 20231111181821.60210-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel] lavc/opusdsp: R-V V deemphasis function | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Nov. 11, 2023, 6:18 p.m. UTC
Considering how marginal the measured performance gains are (3-4%),
I suppose that we should not merge this. Furthermore, those measurements
are not expected to improve with larger vector sizes, since the code
uses only 32 bits per vector no matter what.

deemphasis_c: 7703.2
deemphasis_rvv_f32: 7452.0
---
 libavcodec/riscv/opusdsp_init.c | 10 +++++---
 libavcodec/riscv/opusdsp_rvv.S  | 43 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index 88d8e77f0e..8d363aaf37 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -26,14 +26,18 @@ 
 #include "libavcodec/opusdsp.h"
 
 void ff_opus_postfilter_rvv(float *data, int period, float *g, int len);
+float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len);
 
 av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR) &&
-        (flags & AV_CPU_FLAG_RVB_BASIC))
-        d->postfilter = ff_opus_postfilter_rvv;
+    if (flags & AV_CPU_FLAG_RVV_F32) {
+        if ((flags & AV_CPU_FLAG_RVB_ADDR) && (flags & AV_CPU_FLAG_RVB_BASIC))
+            d->postfilter = ff_opus_postfilter_rvv;
+        if (ff_get_rv_vlenb() >= 8) /* presumably VLEN in bytes, i.e. >= 64 bits -- TODO confirm */
+            d->deemphasis = ff_opus_deemphasis_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 79ae86c30e..839edfa4b0 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -64,3 +64,46 @@  func ff_opus_postfilter_rvv, zve32f
 
         ret
 endfunc
+
+// FIXME: Zvl64b
+// float ff_opus_deemphasis_rvv(float *y, float *x, float coeff, int len)
+func ff_opus_deemphasis_rvv, zve32f
+        li       t0, 0x3f599a00 // 0.8500061f bit pattern (not exactly 0.85f)
+        li       t1, 8          // samples handled per vector iteration
+NOHWF   fmv.w.x  fa0, a2        // soft-float ABI: coeff arrives in a2
+NOHWF   mv       a2, a3         // soft-float ABI: len arrives in a3
+        vsetivli zero, 1, e32, mf2, ta, ma // one 32-bit element per register
+        vmv.s.x  v8, t0         // v8[0] = emphasis factor (vector operand)
+        fmv.w.x  ft0, t0        // ft0   = emphasis factor (scalar operand)
+        blt      a2, t1, 2f     // fewer than 8 samples: scalar loop only
+1:
+        vlseg8e32.v v0, (a1)    // v0..v7 = x[0..7], one sample per register
+        addi    a2, a2, -8
+        vfmacc.vf v0, fa0, v8   // y[0] = x[0] + coeff * factor
+        addi    a1, a1, 8 * 4
+        vfmacc.vf v1, ft0, v0   // y[i] = x[i] + factor * y[i-1], serially
+        vfmacc.vf v2, ft0, v1
+        vfmacc.vf v3, ft0, v2
+        vfmacc.vf v4, ft0, v3
+        vfmacc.vf v5, ft0, v4
+        vfmacc.vf v6, ft0, v5
+        vfmacc.vf v7, ft0, v6
+        vfmv.f.s fa0, v7        // carry y[7] into the next iteration as coeff
+        vsseg8e32.v v0, (a0)    // store y[0..7]
+        addi    a0, a0, 8 * 4
+        bge     a2, t1, 1b
+2:
+        beqz    a2, 4f          // no tail samples left
+3:
+        flw     fa1, (a1)       // scalar tail: one sample per iteration
+        addi    a2, a2, -1
+        fmadd.s fa0, ft0, fa0, fa1 // coeff = factor * coeff + x[i]
+        addi    a1, a1, 4
+        fsw     fa0, (a0)
+        addi    a0, a0, 4
+        bnez    a2, 3b
+4:
+        // Single exit path: the soft-float ABI returns the float in a0.
+NOHWF   fmv.x.w   a0, fa0
+        ret
+endfunc