diff mbox series

[FFmpeg-devel,20/29] lavc/audiodsp: RISC-V V vector_clipf

Message ID 20220922183726.38624-20-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,01/29] lavu/cpu: detect RISC-V base extensions | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Sept. 22, 2022, 6:37 p.m. UTC
From: Rémi Denis-Courmont <remi@remlab.net>

---
 libavcodec/riscv/audiodsp_init.c |  7 ++++++-
 libavcodec/riscv/audiodsp_rvv.S  | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index ce8b60ee52..ddd561484f 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -26,6 +26,7 @@ 
 
 void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max);
 
+void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);
 
@@ -36,7 +37,11 @@  av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
     if (flags & AV_CPU_FLAG_RVF)
         c->vector_clipf = ff_vector_clipf_rvf;
 #if HAVE_RVV
-    if (flags & AV_CPU_FLAG_RV_ZVE32X)
+    if (flags & AV_CPU_FLAG_RV_ZVE32X) {
         c->vector_clip_int32 = ff_vector_clip_int32_rvv;
+
+        if (flags & AV_CPU_FLAG_RV_ZVE32F)
+            c->vector_clipf = ff_vector_clipf_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index 26b3cdffcf..e5a09f3b19 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -35,3 +35,21 @@  func ff_vector_clip_int32_rvv, zve32x
 
         ret
 endfunc
+
+func ff_vector_clipf_rvv, zve32f // clamp floats: a0 = dst, a1 = src, a2 = len (elements); bounds end up in fa0 (lower) / fa1 (upper)
+NOHWF   fmv.w.x  fa0, a3         // NOHWF is defined elsewhere - presumably soft-float ABI only: copy min's bits from a3 into fa0
+NOHWF   fmv.w.x  fa1, a4         // likewise copy max's bits from a4 into fa1
+1:                               // strip-mined loop: one vector's worth of elements per pass
+        vsetvli  t0, a2, e32, m1, ta, ma // t0 = 32-bit elements handled this pass (no more than a2 remaining)
+        vle32.v  v8, (a1)         // load t0 floats from src
+        slli     t1, t0, 2        // t1 = t0 * 4 = bytes consumed this pass
+        vfmax.vf v8, v8, fa0      // raise every element to at least fa0
+        add      a1, a1, t1       // advance src pointer
+        vfmin.vf v8, v8, fa1      // cap every element at fa1
+        sub      a2, a2, t0       // remaining element count
+        vse32.v  v8, (a0)         // store the t0 clamped floats to dst
+        add      a0, a0, t1       // advance dst pointer
+        bnez     a2, 1b           // repeat until no elements remain
+
+        ret
+endfunc