diff mbox series

[FFmpeg-devel,1/2] lavc/vp8dsp: add R-V V vp7_idct_dc_add

Message ID 20240601195613.44669-1-remi@remlab.net
State Accepted
Commit 30797e4ff6c8c537471c386cd019a6a48a721f01
Headers show
Series [FFmpeg-devel,1/2] lavc/vp8dsp: add R-V V vp7_idct_dc_add | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 1, 2024, 7:56 p.m. UTC
This just computes the DC coefficient and hands over to code shared
with VP8. Accordingly, the bulk of the changes is just rewriting the
VP8 code so that it can be shared.

The speed-up is nothing to write home about:
vp7_idct_dc_add_c:       1.7
vp7_idct_dc_add_rvv_i32: 1.2
---
 libavcodec/riscv/vp7dsp_init.c | 12 +++++++++++-
 libavcodec/riscv/vp8dsp_rvv.S  | 30 +++++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/vp7dsp_init.c b/libavcodec/riscv/vp7dsp_init.c
index ae7f2d4277..491874483f 100644
--- a/libavcodec/riscv/vp7dsp_init.c
+++ b/libavcodec/riscv/vp7dsp_init.c
@@ -27,6 +27,15 @@ 
 
 void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
 void ff_vp7_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp78_idct_dc_add_rvv(uint8_t *, int16_t block[16], ptrdiff_t, int dc);
+
+static void ff_vp7_idct_dc_add_rvv(uint8_t *dst, int16_t block[16],
+                                   ptrdiff_t stride)
+{
+    int dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
+
+    ff_vp78_idct_dc_add_rvv(dst, block, stride, dc);
+}
 
 av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
 {
@@ -37,8 +46,9 @@  av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
         ff_rv_vlen_least(128)) {
 #if __riscv_xlen >= 64
         c->vp8_luma_dc_wht = ff_vp7_luma_dc_wht_rvv;
-#endif
         c->vp8_idct_add = ff_vp7_idct_add_rvv;
+#endif
+        c->vp8_idct_dc_add = ff_vp7_idct_dc_add_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index b187c6c7c9..02351be383 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -100,6 +100,29 @@  func ff_vp8_luma_dc_wht_rvv, zve64x
 endfunc
 #endif
 
+func ff_vp8_idct_dc_add_rvv, zve32x
+        lh      a3, (a1)
+        addi    a3, a3, 4
+        srai    a3, a3, 3
+        # fall through
+endfunc
+
+func ff_vp78_idct_dc_add_rvv, zve32x
+        csrwi      vxrm, 0
+        vsetivli   zero, 4, e8, mf4, ta, ma
+        sh         zero, (a1)
+        vlse32.v   v8, (a0), a2
+        vsetivli   zero, 16, e16, m2, ta, ma
+        vzext.vf2  v16, v8
+        vadd.vx    v16, v16, a3
+        vmax.vx    v16, v16, zero
+        vsetvli    zero, zero, e8, m1, ta, ma
+        vnclipu.wi v8, v16, 0
+        vsetivli   zero, 4, e8, mf4, ta, ma
+        vsse32.v   v8, (a0), a2
+        ret
+endfunc
+
 .macro vp8_idct_dc_add
         vlse32.v      v0, (a0), a2
         lh            a5, 0(a1)
@@ -122,13 +145,6 @@  endfunc
         addi          a1, a1, 32
 .endm
 
-func ff_vp8_idct_dc_add_rvv, zve32x
-        vsetivli      zero, 4, e8, mf4, ta, ma
-        vp8_idct_dc_add
-
-        ret
-endfunc
-
 func ff_vp8_idct_dc_add4y_rvv, zve32x
         vsetivli      zero, 4, e8, mf4, ta, ma
         .rept 3