diff mbox series

[FFmpeg-devel,1/2] lavc/mpegvideoencdsp: R-V V try_8x8basis

Message ID 20240814171856.6360-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/2] lavc/mpegvideoencdsp: R-V V try_8x8basis | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Rémi Denis-Courmont Aug. 14, 2024, 5:18 p.m. UTC
T-Head C908:
try_8x8basis_c:       922.5
try_8x8basis_rvv_i32: 135.3

SpacemiT X60:
try_8x8basis_c:       926.1
try_8x8basis_rvv_i32: 103.1
---
 libavcodec/riscv/mpegvideoencdsp_init.c | 15 ++++++++---
 libavcodec/riscv/mpegvideoencdsp_rvv.S  | 35 +++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/mpegvideoencdsp_init.c b/libavcodec/riscv/mpegvideoencdsp_init.c
index eb5c8a5aed..4c156c1cf2 100644
--- a/libavcodec/riscv/mpegvideoencdsp_init.c
+++ b/libavcodec/riscv/mpegvideoencdsp_init.c
@@ -23,6 +23,8 @@ 
 #include "libavutil/cpu.h"
 #include "libavcodec/mpegvideoencdsp.h"
 
+int ff_try_8x8basis_rvv(const int16_t rem[64], const int16_t weight[64],
+                        const int16_t basis[64], int scale); /* 64 coeffs each */
 int ff_pix_sum_rvv(const uint8_t *pix, int line_size);
 int ff_pix_norm1_rvv(const uint8_t *pix, int line_size);
 
@@ -32,10 +34,15 @@  av_cold void ff_mpegvideoencdsp_init_riscv(MpegvideoEncDSPContext *c,
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RVV_I64) {
-        if ((flags & AV_CPU_FLAG_RVB) && ff_rv_vlen_least(128))
-            c->pix_sum = ff_pix_sum_rvv;
-        c->pix_norm1 = ff_pix_norm1_rvv;
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        if (flags & AV_CPU_FLAG_RVB)
+            c->try_8x8basis = ff_try_8x8basis_rvv;
+
+        if (flags & AV_CPU_FLAG_RVV_I64) {
+            if ((flags & AV_CPU_FLAG_RVB) && ff_rv_vlen_least(128))
+                c->pix_sum = ff_pix_sum_rvv;
+            c->pix_norm1 = ff_pix_norm1_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/mpegvideoencdsp_rvv.S b/libavcodec/riscv/mpegvideoencdsp_rvv.S
index 2f25b00eb2..9408de47c8 100644
--- a/libavcodec/riscv/mpegvideoencdsp_rvv.S
+++ b/libavcodec/riscv/mpegvideoencdsp_rvv.S
@@ -20,6 +20,41 @@ 
 
 #include "libavutil/riscv/asm.S"
 
+.equ    BASIS_SHIFT, 16 /* presumably mirrors the C encoder constant of the same name - confirm */
+.equ    RECON_SHIFT,  6 /* presumably mirrors the C encoder constant of the same name - confirm */
+
+func ff_try_8x8basis_rvv, zve32x, b /* a0 = rem, a1 = weight, a2 = basis, a3 = scale; result in a0 */
+        li      t1, 64 /* t1 = number of coefficients still to process (8x8 block) */
+        csrwi   vxrm, 0 /* fixed-point rounding mode 0: round-to-nearest-up (used by vnclip) */
+        vsetvli     t0, t1, e32, m8, ta, ma /* t0 = vl for 32-bit elements, LMUL=8 */
+        vmv.v.x     v24, zero /* v24 group: per-lane 32-bit accumulators, cleared */
+        vmv.s.x     v1, zero /* v1[0] = 0, initial value for the final reduction */
+1:
+        vsetvli     zero, zero, e16, m4, ta, ma /* 16-bit elements, LMUL=4; same SEW/LMUL ratio so vl is kept */
+        vle16.v     v4, (a2) /* v4 = next vl basis[] values */
+        sub     t1, t1, t0 /* account for the vl elements handled in this pass */
+        vwmul.vx    v16, v4, a3 /* v16 = basis * scale, widened to 32 bits */
+        sh1add  a2, t0, a2 /* basis += vl (2 bytes per element) */
+        vle16.v     v8, (a0) /* v8 = next vl rem[] values */
+        sh1add  a0, t0, a0 /* rem += vl */
+        vnclip.wi   v4, v16, BASIS_SHIFT - RECON_SHIFT /* rounded shift right by 10, saturated back to 16 bits */
+        vle16.v     v12, (a1) /* v12 = next vl weight[] values */
+        sh1add  a1, t0, a1 /* weight += vl */
+        vadd.vv     v4, v8, v4 /* v4 = rem + scaled basis (16-bit add) */
+        vsra.vi     v4, v4, RECON_SHIFT /* arithmetic shift right by RECON_SHIFT */
+        vwmul.vv    v16, v12, v4 /* v16 = weight * v4, widened to 32 bits */
+        vsetvli     zero, zero, e32, m8, ta, ma /* back to 32-bit elements, vl unchanged */
+        vmul.vv     v16, v16, v16 /* square the weighted value (low 32 bits of the product) */
+        vsra.vi     v16, v16, 4 /* arithmetic shift right by 4 */
+        vadd.vv     v24, v24, v16 /* accumulate into the per-lane sums */
+        bnez    t1, 1b /* loop until all 64 coefficients are consumed */
+
+        vredsum.vs  v1, v24, v1 /* v1[0] = v1[0] + sum of all active lanes of v24 */
+        vmv.x.s     a0, v1 /* a0 = v1[0] (sign-extended to XLEN) */
+        srai    a0, a0, 2 /* return sum >> 2; NOTE(review): arithmetic shift - confirm it matches the C reference if bit 31 of the sum can be set */
+        ret
+endfunc
+
 func ff_pix_sum_rvv, zve64x, b
         lpad    0
         vsetivli    t0, 16, e16, m1, ta, ma