diff mbox series

[FFmpeg-devel,v4,2/5] lavc/vp9dsp: R-V V mc bilin h v

Message ID tencent_7AE8A918676E034B256C8EE75DF5CCE27805@qq.com
State New
Headers show
Series [FFmpeg-devel,v4,1/5] lavc/vp9dsp: R-V V mc avg | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

uk7b@foxmail.com May 18, 2024, 6:15 p.m. UTC
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_avg_bilin_4h_8bpp_c: 5.2
vp9_avg_bilin_4h_8bpp_rvv_i64: 2.2
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8h_8bpp_c: 20.0
vp9_avg_bilin_8h_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8v_8bpp_c: 21.0
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16h_8bpp_c: 78.2
vp9_avg_bilin_16h_8bpp_rvv_i64: 9.0
vp9_avg_bilin_16v_8bpp_c: 82.0
vp9_avg_bilin_16v_8bpp_rvv_i64: 9.0
vp9_avg_bilin_32h_8bpp_c: 325.5
vp9_avg_bilin_32h_8bpp_rvv_i64: 26.2
vp9_avg_bilin_32v_8bpp_c: 326.2
vp9_avg_bilin_32v_8bpp_rvv_i64: 26.2
vp9_avg_bilin_64h_8bpp_c: 1265.7
vp9_avg_bilin_64h_8bpp_rvv_i64: 91.5
vp9_avg_bilin_64v_8bpp_c: 1317.0
vp9_avg_bilin_64v_8bpp_rvv_i64: 91.2
vp9_put_bilin_4h_8bpp_c: 4.5
vp9_put_bilin_4h_8bpp_rvv_i64: 1.7
vp9_put_bilin_4v_8bpp_c: 4.7
vp9_put_bilin_4v_8bpp_rvv_i64: 1.7
vp9_put_bilin_8h_8bpp_c: 17.0
vp9_put_bilin_8h_8bpp_rvv_i64: 3.5
vp9_put_bilin_8v_8bpp_c: 18.0
vp9_put_bilin_8v_8bpp_rvv_i64: 3.5
vp9_put_bilin_16h_8bpp_c: 65.2
vp9_put_bilin_16h_8bpp_rvv_i64: 7.5
vp9_put_bilin_16v_8bpp_c: 85.7
vp9_put_bilin_16v_8bpp_rvv_i64: 7.5
vp9_put_bilin_32h_8bpp_c: 257.5
vp9_put_bilin_32h_8bpp_rvv_i64: 23.5
vp9_put_bilin_32v_8bpp_c: 274.5
vp9_put_bilin_32v_8bpp_rvv_i64: 23.5
vp9_put_bilin_64h_8bpp_c: 1040.5
vp9_put_bilin_64h_8bpp_rvv_i64: 82.5
vp9_put_bilin_64v_8bpp_c: 1108.7
vp9_put_bilin_64v_8bpp_rvv_i64: 82.2
---
 libavcodec/riscv/vp9_mc_rvv.S  | 43 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c | 21 +++++++++++++++++
 2 files changed, 64 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 7811cd9928..b0052c0ece 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -53,6 +53,49 @@  func ff_avg\len\()_rvv, zve32x
 endfunc
 .endm
 
+.macro bilin_load dst len op type mn // blend vector at (a2) with its neighbour at (t5) into \dst; \len is not referenced in this body
+.ifc \type,v
+        add             t5, a2, a3 // type v: neighbour address = a2 + a3
+.else
+        addi            t5, a2, 1 // any other type: neighbour address = a2 + 1
+.endif
+        vle8.v          v8, (a2) // v8 = bytes at (a2)
+        vle8.v          v0, (t5) // v0 = bytes at the neighbour address
+        vwmulu.vx       v16, v0, \mn // v16 = v0 * \mn, unsigned, widened to 2*SEW
+        vwmaccsu.vx     v16, t1, v8 // v16 += t1 * v8 (signed scalar, unsigned vector); bilin_h_v sets t1 = -\mn
+        vwadd.wx        v16, v16, t4 // v16 += t4; bilin_h_v sets t4 = 8, the bias for the shift below
+        vnsra.wi        v16, v16, 4 // arithmetic shift right by 4, narrowed back to SEW
+        vadd.vv         \dst, v16, v8 // with t1/t4 as above: \dst = v8 + ((\mn * (v0 - v8) + 8) >> 4)
+.ifc \op,avg
+        vle8.v          v16, (a0) // avg only: v16 = bytes currently at (a0)
+        vaaddu.vv       \dst, \dst, v16 // \dst = unsigned averaging add of \dst and v16, rounded per vxrm
+.endif
+.endm
+
+.macro bilin_h_v len op type mn // emits ff_<op>_bilin_<len><type>_rvv; \mn names the register holding the blend multiplier
+func ff_\op\()_bilin_\len\()\type\()_rvv, zve32x
+.ifc \op,avg
+        csrwi           vxrm, 0 // fixed-point rounding mode 0 (round-to-nearest-up) for vaaddu in bilin_load
+.endif
+        vsetvlstatic8   \len t0 64 // macro defined outside this hunk; presumably sets vl/vtype for \len 8-bit elements - TODO confirm
+        li              t4, 8 // t4 = 8: bias added before the >> 4 in bilin_load
+        neg             t1, \mn // t1 = -\mn: multiplier for the vwmaccsu term in bilin_load
+1:
+        addi            a4, a4, -1 // a4 = iterations left; NOTE(review): assumes a4 >= 1 on entry, 0 would wrap
+        bilin_load      v0, \len, \op, \type, \mn // v0 = filtered (and for avg, averaged) vector for this iteration
+        vse8.v          v0, (a0) // store the result to (a0)
+        add             a2, a2, a3 // advance the load address by a3
+        add             a0, a0, a1 // advance the store address by a1
+        bnez            a4, 1b // repeat until a4 reaches zero
+
+        ret
+endfunc
+.endm
+
 .irp len, 64, 32, 16, 8, 4
         copy_avg \len
+        .irp op, put, avg
+                bilin_h_v \len \op h a5 // h variant: multiplier taken from a5 (presumably mx - confirm against the C prototype)
+                bilin_h_v \len \op v a6 // v variant: multiplier taken from a6 (presumably my - confirm against the C prototype)
+        .endr
 .endr
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 6bfe23563a..565b68959f 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -63,6 +63,27 @@  static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
     init_fpel(3, 8);
     init_fpel(4, 4);
 
+    dsp->mc[0][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_64v_rvv;
+    dsp->mc[0][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_64h_rvv;
+    dsp->mc[0][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_64v_rvv;
+    dsp->mc[0][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_64h_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_32v_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_32h_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_32v_rvv;
+    dsp->mc[1][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_32h_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_16v_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_16h_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_16v_rvv;
+    dsp->mc[2][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_16h_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_8v_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_8h_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_8v_rvv;
+    dsp->mc[3][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_8h_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][0][0][1] = ff_put_bilin_4v_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_bilin_4h_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_bilin_4v_rvv;
+    dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_bilin_4h_rvv;
+
 #undef init_fpel
     }
 #endif