@@ -117,6 +117,49 @@
ret
.endm
+// Vertical bilinear interpolation of 8-bit rows, \len pixels wide.
+//
+// Per row: out = s0 + ((a6 * (s1 - s0) + 8) >> 4), where s0 is the row at a2
+// and s1 is the row at a2 + a3. The result is kept modulo 2^8.
+//
+// Macro arguments:
+//   len  - row width in pixels: 4, 8, 16, 32 or 64; any other value is
+//          rejected at assembly time
+//   type - "avg" additionally averages the filtered row with the bytes
+//          already stored at a0; any other value (e.g. put) stores the
+//          filtered row directly
+//
+// Registers read by the body:
+//   a0 = dst, a1 = dst stride, a2 = src, a3 = src stride,
+//   a4 = row count (must be non-zero: it is decremented before it is tested),
+//   a6 = weight applied to (s1 - s0); presumably 0..15 given the >> 4,
+//        TODO confirm against the caller
+// Clobbers: a0, a2, a4, t1, t2, t4, the v0/v8/v24 register groups (v24 is
+// used at double width); t0 when len >= 32; v16 and vxrm when type is avg.
+.macro bilin_v len type
+.ifc \type,avg
+        csrwi           vxrm, 0         // round-to-nearest-up for vaaddu
+.endif
+        // Request vl = \len elements of 8 bits. 32 and 64 exceed the 5-bit
+        // immediate of vsetivli and therefore go through t0.
+        // NOTE(review): vl only equals \len if VLMAX >= \len for the chosen
+        // LMUL (i.e. VLEN >= 128); presumably guaranteed by the caller.
+.if \len == 4
+        vsetivli        zero, 4, e8, mf4, ta, ma
+.elseif \len == 8
+        vsetivli        zero, 8, e8, mf2, ta, ma
+.elseif \len == 16
+        vsetivli        zero, 16, e8, m1, ta, ma
+.elseif \len == 32
+        li              t0, 32
+        vsetvli         zero, t0, e8, m2, ta, ma
+.elseif \len == 64
+        li              t0, 64
+        vsetvli         zero, t0, e8, m4, ta, ma
+.else
+        .error          "bilin_v: len must be 4, 8, 16, 32 or 64"
+.endif
+        li              t4, 8           // rounding bias, half of 1 << 4
+        neg             t1, a6          // t1 = -weight
+1:
+        add             t2, a2, a3      // t2 = address of the next source row
+        addi            a4, a4, -1
+        vle8.v          v0, (a2)        // s0
+        vle8.v          v8, (t2)        // s1
+.ifc \type,avg
+        vle8.v          v16, (a0)       // current dst row
+.endif
+        vwmulu.vx       v24, v8, a6     // 16-bit: a6 * s1 (both unsigned)
+        vwmaccsu.vx     v24, t1, v0     // 16-bit: += (signed t1) * s0
+        vwadd.wx        v24, v24, t4    // + 8
+        vnsra.wi        v24, v24, 4     // arithmetic >> 4, narrowed to 8 bits
+        vadd.vv         v0, v24, v0     // s0 + scaled difference
+.ifc \type,avg
+        vaaddu.vv       v0, v0, v16     // (filtered + dst + 1) >> 1
+.endif
+        vse8.v          v0, (a0)
+        add             a2, a2, a3
+        add             a0, a0, a1
+        bnez            a4, 1b
+
+        ret
+.endm
+
.irp len 64, 32, 16
func ff_copy\len\()_rvv, zve32x
copy_avg \len copy
@@ -359,6 +402,12 @@ endfunc
+// Entry point whose whole body is the bilin_h macro expanded with type=avg.
+// \len is substituted by an enclosing construct, and bilin_h is defined,
+// outside this excerpt.
func ff_avg_bilin_\len\()h_rvv, zve32x
bilin_h \len avg
endfunc
+// Entry point whose whole body is the bilin_v macro expanded with type=put,
+// i.e. without the averaging step that bilin_v emits only for type=avg.
+// \len is substituted by an enclosing construct outside this excerpt.
+func ff_put_bilin_\len\()v_rvv, zve32x
+ bilin_v \len put
+endfunc
+// Entry point whose whole body is the bilin_v macro expanded with type=avg,
+// which makes bilin_v set vxrm and average each filtered row with the bytes
+// already at the destination before storing.
+// \len is substituted by an enclosing construct outside this excerpt.
+func ff_avg_bilin_\len\()v_rvv, zve32x
+ bilin_v \len avg
+endfunc
.irp name regular sharp smooth
.irp do put avg
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_avg_bilin_4v_8bpp_c: 5.5
vp9_avg_bilin_4v_8bpp_rvv_i64: 2.2
vp9_avg_bilin_8v_8bpp_c: 20.7
vp9_avg_bilin_8v_8bpp_rvv_i64: 4.2
vp9_avg_bilin_16v_8bpp_c: 82.2
vp9_avg_bilin_16v_8bpp_rvv_i64: 9.0
vp9_avg_bilin_32v_8bpp_c: 342.5
vp9_avg_bilin_32v_8bpp_rvv_i64: 27.0
vp9_avg_bilin_64v_8bpp_c: 1319.2
vp9_avg_bilin_64v_8bpp_rvv_i64: 93.2
vp9_put_bilin_4v_8bpp_c: 4.7
vp9_put_bilin_4v_8bpp_rvv_i64: 1.7
vp9_put_bilin_8v_8bpp_c: 17.7
vp9_put_bilin_8v_8bpp_rvv_i64: 3.2
vp9_put_bilin_16v_8bpp_c: 69.2
vp9_put_bilin_16v_8bpp_rvv_i64: 7.5
vp9_put_bilin_32v_8bpp_c: 274.2
vp9_put_bilin_32v_8bpp_rvv_i64: 23.2
vp9_put_bilin_64v_8bpp_c: 1109.5
vp9_put_bilin_64v_8bpp_rvv_i64: 82.2
---
 libavcodec/riscv/vp9_mc_rvv.S | 49 +++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)