@@ -160,6 +160,37 @@
ret
.endm
+.macro bilin_hv len type                                // 2-D (horizontal then vertical) bilinear MC; \len = block width, \type = put|avg
+.ifc \type,avg
+        csrwi   vxrm, 0                                 // round-to-nearest-up for the vaaddu averaging below
+.endif
+        neg     t1, a5                                  // t1 = -mx; presumably consumed by bilin_h_load — confirm against that macro
+        neg     t2, a6                                  // t2 = -my, signed coefficient for vwmaccsu below
+        li      t4, 8                                   // rounding bias: result = (prev*(16-my) + cur*my + 8) >> 4
+        li      t5, 1                                   // assumed slide amount used inside bilin_h_load — confirm
+        bilin_h_load    v24, \len, put                  // v24 = horizontally filtered row 0 (the "previous" row)
+        add     a2, a2, a3                              // src += src_stride
+1:                                                      // per-output-row loop; a4 = rows remaining
+        addi    a4, a4, -1
+        bilin_h_load    v4, \len, put                   // v4 = horizontally filtered current row
+        vwmulu.vx       v16, v4, a6                     // widening: v16 = cur * my
+        vwmaccsu.vx     v16, t2, v24                    // v16 += (-my) * prev  => my * (cur - prev)
+        vwadd.wx        v16, v16, t4                    // + 8 (rounding)
+        vnsra.wi        v16, v16, 4                     // >> 4, narrow back to element width
+        vadd.vv         v0, v16, v24                    // + prev  => (prev*(16-my) + cur*my + 8) >> 4
+.ifc \type,avg
+        vle8.v  v16, (a0)                               // load existing dst pixels
+        vaaddu.vv       v0, v0, v16                     // avg variant: rounded average with dst (uses vxrm set above)
+.endif
+        vse8.v  v0, (a0)                                // store output row
+        vmv.v.v v24, v4                                 // current row becomes previous for next iteration
+        add     a2, a2, a3                              // src += src_stride
+        add     a0, a0, a1                              // dst += dst_stride
+        bnez    a4, 1b
+
+        ret
+.endm
+
.irp len 64, 32, 16
func ff_copy\len\()_rvv, zve32x
copy_avg \len copy
@@ -438,6 +469,12 @@ endfunc
func ff_avg_bilin_\len\()v_rvv, zve32x
bilin_v \len avg
endfunc
+func ff_put_bilin_\len\()hv_rvv, zve32x                 // put variant: write filtered pixels directly to dst
+        bilin_hv \len put
+endfunc
+func ff_avg_bilin_\len\()hv_rvv, zve32x                 // avg variant: rounded-average the result with existing dst
+        bilin_hv \len avg
+endfunc
.irp name regular sharp smooth
.irp do put avg
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_avg_bilin_4hv_8bpp_c: 10.7
vp9_avg_bilin_4hv_8bpp_rvv_i64: 4.5
vp9_avg_bilin_8hv_8bpp_c: 38.7
vp9_avg_bilin_8hv_8bpp_rvv_i64: 8.2
vp9_avg_bilin_16hv_8bpp_c: 147.2
vp9_avg_bilin_16hv_8bpp_rvv_i64: 32.2
vp9_avg_bilin_32hv_8bpp_c: 590.7
vp9_avg_bilin_32hv_8bpp_rvv_i64: 47.5
vp9_avg_bilin_64hv_8bpp_c: 2323.7
vp9_avg_bilin_64hv_8bpp_rvv_i64: 153.5
vp9_put_bilin_4hv_8bpp_c: 10.0
vp9_put_bilin_4hv_8bpp_rvv_i64: 3.7
vp9_put_bilin_8hv_8bpp_c: 35.2
vp9_put_bilin_8hv_8bpp_rvv_i64: 7.2
vp9_put_bilin_16hv_8bpp_c: 133.7
vp9_put_bilin_16hv_8bpp_rvv_i64: 14.2
vp9_put_bilin_32hv_8bpp_c: 521.7
vp9_put_bilin_32hv_8bpp_rvv_i64: 43.0
vp9_put_bilin_64hv_8bpp_c: 2098.0
vp9_put_bilin_64hv_8bpp_rvv_i64: 144.5
---
 libavcodec/riscv/vp9_mc_rvv.S | 37 +++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)