@@ -222,7 +222,11 @@ endconst
.macro epel_filter name type regtype
lla \regtype\()2, subpel_filters_\name
li \regtype\()1, 8
+.ifc \type,v
+ mul \regtype\()0, a6, \regtype\()1 /* v: table offset = a6 * 8 */
+.else /* every non-v type (h) keeps the a5-based offset; .elseif cannot compare strings */
mul \regtype\()0, a5, \regtype\()1
+.endif
add \regtype\()0, \regtype\()0, \regtype\()2
.irp n 1,2,3,4,5,6
lb \regtype\n, \n(\regtype\()0)
@@ -239,6 +243,19 @@ endconst
li a5, 64
.ifc \from_mem, 1
vle8.v v22, (a2)
+.ifc \type,v
+ sub a2, a2, a3 /* v: step back by a3 (presumably the source stride) from the v22 position */
+ vle8.v v20, (a2)
+ add a2, a2, a3
+ add a2, a2, a3 /* second add skips the position already loaded into v22 */
+ vle8.v v24, (a2)
+ add a2, a2, a3
+ vle8.v v26, (a2)
+ add a2, a2, a3
+ vle8.v v28, (a2)
+ add a2, a2, a3 /* a2 is now 4*a3 past its value at the v22 load */
+ vle8.v v30, (a2)
+.else /* every non-v type (h) walks a2 in single-byte steps; .elseif cannot compare strings */
addi a2, a2, -1
vle8.v v20, (a2)
addi a2, a2, 2
@@ -249,6 +266,7 @@ endconst
vle8.v v28, (a2)
addi a2, a2, 1
vle8.v v30, (a2)
+.endif
.ifc \name,smooth
vwmulu.vx v16, v24, \regtype\()4
@@ -267,11 +285,23 @@ endconst
vwmaccsu.vx v16, s7, v30
.endif
+.ifc \type,v
+ .rept 6
+ sub a2, a2, a3 /* repeated 6 times: a2 -= 6*a3 in total */
+ .endr
+ vle8.v v28, (a2)
+ sub a2, a2, a3
+ vle8.v v26, (a2)
+ .rept 3
+ add a2, a2, a3 /* repeated 3 times: a2 += 3*a3, mirroring the +3 of the h path */
+ .endr
+.else /* every non-v type (h) uses byte offsets; .elseif cannot compare strings */
addi a2, a2, -6
vle8.v v28, (a2)
addi a2, a2, -1
vle8.v v26, (a2)
addi a2, a2, 3
+.endif
.ifc \name,smooth
vwmaccsu.vx v16, \regtype\()1, v28
@@ -411,7 +441,7 @@ endfunc
.irp name regular sharp smooth
.irp do put avg
- .irp type h
+ .irp type h v
gen_epel \len \do \name \type
.endr
.endr
@@ -125,7 +125,8 @@ static av_cold void vp9dsp_mc_init_rvv(VP9DSPContext *dsp, int bpp)
init_subpel1(4, idx, idxh, idxv, 4, dir, type)
#define init_subpel3(idx, type) \
- init_subpel2(idx, 1, 0, h, type)
+ init_subpel2(idx, 1, 0, h, type); \
+ init_subpel2(idx, 0, 1, v, type)
init_subpel3(0, put);
init_subpel3(1, avg);
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_avg_8tap_smooth_4v_8bpp_c: 13.7
vp9_avg_8tap_smooth_4v_8bpp_rvv_i64: 5.0
vp9_avg_8tap_smooth_8v_8bpp_c: 49.7
vp9_avg_8tap_smooth_8v_8bpp_rvv_i64: 9.2
vp9_avg_8tap_smooth_16v_8bpp_c: 191.5
vp9_avg_8tap_smooth_16v_8bpp_rvv_i64: 21.2
vp9_avg_8tap_smooth_32v_8bpp_c: 770.5
vp9_avg_8tap_smooth_32v_8bpp_rvv_i64: 66.0
vp9_avg_8tap_smooth_64v_8bpp_c: 3068.0
vp9_avg_8tap_smooth_64v_8bpp_rvv_i64: 262.5
vp9_put_8tap_smooth_4v_8bpp_c: 12.0
vp9_put_8tap_smooth_4v_8bpp_rvv_i64: 4.5
vp9_put_8tap_smooth_8v_8bpp_c: 43.7
vp9_put_8tap_smooth_8v_8bpp_rvv_i64: 8.5
vp9_put_8tap_smooth_16v_8bpp_c: 168.7
vp9_put_8tap_smooth_16v_8bpp_rvv_i64: 20.0
vp9_put_8tap_smooth_32v_8bpp_c: 681.5
vp9_put_8tap_smooth_32v_8bpp_rvv_i64: 63.7
vp9_put_8tap_smooth_64v_8bpp_c: 2692.7
vp9_put_8tap_smooth_64v_8bpp_rvv_i64: 253.5
---
 libavcodec/riscv/vp9_mc_rvv.S  | 32 +++++++++++++++++++++++++++++++-
 libavcodec/riscv/vp9dsp_init.c |  3 ++-
 2 files changed, 33 insertions(+), 2 deletions(-)