@@ -362,6 +362,77 @@ func ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x
endfunc
.endm
+#if __riscv_xlen == 64
+.macro epel_hv_once len name op /* one hv pass: keeps an 8-slot window in v0,v2,..,v14 and stores one result vector to (a0) per step */
+ sub a2, a2, a3 /* these three subs give a2 -= 3 * a3; presumably rewinds the source pointer by three rows of stride a3 - TODO confirm */
+ sub a2, a2, a3
+ sub a2, a2, a3
+ .irp n,0,2,4,6,8,10,12,14 /* prime all eight window slots (even-numbered registers only) */
+ epel_load_inc v\n, \len, put, \name, h, 1, t /* macro defined elsewhere; presumably h-filters one row and advances a2 - verify */
+ .endr
+ addi a4, a4, -1 /* the last result is produced after the loop, so the loop runs one time fewer than a4 */
+1: /* loop head: one stored result per iteration */
+ addi a4, a4, -1 /* a4 = iterations still to run after this one */
+ epel_load v30, \len, \op, \name, v, 0, s /* macro defined elsewhere; presumably v-filters the window into v30, put or avg depending on op - verify */
+ vse8.v v30, (a0) /* store the result bytes at a0 */
+ vmv.v.v v0, v2 /* slide the window down one slot: v2..v14 are copied to v0..v12 by this and the next six moves */
+ vmv.v.v v2, v4
+ vmv.v.v v4, v6
+ vmv.v.v v6, v8
+ vmv.v.v v8, v10
+ vmv.v.v v10, v12
+ vmv.v.v v12, v14
+ epel_load v14, \len, put, \name, h, 1, t /* refill the freed top slot v14 */
+ add a2, a2, a3 /* advance a2 by a3 explicitly; presumably epel_load, unlike epel_load_inc, leaves a2 unchanged - verify */
+ add a0, a0, a1 /* advance the store pointer by a1 */
+ bnez a4, 1b /* repeat until a4 reaches zero */
+ epel_load v30, \len, \op, \name, v, 0, s /* final result from the last window contents */
+ vse8.v v30, (a0) /* a0 is not advanced after this last store */
+.endm
+
+.macro epel_hv op name len vlen /* emits one ff_<op>_vp9_8tap_<name>_<len>hv_rvv<vlen> function built around epel_hv_once */
+func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x
+ addi sp, sp, -64 /* 64-byte frame for the eight saved s registers */
+ .irp n,0,1,2,3,4,5,6,7
+ sd s\n, \n\()<<3(sp) /* save s0..s7, slot offset = 8 * n */
+ .endr
+.if \len == 64 && \vlen < 256
+ addi sp, sp, -48 /* this case runs epel_hv_once twice, so a0..a5 are kept in 48 more bytes for the second run */
+ .irp n,0,1,2,3,4,5
+ sd a\n, \n\()<<3(sp) /* save a0..a5, slot offset = 8 * n */
+ .endr
+.endif
+.ifc \op,avg
+ csrwi vxrm, 0 /* avg only: fixed-point rounding mode 0, which the RVV spec names round-to-nearest-up */
+.endif
+ epel_filter \name, h, t /* macro defined elsewhere; presumably sets up the horizontal filter in t registers - verify */
+ epel_filter \name, v, s /* same for the vertical filter in s registers, hence the s0..s7 save above - verify */
+.if \vlen < 256
+ vsetvlstatic8 \len, a6, 32, m2 /* macro defined elsewhere; presumably sets vl for 8-bit elements with a cap of 32 and LMUL=2 - verify */
+.else
+ vsetvlstatic8 \len, a6, 64, m2 /* same with a cap of 64 */
+.endif
+ epel_hv_once \len, \name, \op /* first (or only) pass */
+.if \len == 64 && \vlen < 256
+ .irp n,0,1,2,3,4,5
+ ld a\n, \n\()<<3(sp) /* restore a0..a5 as they were on entry */
+ .endr
+ addi sp, sp, 48 /* release the argument save area */
+ addi a0, a0, 32 /* move a0 32 bytes to the right for the second pass */
+ addi a2, a2, 32 /* move a2 32 bytes to the right for the second pass */
+ epel_filter \name, h, t /* set up the t registers again; presumably the first pass clobbered them - verify */
+ epel_hv_once \len, \name, \op /* second pass; presumably covers the remaining 32 columns - verify */
+.endif
+ .irp n,0,1,2,3,4,5,6,7
+ ld s\n, \n\()<<3(sp) /* restore s0..s7 */
+ .endr
+ addi sp, sp, 64 /* release the frame */
+
+ ret
+endfunc
+.endm
+#endif
+
.irp len, 64, 32, 16, 8, 4
copy_avg \len
.irp op, put, avg
@@ -373,6 +444,10 @@ endfunc
epel \len, \op, \name, \type, 128
epel \len, \op, \name, \type, 256
.endr
+ #if __riscv_xlen == 64
+ epel_hv \op, \name, \len, 128
+ epel_hv \op, \name, \len, 256
+ #endif
.endr
.endr
.endr
@@ -118,6 +118,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
if (flags & AV_CPU_FLAG_RVB_ADDR) {
init_subpel2(0, 0, 1, v, put, 128);
init_subpel2(1, 0, 1, v, avg, 128);
+# if __riscv_xlen == 64
+ init_subpel2(0, 1, 1, hv, put, 128);
+ init_subpel2(1, 1, 1, hv, avg, 128);
+# endif
}
}
@@ -128,6 +132,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
if (flags & AV_CPU_FLAG_RVB_ADDR) {
init_subpel2(0, 0, 1, v, put, 256);
init_subpel2(1, 0, 1, v, avg, 256);
+# if __riscv_xlen == 64
+ init_subpel2(0, 1, 1, hv, put, 256);
+ init_subpel2(1, 1, 1, hv, avg, 256);
+# endif
}
}
}
From: sunyuechi <sunyuechi@iscas.ac.cn>

                                          C908     X60
vp9_avg_8tap_smooth_4hv_8bpp_c        :   32.0    28.2
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32  :   15.0    13.2
vp9_avg_8tap_smooth_8hv_8bpp_c        :   98.0    86.2
vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32  :   23.7    21.0
vp9_avg_8tap_smooth_16hv_8bpp_c       :  355.5   297.0
vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32 :   62.7    41.2
vp9_avg_8tap_smooth_32hv_8bpp_c       : 1273.0  1099.7
vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32 :  133.7   119.2
vp9_avg_8tap_smooth_64hv_8bpp_c       : 4933.0  4240.5
vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32 :  506.7   227.0
vp9_put_8tap_smooth_4hv_8bpp_c        :   30.2    27.0
vp9_put_8tap_smooth_4hv_8bpp_rvv_i32  :   14.5    12.7
vp9_put_8tap_smooth_8hv_8bpp_c        :   91.2    81.2
vp9_put_8tap_smooth_8hv_8bpp_rvv_i32  :   22.7    20.2
vp9_put_8tap_smooth_16hv_8bpp_c       :  329.2   277.7
vp9_put_8tap_smooth_16hv_8bpp_rvv_i32 :   44.7    40.0
vp9_put_8tap_smooth_32hv_8bpp_c       : 1183.7  1022.7
vp9_put_8tap_smooth_32hv_8bpp_rvv_i32 :  130.7   116.5
vp9_put_8tap_smooth_64hv_8bpp_c       : 4502.7  3954.5
vp9_put_8tap_smooth_64hv_8bpp_rvv_i32 :  496.0   224.7
---
 libavcodec/riscv/vp9_mc_rvv.S  | 75 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c |  8 ++++
 2 files changed, 83 insertions(+)