@@ -356,6 +356,71 @@ func ff_\op\()_vp9_8tap_\name\()_\len\()\type\()_rvv\vlen\(), zve32x
endfunc
.endm
+#if __riscv_xlen == 64
+.macro epel_hv_once len, name, op // one h+v filter pass producing a4 stored rows; epel_load/epel_load_inc are defined outside this hunk
+ sub a2, a2, a3 // three subtractions: a2 -= 3 * a3 in total
+ sub a2, a2, a3
+ sub a2, a2, a3 // presumably a2 = source pointer, a3 = source stride (start 3 rows above) - TODO confirm
+ .irp n,0,2,4,6,8,10,12,14 // prime the window registers v0, v2, ..., v14
+ epel_load_inc v\n, \len, put, \name, h, 1, t // presumably one h-filtered row per register, advancing a2 - TODO confirm
+ .endr
+ addi a4, a4, -1 // the last row is emitted after the loop, so the loop runs one time fewer
+1: // NOTE(review): the loop only terminates as intended when a4 >= 2 on entry
+ addi a4, a4, -1
+ epel_load v30, \len, \op, \name, v, 0, s // presumably the v pass over the window, result in v30 - TODO confirm
+ vse8.v v30, (a0) // store the finished row at a0
+ vmv.v.v v0, v2 // slide the window down by one slot: v0 <- v2 ... v12 <- v14
+ vmv.v.v v2, v4
+ vmv.v.v v4, v6
+ vmv.v.v v6, v8
+ vmv.v.v v8, v10
+ vmv.v.v v10, v12
+ vmv.v.v v12, v14
+ epel_load v14, \len, put, \name, h, 1, t // refill the freed last slot; non-_inc variant, a2 is bumped explicitly below
+ add a2, a2, a3 // a2 += a3
+ add a0, a0, a1 // a0 += a1 (next store address)
+ bnez a4, 1b
+ epel_load v30, \len, \op, \name, v, 0, s // tail: emit the final row from the current window, no further load
+ vse8.v v30, (a0)
+.endm
+
+.macro epel_hv op, name, len, vlen // emits ff_<op>_vp9_8tap_<name>_<len>hv_rvv<vlen>
+func ff_\op\()_vp9_8tap_\name\()_\len\()hv_rvv\vlen\(), zve32x
+ addi sp, sp, -64 // 64-byte frame for the eight s-register saves
+ .irp n,0,1,2,3,4,5,6,7
+ sd s\n, \n\()<<3(sp) // save s0-s7 at offsets 0..56; the v filter setup below targets s-registers
+ .endr
+.if \len == 64 && \vlen < 256 // this case is handled as two epel_hv_once passes (see the +32 offsets below)
+ addi sp, sp, -48 // second frame, 48 bytes, below the s-register saves
+ .irp n,0,1,2,3,4,5
+ sd a\n, \n\()<<3(sp) // keep a0-a5 so the second pass can restart from the entry values
+ .endr
+.endif
+ csrwi vxrm, 0 // fixed-point rounding mode 0 (round-to-nearest-up per the RVV spec)
+ epel_filter \name, h, t, a7 // presumably sets up the h filter in t-registers / a7 - TODO confirm
+ epel_filter \name, v, s, s7 // presumably sets up the v filter in s-registers / s7 - TODO confirm
+ vsetvlstatic8 \len, a6, 64, m2 // helper defined outside this hunk; NOTE(review): looks like e8 with LMUL=2 - confirm
+ epel_hv_once \len, \name, \op // first (or only) pass
+.if \len == 64 && \vlen < 256
+ .irp n,0,1,2,3,4,5
+ ld a\n, \n\()<<3(sp) // restore the a0-a5 values saved on entry
+ .endr
+ addi sp, sp, 48 // drop the second frame; sp is back at the s-register saves
+ addi a0, a0, 32 // second pass starts 32 bytes further on in both a0 and a2
+ addi a2, a2, 32
+ epel_filter \name, h, t, a7 // h setup is repeated for the second pass; the v setup is not
+ epel_hv_once \len, \name, \op // second pass
+.endif
+ .irp n,0,1,2,3,4,5,6,7
+ ld s\n, \n\()<<3(sp) // restore s0-s7
+ .endr
+ addi sp, sp, 64 // release the 64-byte frame
+
+ ret
+endfunc
+.endm
+#endif
+
.irp len, 64, 32, 16, 8, 4
copy_avg \len
.irp op, put, avg
@@ -364,6 +429,10 @@ endfunc
epel \len, \op, \name, \type, 128
epel \len, \op, \name, \type, 256
.endr
+ #if __riscv_xlen == 64
+ epel_hv \op, \name, \len, 128
+ epel_hv \op, \name, \len, 256
+ #endif
.endr
.endr
.endr
@@ -119,6 +119,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
if (flags & AV_CPU_FLAG_RVB_ADDR) {
init_subpel2(0, 0, 1, v, put, 128);
init_subpel2(1, 0, 1, v, avg, 128);
+# if __riscv_xlen == 64
+ init_subpel2(0, 1, 1, hv, put, 128);
+ init_subpel2(1, 1, 1, hv, avg, 128);
+# endif
}
}
@@ -129,6 +133,10 @@ static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
if (flags & AV_CPU_FLAG_RVB_ADDR) {
init_subpel2(0, 0, 1, v, put, 256);
init_subpel2(1, 0, 1, v, avg, 256);
+# if __riscv_xlen == 64
+ init_subpel2(0, 1, 1, hv, put, 256);
+ init_subpel2(1, 1, 1, hv, avg, 256);
+# endif
}
}
}
From: sunyuechi <sunyuechi@iscas.ac.cn>

                                                        C908     X60
vp9_avg_8tap_smooth_4hv_8bpp_c                     :    32.0    28.0
vp9_avg_8tap_smooth_4hv_8bpp_rvv_i32               :    15.0    13.2
vp9_avg_8tap_smooth_8hv_8bpp_c                     :    98.0    86.2
vp9_avg_8tap_smooth_8hv_8bpp_rvv_i32               :    23.7    21.2
vp9_avg_8tap_smooth_16hv_8bpp_c                    :   355.7   297.0
vp9_avg_8tap_smooth_16hv_8bpp_rvv_i32              :    47.0    41.5
vp9_avg_8tap_smooth_32hv_8bpp_c                    :  1272.7  1099.7
vp9_avg_8tap_smooth_32hv_8bpp_rvv_i32              :   134.7   119.7
vp9_avg_8tap_smooth_64hv_8bpp_c                    :  4937.0  4224.2
vp9_avg_8tap_smooth_64hv_8bpp_rvv_i32              :   528.5   228.5
vp9_put_8tap_smooth_4hv_8bpp_c                     :    30.2    26.7
vp9_put_8tap_smooth_4hv_8bpp_rvv_i32               :    30.5    12.5
vp9_put_8tap_smooth_8hv_8bpp_c                     :    91.5    81.2
vp9_put_8tap_smooth_8hv_8bpp_rvv_i32               :    22.7    20.2
vp9_put_8tap_smooth_16hv_8bpp_c                    :   313.2   277.5
vp9_put_8tap_smooth_16hv_8bpp_rvv_i32              :    45.2    40.2
vp9_put_8tap_smooth_32hv_8bpp_c                    :  1166.7  1022.2
vp9_put_8tap_smooth_32hv_8bpp_rvv_i32              :   131.7   117.2
vp9_put_8tap_smooth_64hv_8bpp_c                    :  4560.5  3961.7
vp9_put_8tap_smooth_64hv_8bpp_rvv_i32              :   517.0   223.2
---
 libavcodec/riscv/vp9_mc_rvv.S  | 69 ++++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp_init.c |  8 ++++
 2 files changed, 77 insertions(+)