From 7abd262daa281cee412a905ea75a5f10dd0b1fbe Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Fri, 1 Mar 2024 18:38:43 +0800
Subject: [PATCH 2/4] lavc/vp9dsp: R-V V ipred vert
C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvv_i64: 18.5
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvv_i32: 50.7
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvv_i32: 136.7
---
libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 6 ++++++
libavcodec/riscv/vp9dsp_init.c | 3 +++
3 files changed, 44 insertions(+)
@@ -199,3 +199,38 @@ endfunc
func ff_dc_top_8x8_rvv, zve64x
dc8x8 top
endfunc
+
+func ff_v_32x32_rvv, zve32x // vertical intra prediction, 32x32: copies the 32 bytes at a3 into 32 rows starting at a0
+ vsetivli zero, 8, e8, mf2, ta, ma // request vl = 8; with the 32-bit accesses below that is 8 * 4 = 32 bytes (needs VLEN >= 128 -- presumably ensured by the vlenb >= 16 check at init, confirm)
+ vle32.v v8, (a3) // load the source row from a3 (4th argument, `a` in the C prototype)
+
+ .rept 31 // rows 0..30, unrolled at assembly time
+ vse32.v v8, (a0) // write the row at the current destination
+ add a0, a0, a1 // advance the destination by a1 (2nd argument, `stride`)
+ .endr
+ vse32.v v8, (a0) // row 31: final store, no pointer update needed afterwards
+
+ ret
+endfunc
+
+func ff_v_16x16_rvv, zve32x // vertical intra prediction, 16x16: copies the 16 bytes at a3 into 16 rows starting at a0
+ vsetivli zero, 4, e8, mf4, ta, ma // request vl = 4; with the 32-bit accesses below that is 4 * 4 = 16 bytes (needs VLEN >= 128 -- presumably ensured by the vlenb >= 16 check at init, confirm)
+ vle32.v v8, (a3) // load the source row from a3 (4th argument, `a` in the C prototype)
+
+ .rept 15 // rows 0..14, unrolled at assembly time
+ vse32.v v8, (a0) // write the row at the current destination
+ add a0, a0, a1 // advance the destination by a1 (2nd argument, `stride`)
+ .endr
+ vse32.v v8, (a0) // row 15: final store, no pointer update needed afterwards
+
+ ret
+endfunc
+
+func ff_v_8x8_rvv, zve64x // vertical intra prediction, 8x8: replicates the 8 bytes at a3 into 8 rows starting at a0
+ ld t0, (a3) // scalar 64-bit load of the whole source row; NOTE(review): ld is an RV64 instruction -- confirm this file is never assembled for RV32
+ vsetivli zero, 8, e64, m4, ta, ma // request vl = 8 elements of 64 bits, one per row; NOTE(review): vl only reaches 8 if VLEN >= 128, the guarding init condition is not visible here -- confirm
+ vmv.v.x v8, t0 // broadcast the row into every 64-bit element
+ vsse64.v v8, (a0), a1 // strided store: element i is written at a0 + i * a1, i.e. one row per element
+
+ ret
+endfunc
@@ -60,5 +60,11 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
+void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a); // RVV vertical predictor, 32x32: writes the 32 bytes at a to each of 32 rows of dst; l is not read
+void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a); // RVV vertical predictor, 16x16: writes the 16 bytes at a to each of 16 rows of dst; l is not read
+void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a); // RVV vertical predictor, 8x8: writes the 8 bytes at a to each of 8 rows of dst; l is not read
#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
@@ -36,6 +36,7 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv;
}
if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
@@ -51,6 +52,8 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv;
+ dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv;
}
#endif
}
--
2.44.0