Message ID | tencent_425318010FBD6A9D05C7D05CB1C49C0B1606@qq.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,01/10] lavc/vp9dsp: R-V V ipred vert | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
the github link: https://github.com/hleft/FFmpeg/tree/vp9 <uk7b@foxmail.com> 于2024年5月4日周六 23:03写道: > From: sunyuechi <sunyuechi@iscas.ac.cn> > > C908: > vp9_vert_8x8_8bpp_c: 22.0 > vp9_vert_8x8_8bpp_rvv_i64: 18.5 > vp9_vert_16x16_8bpp_c: 71.2 > vp9_vert_16x16_8bpp_rvv_i32: 50.7 > vp9_vert_32x32_8bpp_c: 300.2 > vp9_vert_32x32_8bpp_rvv_i32: 136.7 > --- > libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++ > libavcodec/riscv/vp9dsp.h | 6 ++++++ > libavcodec/riscv/vp9dsp_init.c | 3 +++ > 3 files changed, 44 insertions(+) > > diff --git a/libavcodec/riscv/vp9_intra_rvv.S > b/libavcodec/riscv/vp9_intra_rvv.S > index db9774c263..b5f0f9d3c3 100644 > --- a/libavcodec/riscv/vp9_intra_rvv.S > +++ b/libavcodec/riscv/vp9_intra_rvv.S > @@ -113,3 +113,38 @@ func_dc dc_left 8 left 3 0 zve64x > func_dc dc_top 32 top 5 1 zve32x > func_dc dc_top 16 top 4 1 zve32x > func_dc dc_top 8 top 3 0 zve64x > + > +func ff_v_32x32_rvv, zve32x > + vsetivli zero, 8, e8, mf2, ta, ma > + vle32.v v8, (a3) > + > + .rept 31 > + vse32.v v8, (a0) > + add a0, a0, a1 > + .endr > + vse32.v v8, (a0) > + > + ret > +endfunc > + > +func ff_v_16x16_rvv, zve32x > + vsetivli zero, 4, e8, mf4, ta, ma > + vle32.v v8, (a3) > + > + .rept 15 > + vse32.v v8, (a0) > + add a0, a0, a1 > + .endr > + vse32.v v8, (a0) > + > + ret > +endfunc > + > +func ff_v_8x8_rvv, zve64x > + ld t0, (a3) > + vsetivli zero, 8, e64, m4, ta, ma > + vmv.v.x v8, t0 > + vsse64.v v8, (a0), a1 > + > + ret > +endfunc > diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h > index 25047ed507..113397ce86 100644 > --- a/libavcodec/riscv/vp9dsp.h > +++ b/libavcodec/riscv/vp9dsp.h > @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t > stride, const uint8_t *l, > const uint8_t *a); > void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > const uint8_t *a); > +void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > +void ff_v_16x16_rvv(uint8_t *dst, 
ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > +void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > > #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) > \ > void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t > dststride, \ > diff --git a/libavcodec/riscv/vp9dsp_init.c > b/libavcodec/riscv/vp9dsp_init.c > index 69ab39004c..9c550d40b5 100644 > --- a/libavcodec/riscv/vp9dsp_init.c > +++ b/libavcodec/riscv/vp9dsp_init.c > @@ -36,6 +36,7 @@ static av_cold void > vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) > dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv; > dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv; > dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv; > + dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv; > } > > if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() > >= 16) { > @@ -51,6 +52,8 @@ static av_cold void > vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) > dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv; > dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv; > dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv; > + dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv; > + dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv; > } > #endif > } > -- > 2.45.0 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
Le lauantaina 4. toukokuuta 2024, 18.03.04 EEST uk7b@foxmail.com a écrit : > From: sunyuechi <sunyuechi@iscas.ac.cn> > > C908: > vp9_vert_8x8_8bpp_c: 22.0 > vp9_vert_8x8_8bpp_rvv_i64: 18.5 > vp9_vert_16x16_8bpp_c: 71.2 > vp9_vert_16x16_8bpp_rvv_i32: 50.7 > vp9_vert_32x32_8bpp_c: 300.2 > vp9_vert_32x32_8bpp_rvv_i32: 136.7 > --- > libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++ > libavcodec/riscv/vp9dsp.h | 6 ++++++ > libavcodec/riscv/vp9dsp_init.c | 3 +++ > 3 files changed, 44 insertions(+) > > diff --git a/libavcodec/riscv/vp9_intra_rvv.S > b/libavcodec/riscv/vp9_intra_rvv.S index db9774c263..b5f0f9d3c3 100644 > --- a/libavcodec/riscv/vp9_intra_rvv.S > +++ b/libavcodec/riscv/vp9_intra_rvv.S > @@ -113,3 +113,38 @@ func_dc dc_left 8 left 3 0 zve64x > func_dc dc_top 32 top 5 1 zve32x > func_dc dc_top 16 top 4 1 zve32x > func_dc dc_top 8 top 3 0 zve64x > + > +func ff_v_32x32_rvv, zve32x > + vsetivli zero, 8, e8, mf2, ta, ma > + vle32.v v8, (a3) > + > + .rept 31 > + vse32.v v8, (a0) > + add a0, a0, a1 > + .endr > + vse32.v v8, (a0) > + > + ret > +endfunc This does not look like it needs vectors. Ditto the next one. > + > +func ff_v_16x16_rvv, zve32x > + vsetivli zero, 4, e8, mf4, ta, ma > + vle32.v v8, (a3) > + > + .rept 15 > + vse32.v v8, (a0) > + add a0, a0, a1 > + .endr > + vse32.v v8, (a0) > + > + ret > +endfunc > + > +func ff_v_8x8_rvv, zve64x > + ld t0, (a3) > + vsetivli zero, 8, e64, m4, ta, ma > + vmv.v.x v8, t0 > + vsse64.v v8, (a0), a1 > + > + ret > +endfunc This is missing a guard against RV32. 
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h > index 25047ed507..113397ce86 100644 > --- a/libavcodec/riscv/vp9dsp.h > +++ b/libavcodec/riscv/vp9dsp.h > @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, > const uint8_t *l, const uint8_t *a); > void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > const uint8_t *a); > +void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > +void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > +void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, > + const uint8_t *a); > > #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) > \ void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t > dststride, \ diff --git a/libavcodec/riscv/vp9dsp_init.c > b/libavcodec/riscv/vp9dsp_init.c index 69ab39004c..9c550d40b5 100644 > --- a/libavcodec/riscv/vp9dsp_init.c > +++ b/libavcodec/riscv/vp9dsp_init.c > @@ -36,6 +36,7 @@ static av_cold void > vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) > dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv; > dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv; > dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv; + > dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv; > } > > if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= > 16) { @@ -51,6 +52,8 @@ static av_cold void > vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) > dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv; > dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv; > dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv; + > dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv; + > dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv; } > #endif > }
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S index db9774c263..b5f0f9d3c3 100644 --- a/libavcodec/riscv/vp9_intra_rvv.S +++ b/libavcodec/riscv/vp9_intra_rvv.S @@ -113,3 +113,38 @@ func_dc dc_left 8 left 3 0 zve64x func_dc dc_top 32 top 5 1 zve32x func_dc dc_top 16 top 4 1 zve32x func_dc dc_top 8 top 3 0 zve64x + +func ff_v_32x32_rvv, zve32x + vsetivli zero, 8, e8, mf2, ta, ma + vle32.v v8, (a3) + + .rept 31 + vse32.v v8, (a0) + add a0, a0, a1 + .endr + vse32.v v8, (a0) + + ret +endfunc + +func ff_v_16x16_rvv, zve32x + vsetivli zero, 4, e8, mf4, ta, ma + vle32.v v8, (a3) + + .rept 15 + vse32.v v8, (a0) + add a0, a0, a1 + .endr + vse32.v v8, (a0) + + ret +endfunc + +func ff_v_8x8_rvv, zve64x + ld t0, (a3) + vsetivli zero, 8, e64, m4, ta, ma + vmv.v.x v8, t0 + vsse64.v v8, (a0), a1 + + ret +endfunc diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h index 25047ed507..113397ce86 100644 --- a/libavcodec/riscv/vp9dsp.h +++ b/libavcodec/riscv/vp9dsp.h @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a); void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a); +void ff_v_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, + const uint8_t *a); +void ff_v_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, + const uint8_t *a); +void ff_v_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, + const uint8_t *a); #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) \ void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \ diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c index 69ab39004c..9c550d40b5 100644 --- a/libavcodec/riscv/vp9dsp_init.c +++ b/libavcodec/riscv/vp9dsp_init.c @@ -36,6 +36,7 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv; dsp->intra_pred[TX_8X8][DC_129_PRED] = 
ff_dc_129_8x8_rvv; dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv; + dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvv; } if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { @@ -51,6 +52,8 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv; dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv; dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv; + dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvv; + dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvv; } #endif }
From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvv_i64: 18.5
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvv_i32: 50.7
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvv_i32: 136.7

---
libavcodec/riscv/vp9_intra_rvv.S | 35 ++++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 6 ++++++
libavcodec/riscv/vp9dsp_init.c | 3 +++
3 files changed, 44 insertions(+)