Message ID | 20230615103645.25778-3-shenpeiting@eswincomputing.com |
---|---|
State | New |
Headers | show |
Series | RISC-V initial ac3dsp | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
Le torstaina 15. kesäkuuta 2023, 13.36.41 EEST Peiting Shen a écrit : > From: Shen Peiting <shenpeiting@eswincomputing.com> > > Vector instructions replaces scalar options of float convert to fixed > > Benchmarks on Spike(cycles): > len=16 > float_to_fixed24_c: 315 > float_to_fixed24_rvv: 27 > len=160 > float_to_fixed24_c: 2871 > float_to_fixed24_rvv: 67 > > Co-Authored by: Yang Xiaojun <yangxiaojun@eswincomputing.com> > Co-Authored by: Huang Xing <huangxing1@eswincomputing.com> > Co-Authored by: Zeng Fanchen <zengfanchen@eswincomputing.com> > Signed-off-by: Shen Peiting <shenpeiting@eswincomputing.com> > --- > libavcodec/riscv/ac3dsp_init.c | 5 ++++- > libavcodec/riscv/ac3dsp_rvv.S | 19 +++++++++++++++++++ > 2 files changed, 23 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c > index bb67d86998..a4e75a7541 100644 > --- a/libavcodec/riscv/ac3dsp_init.c > +++ b/libavcodec/riscv/ac3dsp_init.c > @@ -25,13 +25,16 @@ > #include "config.h" > > void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int > nb_coefs); +void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, > unsigned int len); > > av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c) > { > int flags = av_get_cpu_flags(); > #if HAVE_RVV > - if (flags & AV_CPU_FLAG_RVV_I32) > + if (flags & AV_CPU_FLAG_RVV_I32) { > c->ac3_exponent_min = ff_ac3_exponent_min_rvv; > + c->float_to_fixed24 = ff_float_to_fixed24_rvv; > + } > #endif > } > > diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S > index 879123f4a7..d98e72c12c 100644 > --- a/libavcodec/riscv/ac3dsp_rvv.S > +++ b/libavcodec/riscv/ac3dsp_rvv.S > @@ -44,3 +44,22 @@ func ff_ac3_exponent_min_rvv, zve32x > 3: > ret > endfunc > + > + > +func ff_float_to_fixed24_rvv, zve32x > + addi t1, x0, 1 That's `li t1, 1` please. > + slli t1, t1, 24 > + fcvt.s.w f1, t1 Please use ABI names for FPRs, e.g. `ft0`. 
Nobody wants to have to remember which ones are callee-saved and which ones aren't. > +1: > + vsetvli t0, a2, e32, m8 > + vle32.v v0, (a1) > + vfmul.vf v0, v0, f1 > + vfcvt.x.f.v v16, v0 > + vse32.v v16, (a0) > + sub a2, a2, t0 > + slli t0, t0, 2 > + add a1, a1, t0 > + add a0, a0, t0 Use sh2add to save one in three instructions here. And please interleave scalar and vector instructions so that an in-order CPU can potentially multi-issue. > + bgtz a2, 1b > + ret > +endfunc
diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c index bb67d86998..a4e75a7541 100644 --- a/libavcodec/riscv/ac3dsp_init.c +++ b/libavcodec/riscv/ac3dsp_init.c @@ -25,13 +25,16 @@ #include "config.h" void ff_ac3_exponent_min_rvv(uint8_t *exp, int num_reuse_blocks, int nb_coefs); +void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, unsigned int len); av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c) { int flags = av_get_cpu_flags(); #if HAVE_RVV - if (flags & AV_CPU_FLAG_RVV_I32) + if (flags & AV_CPU_FLAG_RVV_I32) { c->ac3_exponent_min = ff_ac3_exponent_min_rvv; + c->float_to_fixed24 = ff_float_to_fixed24_rvv; + } #endif } diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S index 879123f4a7..d98e72c12c 100644 --- a/libavcodec/riscv/ac3dsp_rvv.S +++ b/libavcodec/riscv/ac3dsp_rvv.S @@ -44,3 +44,22 @@ func ff_ac3_exponent_min_rvv, zve32x 3: ret endfunc + + +func ff_float_to_fixed24_rvv, zve32x + addi t1, x0, 1 + slli t1, t1, 24 + fcvt.s.w f1, t1 +1: + vsetvli t0, a2, e32, m8 + vle32.v v0, (a1) + vfmul.vf v0, v0, f1 + vfcvt.x.f.v v16, v0 + vse32.v v16, (a0) + sub a2, a2, t0 + slli t0, t0, 2 + add a1, a1, t0 + add a0, a0, t0 + bgtz a2, 1b + ret +endfunc