@@ -34,6 +34,9 @@ void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
int i, int len);
+void ff_ps_stereo_interpolate_rvv(float (*l)[2], float (*r)[2], // RVV assembly routine; l and r are read and written in place as float pairs
+ float h[2][4], float h_step[2][4], int len); // the assembly reads only the first 4 floats of h and of h_step; len = number of float pairs processed
+
av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
{
#if HAVE_RVV
@@ -43,6 +46,7 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
c->add_squares = ff_ps_add_squares_rvv;
c->mul_pair_single = ff_ps_mul_pair_single_rvv;
c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
+ c->stereo_interpolate[0] = ff_ps_stereo_interpolate_rvv;
}
if (flags & AV_CPU_FLAG_RVV_I32) {
@@ -219,3 +219,59 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve32x
3:
ret
endfunc
+
+func ff_ps_stereo_interpolate_rvv, zve32f // a0 = l, a1 = r, a2 = h, a3 = h_step, a4 = len; per pair: l' = h0*l + h2*r, r' = h1*l + h3*r, with h0..h3 stepped before each pair
+ vsetvli t0, zero, e32, m1, ta, ma // t0 = VLMAX for 32-bit elements at LMUL=1
+ vid.v v24 // v24[i] = i
+ flw ft0, (a2) // ft0 = h0 (1st float at h)
+ vadd.vi v24, v24, 1 // v24[i] = i + 1
+ flw ft1, 4(a2) // ft1 = h1
+ vfcvt.f.xu.v v24, v24 // v24[i] = (float)(i + 1)
+ flw ft2, 8(a2) // ft2 = h2
+ vfmv.v.f v16, ft0 // v16 = h0 broadcast to every element
+ flw ft3, 12(a2) // ft3 = h3
+ vfmv.v.f v17, ft1 // v17 = h1 broadcast
+ flw ft0, (a3) // ft0 reused: h0_step (1st float at h_step)
+ vfmv.v.f v18, ft2 // v18 = h2 broadcast
+ flw ft1, 4(a3) // ft1 reused: h1_step
+ vfmv.v.f v19, ft3 // v19 = h3 broadcast
+ flw ft2, 8(a3) // ft2 reused: h2_step
+ vfmv.v.f v20, ft0 // v20 = h0_step broadcast
+ flw ft3, 12(a3) // ft3 reused: h3_step
+ vfmv.v.f v21, ft1 // v21 = h1_step broadcast
+ fcvt.s.wu ft4, t0 // (float)(vlenb / sizeof (float)), i.e. VLMAX as a float
+ vfmv.v.f v22, ft2 // v22 = h2_step broadcast
+ fmul.s ft0, ft0, ft4 // ft0 = VLMAX * h0_step: amount h0 advances per loop pass
+ vfmv.v.f v23, ft3 // v23 = h3_step broadcast
+ fmul.s ft1, ft1, ft4 // ft1 = VLMAX * h1_step
+ vfmacc.vv v16, v24, v20 // h0 += (i + 1) * h0_step, so element i holds the coefficient for pair i
+ fmul.s ft2, ft2, ft4 // ft2 = VLMAX * h2_step
+ vfmacc.vv v17, v24, v21 // h1 += (i + 1) * h1_step
+ fmul.s ft3, ft3, ft4 // ft3 = VLMAX * h3_step
+ vfmacc.vv v18, v24, v22 // h2 += (i + 1) * h2_step
+ vfmacc.vv v19, v24, v23 // h3 += (i + 1) * h3_step
+1: // main loop: one pass handles t0 pairs of l and r
+ vsetvli t0, a4, e32, m1, ta, ma // t0 = number of pairs handled this pass (at most VLMAX)
+ vlseg2e32.v v8, (a0) // v8:l_re, v9:l_im (de-interleaving load)
+ sub a4, a4, t0 // a4 = pairs still to process after this pass
+ vlseg2e32.v v10, (a1) // v10:r_re, v11:r_im
+ vfmul.vv v12, v8, v16 // v12 = l_re * h0
+ vfmul.vv v13, v9, v16 // v13 = l_im * h0
+ vfmul.vv v14, v8, v17 // v14 = l_re * h1
+ vfmul.vv v15, v9, v17 // v15 = l_im * h1
+ vfmacc.vv v12, v10, v18 // v12 += r_re * h2 -> new l_re
+ vfmacc.vv v13, v11, v18 // v13 += r_im * h2 -> new l_im
+ vfmacc.vv v14, v10, v19 // v14 += r_re * h3 -> new r_re
+ vfmacc.vv v15, v11, v19 // v15 += r_im * h3 -> new r_im
+ vsseg2e32.v v12, (a0) // store new l pairs (v12, v13) interleaved, in place
+ sh3add a0, t0, a0 // a0 += t0 * 8 bytes (two floats per pair)
+ vsseg2e32.v v14, (a1) // store new r pairs (v14, v15) interleaved, in place
+ sh3add a1, t0, a1 // a1 += t0 * 8 bytes
+ vfadd.vf v16, v16, ft0 // h0 += (vlenb / sizeof (float)) * h0_step; NOTE(review): assumes every non-final pass got t0 == VLMAX - confirm against the vsetvli rules for VLMAX < AVL < 2*VLMAX
+ vfadd.vf v17, v17, ft1 // h1 += VLMAX * h1_step
+ vfadd.vf v18, v18, ft2 // h2 += VLMAX * h2_step
+ vfadd.vf v19, v19, ft3 // h3 += VLMAX * h3_step
+ bnez a4, 1b // loop while pairs remain
+
+ ret
+endfunc
From: Rémi Denis-Courmont <remi@remlab.net>
---
 libavcodec/riscv/aacpsdsp_init.c |  4 +++
 libavcodec/riscv/aacpsdsp_rvv.S  | 56 ++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)