@@ -35,6 +35,8 @@ void ff_ac3_sum_square_butterfly_float_rvv(float sum[4],
const float *coef1,
int len);
+int ff_ac3_compute_mantissa_size_rvv(uint16_t mant_cnt[6][16]); /* returns total mantissa bits */
+
av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
{
int flags = av_get_cpu_flags();
@@ -42,6 +44,7 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
if (flags & AV_CPU_FLAG_RVV_I32) {
c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+ c->compute_mantissa_size = ff_ac3_compute_mantissa_size_rvv;
}
if (flags & AV_CPU_FLAG_RVV_F32)
c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_rvv;
@@ -170,3 +170,56 @@ func ff_ac3_sum_square_butterfly_float_rvv, zve32f
addi a0, a0, 4
ret
endfunc
+
+
+func ff_ac3_compute_mantissa_size_rvv, zve32x
+ li t1, 32                 # row stride: each mant_cnt row is 16 x uint16_t = 32 bytes
+ li t2, 3
+ vsetivli t0, 6, e16       # vl = 6, one lane per block row  # NOTE(review): no LMUL/tail policy given; recent assemblers may require "e16, m1, ta, ma" -- confirm
+ vlsseg5e16.v v0, (a0), t1 # v0..v4 = mant_cnt[blk][0..4] for all 6 blocks (bap 0 in v0 is unused: 0 bits)
+ # bap 1: (mant_cnt[blk][1] / 3) groups of 3 mantissas, 5 bits per group
+ vdivu.vx v1, v1, t2
+ li t3, 5
+ vwmul.vx v22, v1, t3      # v22 (widened e32 group v22/v23) accumulates the per-block bit totals
+ # bap 2: (mant_cnt[blk][2] / 3) groups of 3 mantissas, 7 bits per group
+ vdivu.vx v2, v2, t2
+ vwmacc.vx v22, t2, v3     # bap 3: 3 bits per mantissa (t2 still holds 3)
+ vsra.vi v4, v4, 1         # bap 4: mant_cnt[blk][4] >> 1 pairs  # NOTE(review): arithmetic shift on unsigned counts -- assumes counts < 0x8000; vsrl.vi would be strictly safer
+ vadd.vv v4, v4, v2        # bap-4 pairs and bap-2 groups both cost 7 bits each
+ li t2, 7
+ vwmacc.vx v22, t2, v4
+
+ addi a0, a0, 10           # advance to column 5: &mant_cnt[0][5]
+ vlsseg8e16.v v5, (a0), t1 # v5..v12 = mant_cnt[blk][5..12]
+ li t3, 4                  # bap 5..12: 4, 5, ..., 11 bits per mantissa
+ vwmacc.vx v22, t3, v5
+ li t3, 5
+ vwmacc.vx v22, t3, v6
+ li t3, 6
+ vwmacc.vx v22, t3, v7
+ li t3, 7
+ vwmacc.vx v22, t3, v8
+ li t3, 8
+ vwmacc.vx v22, t3, v9
+ li t3, 9
+ vwmacc.vx v22, t3, v10
+ li t3, 10
+ vwmacc.vx v22, t3, v11
+ li t3, 11
+ vwmacc.vx v22, t3, v12
+
+ addi a0, a0, 16           # advance to column 13: &mant_cnt[0][13]
+ vlsseg3e16.v v5, (a0), t1 # v5..v7 = mant_cnt[blk][13..15] (reuses v5.. now that earlier values are consumed)
+ li t3, 12                 # bap 13..15: 12, 14 and 16 bits per mantissa
+ vwmacc.vx v22, t3, v5
+ li t3, 14
+ vwmacc.vx v22, t3, v6
+ li t3, 16
+ vwmacc.vx v22, t3, v7
+
+ vsetivli t0, 6, e32, m2   # v22/v23 hold 6 e32 per-block sums (widened EMUL=2 group)
+ vmv.s.x v30, x0           # scalar accumulator = 0
+ vredsum.vs v30, v22, v30  # reduce the 6 per-block totals into one sum
+ vmv.x.s a0, v30           # return total mantissa bit count in a0
+ ret
+endfunc