diff mbox series

[FFmpeg-devel,5/6] lavc/ac3dsp: RISC-V V ac3_compute_mantissa_size

Message ID 20230615103645.25778-6-shenpeiting@eswincomputing.com
State New
Headers show
Series RISC-V initial ac3dsp | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

沈佩婷 June 15, 2023, 10:36 a.m. UTC
From: Shen Peiting <shenpeiting@eswincomputing.com>

Use the RVV strided segment load instruction vlsseg<nf>e<eew>.v to read the matrix columns.

Benchmarks on Spike (cycles):
ac3_compute_mantissa_size_c: 2338
ac3_compute_mantissa_size_rvv: 55

Co-authored-by: Yang Xiaojun <yangxiaojun@eswincomputing.com>
Co-authored-by: Huang Xing <huangxing1@eswincomputing.com>
Co-authored-by: Zeng Fanchen <zengfanchen@eswincomputing.com>
Signed-off-by: Shen Peiting <shenpeiting@eswincomputing.com>
---
 libavcodec/riscv/ac3dsp_init.c |  3 ++
 libavcodec/riscv/ac3dsp_rvv.S  | 53 ++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/riscv/ac3dsp_init.c b/libavcodec/riscv/ac3dsp_init.c
index d3aa20623a..4769213ebc 100644
--- a/libavcodec/riscv/ac3dsp_init.c
+++ b/libavcodec/riscv/ac3dsp_init.c
@@ -35,6 +35,8 @@  void ff_ac3_sum_square_butterfly_float_rvv(float sum[4],
                                             const float *coef1,
                                             int len);
 
+int ff_ac3_compute_mantissa_size_rvv(uint16_t mant_cnt[6][16]); /* returns the weighted sum */
+
 av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
 {
     int flags = av_get_cpu_flags();
@@ -42,6 +44,7 @@  av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
     if (flags & AV_CPU_FLAG_RVV_I32) {
         c->ac3_exponent_min = ff_ac3_exponent_min_rvv;
         c->float_to_fixed24 = ff_float_to_fixed24_rvv;
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_rvv;
     }
     if (flags & AV_CPU_FLAG_RVV_F32)
         c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_rvv;
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
index 05a4d44938..cedd3d7d05 100644
--- a/libavcodec/riscv/ac3dsp_rvv.S
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -170,3 +170,56 @@  func ff_ac3_sum_square_butterfly_float_rvv, zve32f
     addi            a0, a0, 4
     ret
 endfunc
+
+
+// Returns the sum, over the 6 rows of mant_cnt[6][16], of the weighted column counts.
+func ff_ac3_compute_mantissa_size_rvv, zve32x
+    li               t1, 32                    // row stride: 16 * sizeof(uint16_t)
+    li               t2, 3
+    vsetivli         zero, 6, e16, m1, ta, ma  // one lane per row; assumes VLEN >= 128 so that vl == 6
+    vlsseg5e16.v     v0, (a0), t1              // v0..v4 = columns 0..4 (column 0 is unused)
+    // v22/v23 (32-bit accumulator) = (column[1] / 3) * 5
+    vdivu.vx         v1, v1, t2
+    li               t3, 5
+    vwmulu.vx        v22, v1, t3
+    // accumulator += column[3] * 3 + (column[2] / 3 + (column[4] >> 1)) * 7
+    vdivu.vx         v2, v2, t2
+    vwmaccu.vx       v22, t2, v3
+    vsrl.vi          v4, v4, 1                 // logical shift: the counters are uint16_t
+    vadd.vv          v4, v4, v2
+    li               t2, 7
+    vwmaccu.vx       v22, t2, v4

+    addi             a0, a0, 10                // a0 -> column 5
+    vlsseg8e16.v     v5, (a0), t1              // v5..v12 = columns 5..12, weighted 4..11
+    li               t3, 4
+    vwmaccu.vx       v22, t3, v5
+    li               t3, 5
+    vwmaccu.vx       v22, t3, v6
+    li               t3, 6
+    vwmaccu.vx       v22, t3, v7
+    vwmaccu.vx       v22, t2, v8               // t2 still holds 7
+    li               t3, 8
+    vwmaccu.vx       v22, t3, v9
+    li               t3, 9
+    vwmaccu.vx       v22, t3, v10
+    li               t3, 10
+    vwmaccu.vx       v22, t3, v11
+    li               t3, 11
+    vwmaccu.vx       v22, t3, v12

+    addi             a0, a0, 16                // a0 -> column 13
+    vlsseg3e16.v     v5, (a0), t1              // v5..v7 = columns 13..15, weighted 12, 14, 16
+    li               t3, 12
+    vwmaccu.vx       v22, t3, v5
+    li               t3, 14
+    vwmaccu.vx       v22, t3, v6
+    li               t3, 16
+    vwmaccu.vx       v22, t3, v7

+    vsetivli         zero, 6, e32, m2, ta, ma  // switch to the widened 32-bit per-row totals
+    vmv.s.x          v30, zero                 // reduction seed = 0
+    vredsum.vs       v30, v22, v30             // v30[0] = sum of the six per-row totals
+    vmv.x.s          a0, v30                   // return value
+    ret
+endfunc