diff mbox series

[FFmpeg-devel] lavc/g722dsp: optimise R-V V apply_qmf

Message ID 20231119125413.14429-1-remi@remlab.net
State Accepted
Commit b88d4058f95de7ebf8322358d2e72cbeaffec49e
Headers show
Series [FFmpeg-devel] lavc/g722dsp: optimise R-V V apply_qmf | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont Nov. 19, 2023, 12:54 p.m. UTC
This stores the constant coefficients deinterleaved, so that they can be
loaded directly with plain unit-stride loads (NF=0) instead of a segmented
load. Unfortunately, we cannot optimise loading the input, because its
memory alignment is insufficient (not 32-bit aligned).

Before:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 78.2

After:
g722_apply_qmf_c:       82.5
g722_apply_qmf_rvv_i32: 65.2
---
 libavcodec/riscv/g722dsp_rvv.S | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S
index 350be8dc1f..981d5cecd8 100644
--- a/libavcodec/riscv/g722dsp_rvv.S
+++ b/libavcodec/riscv/g722dsp_rvv.S
@@ -24,7 +24,9 @@  func ff_g722_apply_qmf_rvv, zve32x
         lla         t0, qmf_coeffs
         vsetivli    zero, 12, e16, m2, ta, ma
         vlseg2e16.v v28, (a0)
-        vlseg2e16.v v24, (t0)
+        addi        t1, t0, 12 * 2
+        vle16.v     v24, (t0)
+        vle16.v     v26, (t1)
         vwmul.vv    v16, v28, v24
         vwmul.vv    v20, v30, v26
         vsetivli    zero, 12, e32, m4, ta, ma
@@ -41,26 +43,26 @@  endfunc
 const qmf_coeffs, align=2
         .short     3
         .short   -11
-        .short   -11
-        .short    53
         .short    12
-        .short  -156
         .short    32
-        .short   362
         .short  -210
-        .short  -805
         .short   951
         .short  3876
-        .short  3876
-        .short   951
         .short  -805
-        .short  -210
         .short   362
-        .short    32
         .short  -156
-        .short    12
         .short    53
         .short   -11
         .short   -11
+        .short    53
+        .short  -156
+        .short   362
+        .short  -805
+        .short  3876
+        .short   951
+        .short  -210
+        .short    32
+        .short    12
+        .short   -11
         .short     3
 endconst