diff mbox series

[FFmpeg-devel] lavc/alacdsp: unroll RISC-V V loops

Message ID 20230713202356.11127-1-remi@remlab.net
State Accepted
Commit c541ecf0dc38b6da3ba3d290b0db7d147775931f
Headers show
Series [FFmpeg-devel] lavc/alacdsp: unroll RISC-V V loops | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont July 13, 2023, 8:23 p.m. UTC
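This increases the vector register group multiplier (LMUL) from 1 to the
fastest setting measured for each function on the T-Head C910: m4 for
alac_decorrelate_stereo, and m8 for both alac_append_extra_bits functions.
Benchmarks (lower is better):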

alac_append_extra_bits_mono_c: 803.0
alac_append_extra_bits_stereo_c: 1604.2
alac_decorrelate_stereo_c: 1077.5

LMUL=1
alac_append_extra_bits_mono_rvv_i32: 418.2
alac_append_extra_bits_stereo_rvv_i32: 693.2
alac_decorrelate_stereo_rvv_i32: 673.5

LMUL=2
alac_append_extra_bits_mono_rvv_i32: 382.2
alac_append_extra_bits_stereo_rvv_i32: 648.2
alac_decorrelate_stereo_rvv_i32: 542.7

LMUL=4
alac_append_extra_bits_mono_rvv_i32: 241.5
alac_append_extra_bits_stereo_rvv_i32: 512.7
alac_decorrelate_stereo_rvv_i32: 364.2

LMUL=8
alac_append_extra_bits_mono_rvv_i32: 239.7
alac_append_extra_bits_stereo_rvv_i32: 497.2
alac_decorrelate_stereo_rvv_i32: 426.7
---
 libavcodec/riscv/alacdsp_rvv.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S
index 8fbe3fbe77..8efb04e0c8 100644
--- a/libavcodec/riscv/alacdsp_rvv.S
+++ b/libavcodec/riscv/alacdsp_rvv.S
@@ -25,7 +25,7 @@  func ff_alac_decorrelate_stereo_rvv, zve32x
         ld          a4, 8(a0)
         ld          a0, 0(a0)
 1:
-        vsetvli     t0, a1, e32, m1, ta, ma
+        vsetvli     t0, a1, e32, m4, ta, ma
         vle32.v     v24, (a4)
         sub         a1, a1, t0
         vle32.v     v16, (a0)
@@ -47,7 +47,7 @@  func ff_alac_append_extra_bits_mono_rvv, zve32x
         ld      a0, (a0)
         ld      a1, (a1)
 1:
-        vsetvli t0, a4, e32, m1, ta, ma
+        vsetvli t0, a4, e32, m8, ta, ma
         vle32.v v16, (a0)
         sub     a4, a4, t0
         vle32.v v24, (a1)
@@ -67,7 +67,7 @@  func ff_alac_append_extra_bits_stereo_rvv, zve32x
         ld      a7, 8(a1)
         ld      a1,  (a1)
 1:
-        vsetvli t0, a4, e32, m1, ta, ma
+        vsetvli t0, a4, e32, m8, ta, ma
         vle32.v v16, (a0)
         sub     a4, a4, t0
         vle32.v v0, (a6)