diff mbox series

[FFmpeg-devel,2/2] lavc/aacpsdsp: unroll RISC-V V add_squares

Message ID 20230715205705.34475-2-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/2] lavc/aacpsdsp: rework RISC-V add_squares | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont July 15, 2023, 8:57 p.m. UTC
This does not make much difference on the device under test, but since
we can, do it anyway:

Before:
ps_add_squares_rvv_f32: 11973.7

After:
ps_add_squares_rvv_f32: 11958.2
---
 libavcodec/riscv/aacpsdsp_rvv.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index b7ea314fc3..bd8f905334 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -23,15 +23,15 @@ 
 func ff_ps_add_squares_rvv, zve32f
         li          t1, 32
 1:
-        vsetvli     t0, a2, e32, m1, ta, ma
+        vsetvli     t0, a2, e32, m4, ta, ma
         vle64.v     v8, (a1)
         vnsrl.wx    v24, v8, zero
         vle32.v     v16, (a0)
         sub         a2, a2, t0
-        vnsrl.wx    v25, v8, t1
+        vnsrl.wx    v28, v8, t1
         vfmacc.vv   v16, v24, v24
         sh3add      a1, t0, a1
-        vfmacc.vv   v16, v25, v25
+        vfmacc.vv   v16, v28, v28
         vse32.v     v16, (a0)
         sh2add      a0, t0, a0
         bnez        a2, 1b