diff mbox series

[FFmpeg-devel,4/5] lavc/vp8dsp: save one R-V GPR

Message ID 20240525153840.78147-4-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/5] lavc/vp8dsp: avoid one multiplication on RISC-V | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont May 25, 2024, 3:38 p.m. UTC
This saves one instruction and frees up register a5, which will be repurposed
in later changes. Unfortunately, we need to add quite a lot of alternative
code for this: each address update now needs its own `.ifc \type,v` / `.else` variants.
---
 libavcodec/riscv/vp8dsp_rvv.S | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 545c2e9728..a4fcd158a5 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -202,23 +202,31 @@  func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
 1:
         addi            a4, a4, -1
 .ifc \type,v
-        mv              a5, a3
+        sub             t6, a2, a3
+        add             a7, a2, a3
 .else
-        li              a5, 1
+        addi            t6, a2, -1
+        addi            a7, a2, 1
 .endif
-        sub             t6, a2, a5
-        add             a7, a2, a5
-
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
-        add             a7, a7, a5
+.ifc \type,v
+        add             a7, a7, a3
+.else
+        addi            a7, a7, 1
+.endif
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
-        sub             t6, t6, a5
-        add             a7, a7, a5
+.ifc \type,v
+        sub             t6, t6, a3
+        add             a7, a7, a3
+.else
+        addi            t6, t6, -1
+        addi            a7, a7, 1
+.endif
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24