diff mbox series

[FFmpeg-devel,2/2] riscv: allow passing addend to vtype_vli macro

Message ID 20240527155946.750660-2-remi@remlab.net
State Accepted
Commit 4fe8f2cc435bdbcaddd6ccfc401c0246b612873b
Headers show
Series [FFmpeg-devel,1/2] lavc/lpc: fix off-by-one in R-V V compute_autocorr | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch
andriy/configure_x86 warning Failed to apply patch

Commit Message

Rémi Denis-Courmont May 27, 2024, 3:59 p.m. UTC
A constant (-1) is added to the length value, so we can have an addend
for free, and optimise the addition away if the addend is exactly 1.
---
 libavcodec/riscv/lpc_rvv.S | 2 +-
 libavutil/riscv/asm.S      | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S
index 8cf79963f1..fe80305d9a 100644
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@@ -87,8 +87,8 @@  func ff_lpc_apply_welch_window_rvv, zve64d
 endfunc
 
 func ff_lpc_compute_autocorr_rvv, zve64d, zbb
+        vtype_vli t1, a2, t2, e64, ta, ma, 1
         addi      a2, a2, 1
-        vtype_vli t1, a2, t2, e64, ta, ma
         li        t0, 1
         vsetvl    zero, a2, t1
         fcvt.d.l  ft0, t0
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 1e6358dcb5..2cf4f7b7ab 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -196,18 +196,21 @@ 
          * @param ew element width: e8, e16, e32 or e64
          * @param tp tail policy: tu or ta
          * @param mp mask policty: mu or ma
+         * @param addend optional addend for the vector length register
          */
-        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
+        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
         parse_vtype \ew, \tp, \mp
         /*
          * The difference between the CLZ's notionally equals the VLMUL value
          * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
          */
         slli    \tmp, \rs, 1 + VSEW_MAX
+        .if \addend - 1
+        addi    \tmp, \tmp, \addend - 1
+        .endif
         csrr    \rd, vlenb
-        addi    \tmp, \tmp, -1
-        clz     \rd, \rd
         clz     \tmp, \tmp
+        clz     \rd, \rd
         sub     \rd, \rd, \tmp
         max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
         .if     vsew < VSEW_MAX