diff mbox series

[FFmpeg-devel,1/2] lavc/vc1dsp: fuse multiply-adds in R-V V inv_trans_4

Message ID 20240630115456.100163-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/2] lavc/vc1dsp: fuse multiply-adds in R-V V inv_trans_4 | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Rémi Denis-Courmont June 30, 2024, 11:54 a.m. UTC
---
 libavcodec/riscv/vc1dsp_rvv.S | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

Comments

Rémi Denis-Courmont July 1, 2024, 2:41 p.m. UTC | #1
T-Head C908 (cycles):            before   after
vc1dsp.vc1_inv_trans_4x4_rvv_i32: 128.0   120.0
vc1dsp.vc1_inv_trans_4x8_rvv_i32: 244.0   240.0
vc1dsp.vc1_inv_trans_8x4_rvv_i32: 239.2   235.2
diff mbox series

Patch

diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index 9d85377cec..8c127c7644 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -194,14 +194,12 @@  func ff_vc1_inv_trans_4_rvv, zve32x
         li      t4, 22
         vmul.vx v10, v2, t3
         li      t2, 10
-        vmul.vx v14, v1, t4
+        vmul.vx v26, v1, t4
+        vmul.vx v27, v3, t4
         vadd.vv v24, v8, v10  # t1
         vsub.vv v25, v8, v10  # t2
-        vmul.vx v16, v3, t2
-        vmul.vx v18, v3, t4
-        vmul.vx v20, v1, t2
-        vadd.vv v26, v14, v16 # t3
-        vsub.vv v27, v18, v20 # t4
+        vmacc.vx v26, t2, v3  # t3
+        vnmsac.vx v27, t2, v1 # t4
         vwadd.vv  v8, v24, v26
         vwsub.vv  v10, v25, v27
         vwadd.vv  v12, v25, v27