diff mbox series

[FFmpeg-devel,1/2] lavc/vc1dsp: fix potential overflow in R-V V inv_trans_4

Message ID 20240630090430.65753-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/2] lavc/vc1dsp: fix potential overflow in R-V V inv_trans_4 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 fail Make fate failed
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont June 30, 2024, 9:04 a.m. UTC
Judging by the coefficients, the last round of add/sub can overflow
to 17 bits with a very small probability, just as with the 8-point
transform. This is not observed under FATE, but better safe than sorry.
---
 libavcodec/riscv/vc1dsp_rvv.S | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index fa4bef2b8b..bf7864227a 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -202,13 +202,14 @@  func ff_vc1_inv_trans_4_rvv, zve32x
         vmul.vx v20, v1, t2
         vadd.vv v26, v14, v16 # t3
         vsub.vv v27, v18, v20 # t4
-        vadd.vv v0, v24, v26
-        vsub.vv v1, v25, v27
-        vadd.vv v2, v25, v27
-        vsub.vv v3, v24, v26
-        .irp    n,0,1,2,3
-        vssra.vx v\n, v\n, t1 # + 4 >> 3 or + 64 >> 7
-        .endr
+        vwadd.vv  v8, v24, v26
+        vwsub.vv  v10, v25, v27
+        vwadd.vv  v12, v25, v27
+        vwsub.vv  v14, v24, v26
+        vnclip.wx v0, v8, t1
+        vnclip.wx v1, v10, t1
+        vnclip.wx v2, v12, t1
+        vnclip.wx v3, v14, t1
         jr      t0
 endfunc