diff mbox

[FFmpeg-devel,2/3] avcodec/x86/lossless_videoencdsp: Fix handling of small widths

Message ID 20170929225829.2890-2-michael@niedermayer.cc
State Accepted
Commit df62b70de8aaa285168e72fe8f6e740843ca91fa
Headers show

Commit Message

Michael Niedermayer Sept. 29, 2017, 10:58 p.m. UTC
Fixes out-of-array access
Fixes: crash-huf.avi

Regression since: 6b41b4414934cc930468ccd5db598dd6ef643987

This could also be fixed by adding checks in the C code that calls the DSP function.

Found-by: Zhibin Hu and 连一汉 <lianyihan@360.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/x86/lossless_videoencdsp.asm | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

Comments

Michael Niedermayer Oct. 4, 2017, 10:50 p.m. UTC | #1
On Sat, Sep 30, 2017 at 12:58:28AM +0200, Michael Niedermayer wrote:
> Fixes out of array access
> Fixes: crash-huf.avi
> 
> Regression since: 6b41b4414934cc930468ccd5db598dd6ef643987
> 
> This could also be fixed by adding checks in the C code that calls the dsp
> 
> Found-by: Zhibin Hu and 连一汉 <lianyihan@360.cn>
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavcodec/x86/lossless_videoencdsp.asm | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)

will apply


[...]
diff mbox

Patch

diff --git a/libavcodec/x86/lossless_videoencdsp.asm b/libavcodec/x86/lossless_videoencdsp.asm
index 3cb7dce07f..a9c7a0a73c 100644
--- a/libavcodec/x86/lossless_videoencdsp.asm
+++ b/libavcodec/x86/lossless_videoencdsp.asm
@@ -42,10 +42,11 @@  cglobal diff_bytes, 4,5,2, dst, src1, src2, w
 %define i t0q
 %endmacro
 
-; label to jump to if w < regsize
-%macro DIFF_BYTES_LOOP_PREP 1
+; %1: label to jump to if 0 <= w < 2 * regsize, %2: label to jump to if w < 0
+%macro DIFF_BYTES_LOOP_PREP 2
     mov                i, wq
     and                i, -2 * regsize
+        js            %2
         jz            %1
     add             dstq, i
     add            src1q, i
@@ -87,7 +88,7 @@  cglobal diff_bytes, 4,5,2, dst, src1, src2, w
 %if mmsize > 16
     ; fall back to narrower xmm
     %define regsize mmsize / 2
-    DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa
+    DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa, .end_aa
 .loop2_%1%2:
     DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1
     add                i, 2 * regsize
@@ -114,7 +115,7 @@  cglobal diff_bytes, 4,5,2, dst, src1, src2, w
 INIT_MMX mmx
 DIFF_BYTES_PROLOGUE
     %define regsize mmsize
-    DIFF_BYTES_LOOP_PREP .skip_main_aa
+    DIFF_BYTES_LOOP_PREP .skip_main_aa, .end_aa
     DIFF_BYTES_BODY    a, a
 %undef i
 %endif
@@ -122,7 +123,7 @@  DIFF_BYTES_PROLOGUE
 INIT_XMM sse2
 DIFF_BYTES_PROLOGUE
     %define regsize mmsize
-    DIFF_BYTES_LOOP_PREP .skip_main_aa
+    DIFF_BYTES_LOOP_PREP .skip_main_aa, .end_aa
     test            dstq, regsize - 1
         jnz     .loop_uu
     test           src1q, regsize - 1
@@ -138,7 +139,7 @@  DIFF_BYTES_PROLOGUE
     %define regsize mmsize
     ; Directly using unaligned SSE2 version is marginally faster than
     ; branching based on arguments.
-    DIFF_BYTES_LOOP_PREP .skip_main_uu
+    DIFF_BYTES_LOOP_PREP .skip_main_uu, .end_uu
     test            dstq, regsize - 1
         jnz     .loop_uu
     test           src1q, regsize - 1