[FFmpeg-devel,2/2] avcodec/vc1_block: Fix invalid shifts in vc1_decode_i_blocks()

Submitted by Michael Niedermayer on June 30, 2019, 4:16 a.m.

Details

Message ID 20190630041604.GH3118@michaelspb
State New
Headers show

Commit Message

Michael Niedermayer June 30, 2019, 4:16 a.m.
On Thu, Jun 27, 2019 at 12:40:58AM +0200, Michael Niedermayer wrote:
> On Sat, Jun 22, 2019 at 04:55:47PM +0200, Paul B Mahol wrote:
> > On 6/22/19, Michael Niedermayer <michael@niedermayer.cc> wrote:
> > > Fixes: left shift of negative value -9
> > > Fixes:
> > > 15299/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_MSS2_fuzzer-5660922678345728
> > >
> > > Found-by: continuous fuzzing process
> > > https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > ---
> > >  libavcodec/vc1_block.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c
> > > index 7e41791832..6137252580 100644
> > > --- a/libavcodec/vc1_block.c
> > > +++ b/libavcodec/vc1_block.c
> > > @@ -2600,13 +2600,13 @@ static void vc1_decode_i_blocks(VC1Context *v)
> > >                  if (v->rangeredfrm)
> > >                      for (k = 0; k < 6; k++)
> > >                          for (j = 0; j < 64; j++)
> > > -                            v->block[v->cur_blk_idx][block_map[k]][j] <<=
> > > 1;
> > > +                            v->block[v->cur_blk_idx][block_map[k]][j] *= 2;
> > >                  vc1_put_blocks_clamped(v, 1);
> > >              } else {
> > >                  if (v->rangeredfrm)
> > >                      for (k = 0; k < 6; k++)
> > >                          for (j = 0; j < 64; j++)
> > > -                            v->block[v->cur_blk_idx][block_map[k]][j] =
> > > (v->block[v->cur_blk_idx][block_map[k]][j] - 64) << 1;
> > > +                            v->block[v->cur_blk_idx][block_map[k]][j] =
> > > (v->block[v->cur_blk_idx][block_map[k]][j] - 64) * 2;
> > >                  vc1_put_blocks_clamped(v, 0);
> > >              }
> > >
> > > --
> > > 2.22.0
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> > 
> > 
> > This is much slower.
> 
> please provide your testcase and benchmarks or disassmbly

Noone ?
people just claim "This is much slower." without having tested it ?

ok heres the disassmbly of the 2 inner loops from the code with the patch applied
(that is with the multiplications in the source tested with gcc (4.8.5)
as you can see all multiplications are optimized to shifts or additions
even the old gcc used optmizes it to a shift


	.p2align 4,,10
	.p2align 3
.L1034:
	salw	(%rax)
	addq	$2, %rax
	cmpq	%rdx, %rax
	jne	.L1034
...
	.p2align 4,,10
	.p2align 3
.L1039:
	movswl	(%rax), %edx
	addq	$2, %rax
	leal	-128(%rdx,%rdx), %edx
	movw	%dx, -2(%rax)
	cmpq	%rcx, %rax
	jne	.L1039

	
before the patch it looked like this: 

	.p2align 4,,10
	.p2align 3
.L1034:
	salw	(%rax)
	addq	$2, %rax
	cmpq	%rdx, %rax
	jne	.L1034	
...
	.p2align 4,,10
	.p2align 3
.L1039:
	movswl	(%rax), %edx
	addq	$2, %rax
	leal	-128(%rdx,%rdx), %edx
	movw	%dx, -2(%rax)
	cmpq	%rcx, %rax
	jne	.L1039
	
I used this patch for testing and finding the parts of the code:

[...]

Patch hide | download patch | download mbox

--- a/libavcodec/vc1_block.c
+++ b/libavcodec/vc1_block.c
@@ -2579,16 +2579,20 @@  static void vc1_decode_i_blocks(VC1Context *v)
 
             if (v->overlap && v->pq >= 9) {
                 ff_vc1_i_overlap_filter(v);
+__asm volatile ("MARKA\n\t");
                 if (v->rangeredfrm)
                     for (k = 0; k < 6; k++)
                         for (j = 0; j < 64; j++)
-                            v->block[v->cur_blk_idx][block_map[k]][j] *= 2;
+                            v->block[v->cur_blk_idx][block_map[k]][j] <<= 1;
+__asm volatile ("MARKB\n\t");
                 vc1_put_blocks_clamped(v, 1);
             } else {
+__asm volatile ("MARKC\n\t");
                 if (v->rangeredfrm)
                     for (k = 0; k < 6; k++)
                         for (j = 0; j < 64; j++)
-                            v->block[v->cur_blk_idx][block_map[k]][j] = (v->block[v->cur_blk_idx][block_map[k]][j] - 64) * 2;
+                            v->block[v->cur_blk_idx][block_map[k]][j] = (v->block[v->cur_blk_idx][block_map[k]][j] - 64) << 1;
+__asm volatile ("MARKD\n\t");
                 vc1_put_blocks_clamped(v, 0);
             }