diff mbox

[FFmpeg-devel,1/2] checkasm: add sbrdsp tests

Message ID 6b3830b9-8bbd-f0df-c256-67e2eaf44919@gmail.com
State Accepted
Commit bcbe9e444790c6ac299aa01958dcb7e9ac70fc82
Headers show

Commit Message

James Almer July 4, 2017, 6:15 p.m. UTC
On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>> <michael@niedermayer.cc> wrote:
>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>
>>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
>>>>>>
>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0
>>>>>>
>>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
>>>>>>
>>>>>> Another case of a 32-bit int being used as part of a 64-bit operation.
>>>>>
>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some reason,
>>>>> but based on what you said i assume the attached patch should fix it.
>>>>
>>>> no crash occurs here with this, so it seems fixed
>>>
>>> Should i push the patchset or wait a little bit longer ?
>>
>> Patchset applied.
> 
> it seems there's some issue still in this:
> 
> checkasm: using random seed 3655967467
> MMX:
>  - audiodsp.audiodsp             [OK]
>  - blockdsp.blockdsp             [OK]
>  - h264dsp.idct                  [OK]
>  - h264pred.pred4x4              [OK]
>  - h264pred.pred8x8              [OK]
>  - h264pred.pred16x16            [OK]
>  - pixblockdsp.get_pixels        [OK]
>  - pixblockdsp.diff_pixels       [OK]
>  - vp8dsp.idct                   [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.itxfm                  [OK]
>  - vp9dsp.mc                     [OK]
> MMXEXT:
>  - audiodsp.audiodsp             [OK]
>  - h264dsp.idct                  [OK]
>  - h264pred.pred4x4              [OK]
>  - h264pred.pred8x8              [OK]
>  - h264pred.pred16x16            [OK]
>  - h264pred.pred8x8l             [OK]
>  - h264qpel.put                  [OK]
>  - h264qpel.avg                  [OK]
>  - hevc_add_res.add_residual     [OK]
>  - hevc_idct.idct_dc             [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.itxfm                  [OK]
>  - vp9dsp.loopfilter             [OK]
>  - vp9dsp.mc                     [OK]
> SSE:
>  - aacpsdsp.add_squares          [OK]
>  - aacpsdsp.mul_pair_single      [OK]
>  - aacpsdsp.hybrid_analysis      [OK]
>  - sbrdsp.sum64x5                [OK]
>  - sbrdsp.sum_square             [OK]
>  - sbrdsp.neg_odd_64             [OK]
>  - sbrdsp.qmf_post_shuffle       [OK]
>  - sbrdsp.qmf_deint_neg          [OK]
>  - sbrdsp.qmf_deint_bfly         [OK]
>  - sbrdsp.autocorrelate          [OK]
>  - sbrdsp.hf_gen                 [OK]
>  - sbrdsp.hf_g_filt              [OK]
>  - audiodsp.audiodsp             [OK]
>  - blockdsp.blockdsp             [OK]
>  - fmtconvert.fmtconvert         [OK]
>  - h264pred.pred16x16            [OK]
>  - vp8dsp.idct                   [OK]
>  - vp8dsp.mc                     [OK]
>  - vp9dsp.ipred                  [OK]
>  - vp9dsp.mc                     [OK]
>  - float_dsp.vector_fmul         [OK]
>  - float_dsp.vector_fmac         [OK]
>  - float_dsp.butterflies_float   [OK]
>  - float_dsp.scalarproduct_float [OK]
> SSE2:
>  - sbrdsp.qmf_pre_shuffle        [OK]
>  - sbrdsp.qmf_deint_bfly         [OK]
> 
> Program received signal SIGSEGV, Segmentation fault.
> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> 418         movu       m7, [Yq + 2*count + mmsize]
> (gdb) bt
> Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
> #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> #1  0x000000000043659b in checkasm_checked_call () at tests/checkasm/x86/checkasm.asm:77
> #2  0xdeadbeefdeadbeef in ?? ()
> #3  0xdeadbeefdeadbeef in ?? ()
> #4  0xdeadbeefdeadbeef in ?? ()
> #5  0xdeadbeefdeadbeef in ?? ()
> #6  0xdeadbeefdeadbeef in ?? ()
> #7  0xdeadbeefdeadbeef in ?? ()
> #8  0xdeadbeefdeadbeef in ?? ()
> #9  0xdeadbeefdeadbeef in ?? ()
> #10 0xdeadbeefdeadbeef in ?? ()
> #11 0xdeadbeefdeadbeef in ?? ()
> #12 0xdeadbeefdeadbeef in ?? ()
> #13 0xdeadbeefdeadbeef in ?? ()
> #14 0xdeadbeefdeadbeef in ?? ()
> #15 0xdeadbeefdeadbeef in ?? ()
> #16 0xdeadbeefdeadbeef in ?? ()
> #17 0xdeadbeefdeadbeef in ?? ()
> #18 0xdeadbeefdeadbeef in ?? ()
> #19 0x00007fffffffd870 in ?? ()
> #20 0x00007fffffffcc70 in ?? ()
> #21 0x00007fffffffce70 in ?? ()
> #22 0x0000000000000000 in ?? ()
> (gdb) info all-registers
> rax            0x0      0
> rbx            0xed56bb2dcb3c7736       -1344681633365854410
> rcx            0x8e8    2280
> rdx            0x7ab77bbbffffd070       8842672440749314160
> rsi            0x7ab77bbbffffce70       8842672440749313648
> rdi            0xf56e7777ffffdc70       -761539929699263376
> rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
> rsp            0x7fffffffcae8   0x7fffffffcae8
> r8             0xdeadbeef00000000       -2401053092612145152
> r9             0x85490444000009c0       -8842531703260968512
> r10            0x684bf0 6835184
> r11            0x1      1
> r12            0x4a75479abd64e097       5365273261009854615
> r13            0x249214109d5d1c88       2635190793557318792
> r14            0xb64a9c9e5d318408       -5311260606547786744
> r15            0xdf9a54b303f1d3a3       -2334460328996121693
> rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
> eflags         0x10206  [ PF IF RF ]
> cs             0x33     51
> ss             0x2b     43
> ds             0x0      0
> es             0x0      0
> fs             0x0      0
> gs             0x0      0
> st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
> st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
> st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
> st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
> st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
> st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
> st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
> st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
> 

Does the attached patch fix it?
From 14c4b77569af06ae181e521330aef6290f29fca1 Mon Sep 17 00:00:00 2001
From: James Almer <jamrial@gmail.com>
Date: Tue, 4 Jul 2017 15:05:47 -0300
Subject: [PATCH] x86/sbrdsp: zero extend m_max in apply_noise_main

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavcodec/x86/sbrdsp.asm | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

Comments

Michael Niedermayer July 5, 2017, 1:45 a.m. UTC | #1
On Tue, Jul 04, 2017 at 03:15:56PM -0300, James Almer wrote:
> On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
> > On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
> >> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
> >>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
> >>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
> >>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
> >>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
> >>>>>> <michael@niedermayer.cc> wrote:
> >>>>>>> Program received signal SIGSEGV, Segmentation fault.
> >>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
> >>>>>>
> >>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
> >>>>>>
> >>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0
> >>>>>>
> >>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
> >>>>>>
> >>>>>> Another case of a 32-bit int being used as part of a 64-bit operation.
> >>>>>
> >>>>> I can't reproduce it on my ArchLinux x86_64 environment for some reason,
> >>>>> but based on what you said i assume the attached patch should fix it.
> >>>>
> >>>> no crash occurs here with this, so it seems fixed
> >>>
> >>> Should i push the patchset or wait a little bit longer ?
> >>
> >> Patchset applied.
> > 
> > it seems there's some issue still in this:
> > 
> > checkasm: using random seed 3655967467
> > MMX:
> >  - audiodsp.audiodsp             [OK]
> >  - blockdsp.blockdsp             [OK]
> >  - h264dsp.idct                  [OK]
> >  - h264pred.pred4x4              [OK]
> >  - h264pred.pred8x8              [OK]
> >  - h264pred.pred16x16            [OK]
> >  - pixblockdsp.get_pixels        [OK]
> >  - pixblockdsp.diff_pixels       [OK]
> >  - vp8dsp.idct                   [OK]
> >  - vp8dsp.mc                     [OK]
> >  - vp9dsp.ipred                  [OK]
> >  - vp9dsp.itxfm                  [OK]
> >  - vp9dsp.mc                     [OK]
> > MMXEXT:
> >  - audiodsp.audiodsp             [OK]
> >  - h264dsp.idct                  [OK]
> >  - h264pred.pred4x4              [OK]
> >  - h264pred.pred8x8              [OK]
> >  - h264pred.pred16x16            [OK]
> >  - h264pred.pred8x8l             [OK]
> >  - h264qpel.put                  [OK]
> >  - h264qpel.avg                  [OK]
> >  - hevc_add_res.add_residual     [OK]
> >  - hevc_idct.idct_dc             [OK]
> >  - vp8dsp.mc                     [OK]
> >  - vp9dsp.ipred                  [OK]
> >  - vp9dsp.itxfm                  [OK]
> >  - vp9dsp.loopfilter             [OK]
> >  - vp9dsp.mc                     [OK]
> > SSE:
> >  - aacpsdsp.add_squares          [OK]
> >  - aacpsdsp.mul_pair_single      [OK]
> >  - aacpsdsp.hybrid_analysis      [OK]
> >  - sbrdsp.sum64x5                [OK]
> >  - sbrdsp.sum_square             [OK]
> >  - sbrdsp.neg_odd_64             [OK]
> >  - sbrdsp.qmf_post_shuffle       [OK]
> >  - sbrdsp.qmf_deint_neg          [OK]
> >  - sbrdsp.qmf_deint_bfly         [OK]
> >  - sbrdsp.autocorrelate          [OK]
> >  - sbrdsp.hf_gen                 [OK]
> >  - sbrdsp.hf_g_filt              [OK]
> >  - audiodsp.audiodsp             [OK]
> >  - blockdsp.blockdsp             [OK]
> >  - fmtconvert.fmtconvert         [OK]
> >  - h264pred.pred16x16            [OK]
> >  - vp8dsp.idct                   [OK]
> >  - vp8dsp.mc                     [OK]
> >  - vp9dsp.ipred                  [OK]
> >  - vp9dsp.mc                     [OK]
> >  - float_dsp.vector_fmul         [OK]
> >  - float_dsp.vector_fmac         [OK]
> >  - float_dsp.butterflies_float   [OK]
> >  - float_dsp.scalarproduct_float [OK]
> > SSE2:
> >  - sbrdsp.qmf_pre_shuffle        [OK]
> >  - sbrdsp.qmf_deint_bfly         [OK]
> > 
> > Program received signal SIGSEGV, Segmentation fault.
> > apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> > 418         movu       m7, [Yq + 2*count + mmsize]
> > (gdb) bt
> > Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
> > #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
> > #1  0x000000000043659b in checkasm_checked_call () at tests/checkasm/x86/checkasm.asm:77
> > #2  0xdeadbeefdeadbeef in ?? ()
> > #3  0xdeadbeefdeadbeef in ?? ()
> > #4  0xdeadbeefdeadbeef in ?? ()
> > #5  0xdeadbeefdeadbeef in ?? ()
> > #6  0xdeadbeefdeadbeef in ?? ()
> > #7  0xdeadbeefdeadbeef in ?? ()
> > #8  0xdeadbeefdeadbeef in ?? ()
> > #9  0xdeadbeefdeadbeef in ?? ()
> > #10 0xdeadbeefdeadbeef in ?? ()
> > #11 0xdeadbeefdeadbeef in ?? ()
> > #12 0xdeadbeefdeadbeef in ?? ()
> > #13 0xdeadbeefdeadbeef in ?? ()
> > #14 0xdeadbeefdeadbeef in ?? ()
> > #15 0xdeadbeefdeadbeef in ?? ()
> > #16 0xdeadbeefdeadbeef in ?? ()
> > #17 0xdeadbeefdeadbeef in ?? ()
> > #18 0xdeadbeefdeadbeef in ?? ()
> > #19 0x00007fffffffd870 in ?? ()
> > #20 0x00007fffffffcc70 in ?? ()
> > #21 0x00007fffffffce70 in ?? ()
> > #22 0x0000000000000000 in ?? ()
> > (gdb) info all-registers
> > rax            0x0      0
> > rbx            0xed56bb2dcb3c7736       -1344681633365854410
> > rcx            0x8e8    2280
> > rdx            0x7ab77bbbffffd070       8842672440749314160
> > rsi            0x7ab77bbbffffce70       8842672440749313648
> > rdi            0xf56e7777ffffdc70       -761539929699263376
> > rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
> > rsp            0x7fffffffcae8   0x7fffffffcae8
> > r8             0xdeadbeef00000000       -2401053092612145152
> > r9             0x85490444000009c0       -8842531703260968512
> > r10            0x684bf0 6835184
> > r11            0x1      1
> > r12            0x4a75479abd64e097       5365273261009854615
> > r13            0x249214109d5d1c88       2635190793557318792
> > r14            0xb64a9c9e5d318408       -5311260606547786744
> > r15            0xdf9a54b303f1d3a3       -2334460328996121693
> > rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
> > eflags         0x10206  [ PF IF RF ]
> > cs             0x33     51
> > ss             0x2b     43
> > ds             0x0      0
> > es             0x0      0
> > fs             0x0      0
> > gs             0x0      0
> > st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
> > st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
> > st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
> > st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
> > st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
> > st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
> > st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
> > st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
> > 
> 
> Does the attached patch fix it?

linux x86-64, seems so

mingw-32 no

SSE:
 - aacpsdsp.add_squares          [OK]
 - aacpsdsp.mul_pair_single      [OK]
 - aacpsdsp.hybrid_analysis      [OK]
 - sbrdsp.sum64x5                [OK]
   sum_square_sse (failed to issue emms)
 - sbrdsp.sum_square             [FAILED]
 - sbrdsp.neg_odd_64             [OK]
 - sbrdsp.qmf_post_shuffle       [OK]


linux32:
 - aacpsdsp.hybrid_analysis      [OK]
 - sbrdsp.sum64x5                [OK]
   sum_square_sse (failed to issue emms)
 - sbrdsp.sum_square             [FAILED]
 - sbrdsp.neg_odd_64             [OK]
 - sbrdsp.qmf_post_shuffle       [OK]
 - sbrdsp.qmf_deint_neg          [OK]

 thx

 [...]
James Almer July 5, 2017, 2:04 a.m. UTC | #2
On 7/4/2017 10:45 PM, Michael Niedermayer wrote:
> On Tue, Jul 04, 2017 at 03:15:56PM -0300, James Almer wrote:
>> On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
>>> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>>>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>>>> <michael@niedermayer.cc> wrote:
>>>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>>>
>>>>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
>>>>>>>>
>>>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0
>>>>>>>>
>>>>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
>>>>>>>>
>>>>>>>> Another case of a 32-bit int being used as part of a 64-bit operation.
>>>>>>>
>>>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some reason,
>>>>>>> but based on what you said i assume the attached patch should fix it.
>>>>>>
>>>>>> no crash occurs here with this, so it seems fixed
>>>>>
>>>>> Should i push the patchset or wait a little bit longer ?
>>>>
>>>> Patchset applied.
>>>
>>> it seems there's some issue still in this:
>>>
>>> checkasm: using random seed 3655967467
>>> MMX:
>>>  - audiodsp.audiodsp             [OK]
>>>  - blockdsp.blockdsp             [OK]
>>>  - h264dsp.idct                  [OK]
>>>  - h264pred.pred4x4              [OK]
>>>  - h264pred.pred8x8              [OK]
>>>  - h264pred.pred16x16            [OK]
>>>  - pixblockdsp.get_pixels        [OK]
>>>  - pixblockdsp.diff_pixels       [OK]
>>>  - vp8dsp.idct                   [OK]
>>>  - vp8dsp.mc                     [OK]
>>>  - vp9dsp.ipred                  [OK]
>>>  - vp9dsp.itxfm                  [OK]
>>>  - vp9dsp.mc                     [OK]
>>> MMXEXT:
>>>  - audiodsp.audiodsp             [OK]
>>>  - h264dsp.idct                  [OK]
>>>  - h264pred.pred4x4              [OK]
>>>  - h264pred.pred8x8              [OK]
>>>  - h264pred.pred16x16            [OK]
>>>  - h264pred.pred8x8l             [OK]
>>>  - h264qpel.put                  [OK]
>>>  - h264qpel.avg                  [OK]
>>>  - hevc_add_res.add_residual     [OK]
>>>  - hevc_idct.idct_dc             [OK]
>>>  - vp8dsp.mc                     [OK]
>>>  - vp9dsp.ipred                  [OK]
>>>  - vp9dsp.itxfm                  [OK]
>>>  - vp9dsp.loopfilter             [OK]
>>>  - vp9dsp.mc                     [OK]
>>> SSE:
>>>  - aacpsdsp.add_squares          [OK]
>>>  - aacpsdsp.mul_pair_single      [OK]
>>>  - aacpsdsp.hybrid_analysis      [OK]
>>>  - sbrdsp.sum64x5                [OK]
>>>  - sbrdsp.sum_square             [OK]
>>>  - sbrdsp.neg_odd_64             [OK]
>>>  - sbrdsp.qmf_post_shuffle       [OK]
>>>  - sbrdsp.qmf_deint_neg          [OK]
>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>  - sbrdsp.autocorrelate          [OK]
>>>  - sbrdsp.hf_gen                 [OK]
>>>  - sbrdsp.hf_g_filt              [OK]
>>>  - audiodsp.audiodsp             [OK]
>>>  - blockdsp.blockdsp             [OK]
>>>  - fmtconvert.fmtconvert         [OK]
>>>  - h264pred.pred16x16            [OK]
>>>  - vp8dsp.idct                   [OK]
>>>  - vp8dsp.mc                     [OK]
>>>  - vp9dsp.ipred                  [OK]
>>>  - vp9dsp.mc                     [OK]
>>>  - float_dsp.vector_fmul         [OK]
>>>  - float_dsp.vector_fmac         [OK]
>>>  - float_dsp.butterflies_float   [OK]
>>>  - float_dsp.scalarproduct_float [OK]
>>> SSE2:
>>>  - sbrdsp.qmf_pre_shuffle        [OK]
>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>
>>> Program received signal SIGSEGV, Segmentation fault.
>>> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>> 418         movu       m7, [Yq + 2*count + mmsize]
>>> (gdb) bt
>>> Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
>>> #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>> #1  0x000000000043659b in checkasm_checked_call () at tests/checkasm/x86/checkasm.asm:77
>>> #2  0xdeadbeefdeadbeef in ?? ()
>>> #3  0xdeadbeefdeadbeef in ?? ()
>>> #4  0xdeadbeefdeadbeef in ?? ()
>>> #5  0xdeadbeefdeadbeef in ?? ()
>>> #6  0xdeadbeefdeadbeef in ?? ()
>>> #7  0xdeadbeefdeadbeef in ?? ()
>>> #8  0xdeadbeefdeadbeef in ?? ()
>>> #9  0xdeadbeefdeadbeef in ?? ()
>>> #10 0xdeadbeefdeadbeef in ?? ()
>>> #11 0xdeadbeefdeadbeef in ?? ()
>>> #12 0xdeadbeefdeadbeef in ?? ()
>>> #13 0xdeadbeefdeadbeef in ?? ()
>>> #14 0xdeadbeefdeadbeef in ?? ()
>>> #15 0xdeadbeefdeadbeef in ?? ()
>>> #16 0xdeadbeefdeadbeef in ?? ()
>>> #17 0xdeadbeefdeadbeef in ?? ()
>>> #18 0xdeadbeefdeadbeef in ?? ()
>>> #19 0x00007fffffffd870 in ?? ()
>>> #20 0x00007fffffffcc70 in ?? ()
>>> #21 0x00007fffffffce70 in ?? ()
>>> #22 0x0000000000000000 in ?? ()
>>> (gdb) info all-registers
>>> rax            0x0      0
>>> rbx            0xed56bb2dcb3c7736       -1344681633365854410
>>> rcx            0x8e8    2280
>>> rdx            0x7ab77bbbffffd070       8842672440749314160
>>> rsi            0x7ab77bbbffffce70       8842672440749313648
>>> rdi            0xf56e7777ffffdc70       -761539929699263376
>>> rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
>>> rsp            0x7fffffffcae8   0x7fffffffcae8
>>> r8             0xdeadbeef00000000       -2401053092612145152
>>> r9             0x85490444000009c0       -8842531703260968512
>>> r10            0x684bf0 6835184
>>> r11            0x1      1
>>> r12            0x4a75479abd64e097       5365273261009854615
>>> r13            0x249214109d5d1c88       2635190793557318792
>>> r14            0xb64a9c9e5d318408       -5311260606547786744
>>> r15            0xdf9a54b303f1d3a3       -2334460328996121693
>>> rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
>>> eflags         0x10206  [ PF IF RF ]
>>> cs             0x33     51
>>> ss             0x2b     43
>>> ds             0x0      0
>>> es             0x0      0
>>> fs             0x0      0
>>> gs             0x0      0
>>> st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
>>> st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
>>> st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
>>> st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
>>> st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
>>> st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
>>> st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
>>> st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
>>>
>>
>> Does the attached patch fix it?
> 
> linux x86-64, seems so

Applied then.

> 
> mingw-32 no
> 
> SSE:
>  - aacpsdsp.add_squares          [OK]
>  - aacpsdsp.mul_pair_single      [OK]
>  - aacpsdsp.hybrid_analysis      [OK]
>  - sbrdsp.sum64x5                [OK]
>    sum_square_sse (failed to issue emms)
>  - sbrdsp.sum_square             [FAILED]
>  - sbrdsp.neg_odd_64             [OK]
>  - sbrdsp.qmf_post_shuffle       [OK]
> 
> 
> linux32:
>  - aacpsdsp.hybrid_analysis      [OK]
>  - sbrdsp.sum64x5                [OK]
>    sum_square_sse (failed to issue emms)
>  - sbrdsp.sum_square             [FAILED]
>  - sbrdsp.neg_odd_64             [OK]
>  - sbrdsp.qmf_post_shuffle       [OK]
>  - sbrdsp.qmf_deint_neg          [OK]
> 
>  thx

Fixed as well.
Paul B Mahol July 5, 2017, 9:40 a.m. UTC | #3
On 7/5/17, James Almer <jamrial@gmail.com> wrote:
> On 7/4/2017 10:45 PM, Michael Niedermayer wrote:
>> On Tue, Jul 04, 2017 at 03:15:56PM -0300, James Almer wrote:
>>> On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
>>>> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>>>>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>>>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>>>>> <michael@niedermayer.cc> wrote:
>>>>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>>>>
>>>>>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
>>>>>>>>>
>>>>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps
>>>>>>>>>> (%rsi,%r8,1),%xmm0
>>>>>>>>>
>>>>>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
>>>>>>>>>
>>>>>>>>> Another case of a 32-bit int being used as part of a 64-bit
>>>>>>>>> operation.
>>>>>>>>
>>>>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some
>>>>>>>> reason,
>>>>>>>> but based on what you said i assume the attached patch should fix
>>>>>>>> it.
>>>>>>>
>>>>>>> no crash occurs here with this, so it seems fixed
>>>>>>
>>>>>> Should i push the patchset or wait a little bit longer ?
>>>>>
>>>>> Patchset applied.
>>>>
>>>> it seems there's some issue still in this:
>>>>
>>>> checkasm: using random seed 3655967467
>>>> MMX:
>>>>  - audiodsp.audiodsp             [OK]
>>>>  - blockdsp.blockdsp             [OK]
>>>>  - h264dsp.idct                  [OK]
>>>>  - h264pred.pred4x4              [OK]
>>>>  - h264pred.pred8x8              [OK]
>>>>  - h264pred.pred16x16            [OK]
>>>>  - pixblockdsp.get_pixels        [OK]
>>>>  - pixblockdsp.diff_pixels       [OK]
>>>>  - vp8dsp.idct                   [OK]
>>>>  - vp8dsp.mc                     [OK]
>>>>  - vp9dsp.ipred                  [OK]
>>>>  - vp9dsp.itxfm                  [OK]
>>>>  - vp9dsp.mc                     [OK]
>>>> MMXEXT:
>>>>  - audiodsp.audiodsp             [OK]
>>>>  - h264dsp.idct                  [OK]
>>>>  - h264pred.pred4x4              [OK]
>>>>  - h264pred.pred8x8              [OK]
>>>>  - h264pred.pred16x16            [OK]
>>>>  - h264pred.pred8x8l             [OK]
>>>>  - h264qpel.put                  [OK]
>>>>  - h264qpel.avg                  [OK]
>>>>  - hevc_add_res.add_residual     [OK]
>>>>  - hevc_idct.idct_dc             [OK]
>>>>  - vp8dsp.mc                     [OK]
>>>>  - vp9dsp.ipred                  [OK]
>>>>  - vp9dsp.itxfm                  [OK]
>>>>  - vp9dsp.loopfilter             [OK]
>>>>  - vp9dsp.mc                     [OK]
>>>> SSE:
>>>>  - aacpsdsp.add_squares          [OK]
>>>>  - aacpsdsp.mul_pair_single      [OK]
>>>>  - aacpsdsp.hybrid_analysis      [OK]
>>>>  - sbrdsp.sum64x5                [OK]
>>>>  - sbrdsp.sum_square             [OK]
>>>>  - sbrdsp.neg_odd_64             [OK]
>>>>  - sbrdsp.qmf_post_shuffle       [OK]
>>>>  - sbrdsp.qmf_deint_neg          [OK]
>>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>>  - sbrdsp.autocorrelate          [OK]
>>>>  - sbrdsp.hf_gen                 [OK]
>>>>  - sbrdsp.hf_g_filt              [OK]
>>>>  - audiodsp.audiodsp             [OK]
>>>>  - blockdsp.blockdsp             [OK]
>>>>  - fmtconvert.fmtconvert         [OK]
>>>>  - h264pred.pred16x16            [OK]
>>>>  - vp8dsp.idct                   [OK]
>>>>  - vp8dsp.mc                     [OK]
>>>>  - vp9dsp.ipred                  [OK]
>>>>  - vp9dsp.mc                     [OK]
>>>>  - float_dsp.vector_fmul         [OK]
>>>>  - float_dsp.vector_fmac         [OK]
>>>>  - float_dsp.butterflies_float   [OK]
>>>>  - float_dsp.scalarproduct_float [OK]
>>>> SSE2:
>>>>  - sbrdsp.qmf_pre_shuffle        [OK]
>>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>>
>>>> Program received signal SIGSEGV, Segmentation fault.
>>>> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>>> 418         movu       m7, [Yq + 2*count + mmsize]
>>>> (gdb) bt
>>>> Python Exception <type 'exceptions.ImportError'> No module named
>>>> gdb.frames:
>>>> #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>>> #1  0x000000000043659b in checkasm_checked_call () at
>>>> tests/checkasm/x86/checkasm.asm:77
>>>> #2  0xdeadbeefdeadbeef in ?? ()
>>>> #3  0xdeadbeefdeadbeef in ?? ()
>>>> #4  0xdeadbeefdeadbeef in ?? ()
>>>> #5  0xdeadbeefdeadbeef in ?? ()
>>>> #6  0xdeadbeefdeadbeef in ?? ()
>>>> #7  0xdeadbeefdeadbeef in ?? ()
>>>> #8  0xdeadbeefdeadbeef in ?? ()
>>>> #9  0xdeadbeefdeadbeef in ?? ()
>>>> #10 0xdeadbeefdeadbeef in ?? ()
>>>> #11 0xdeadbeefdeadbeef in ?? ()
>>>> #12 0xdeadbeefdeadbeef in ?? ()
>>>> #13 0xdeadbeefdeadbeef in ?? ()
>>>> #14 0xdeadbeefdeadbeef in ?? ()
>>>> #15 0xdeadbeefdeadbeef in ?? ()
>>>> #16 0xdeadbeefdeadbeef in ?? ()
>>>> #17 0xdeadbeefdeadbeef in ?? ()
>>>> #18 0xdeadbeefdeadbeef in ?? ()
>>>> #19 0x00007fffffffd870 in ?? ()
>>>> #20 0x00007fffffffcc70 in ?? ()
>>>> #21 0x00007fffffffce70 in ?? ()
>>>> #22 0x0000000000000000 in ?? ()
>>>> (gdb) info all-registers
>>>> rax            0x0      0
>>>> rbx            0xed56bb2dcb3c7736       -1344681633365854410
>>>> rcx            0x8e8    2280
>>>> rdx            0x7ab77bbbffffd070       8842672440749314160
>>>> rsi            0x7ab77bbbffffce70       8842672440749313648
>>>> rdi            0xf56e7777ffffdc70       -761539929699263376
>>>> rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
>>>> rsp            0x7fffffffcae8   0x7fffffffcae8
>>>> r8             0xdeadbeef00000000       -2401053092612145152
>>>> r9             0x85490444000009c0       -8842531703260968512
>>>> r10            0x684bf0 6835184
>>>> r11            0x1      1
>>>> r12            0x4a75479abd64e097       5365273261009854615
>>>> r13            0x249214109d5d1c88       2635190793557318792
>>>> r14            0xb64a9c9e5d318408       -5311260606547786744
>>>> r15            0xdf9a54b303f1d3a3       -2334460328996121693
>>>> rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
>>>> eflags         0x10206  [ PF IF RF ]
>>>> cs             0x33     51
>>>> ss             0x2b     43
>>>> ds             0x0      0
>>>> es             0x0      0
>>>> fs             0x0      0
>>>> gs             0x0      0
>>>> st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
>>>> st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
>>>> st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
>>>> st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
>>>> st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
>>>> st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
>>>> st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
>>>> st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
>>>>
>>>
>>> Does the attached patch fix it?
>>
>> linux x86-64, seems so
>
> Applied then.
>
>>
>> mingw-32 no
>>
>> SSE:
>>  - aacpsdsp.add_squares          [OK]
>>  - aacpsdsp.mul_pair_single      [OK]
>>  - aacpsdsp.hybrid_analysis      [OK]
>>  - sbrdsp.sum64x5                [OK]
>>    sum_square_sse (failed to issue emms)
>>  - sbrdsp.sum_square             [FAILED]
>>  - sbrdsp.neg_odd_64             [OK]
>>  - sbrdsp.qmf_post_shuffle       [OK]
>>
>>
>> linux32:
>>  - aacpsdsp.hybrid_analysis      [OK]
>>  - sbrdsp.sum64x5                [OK]
>>    sum_square_sse (failed to issue emms)
>>  - sbrdsp.sum_square             [FAILED]
>>  - sbrdsp.neg_odd_64             [OK]
>>  - sbrdsp.qmf_post_shuffle       [OK]
>>  - sbrdsp.qmf_deint_neg          [OK]
>>
>>  thx
>
> Fixed as well.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

I get segfaults in the sbrdsp code after some recent commit.
Hendrik Leppkes July 5, 2017, 10:52 a.m. UTC | #4
On Wed, Jul 5, 2017 at 11:40 AM, Paul B Mahol <onemda@gmail.com> wrote:
> On 7/5/17, James Almer <jamrial@gmail.com> wrote:
>> On 7/4/2017 10:45 PM, Michael Niedermayer wrote:
>>> On Tue, Jul 04, 2017 at 03:15:56PM -0300, James Almer wrote:
>>>> On 7/4/2017 2:31 PM, Michael Niedermayer wrote:
>>>>> On Mon, Jul 03, 2017 at 02:32:28PM +0200, Matthieu Bouron wrote:
>>>>>> On Fri, Jun 30, 2017 at 05:16:37PM +0200, Matthieu Bouron wrote:
>>>>>>> On Fri, Jun 30, 2017 at 03:55:52PM +0200, Michael Niedermayer wrote:
>>>>>>>> On Thu, Jun 29, 2017 at 10:53:06PM -0300, James Almer wrote:
>>>>>>>>> On 6/29/2017 10:14 PM, Henrik Gramner wrote:
>>>>>>>>>> On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
>>>>>>>>>> <michael@niedermayer.cc> wrote:
>>>>>>>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>>> 0x0000000000684919 in ff_sbr_hf_gen_sse ()
>>>>>>>>>>
>>>>>>>>>>>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
>>>>>>>>>>
>>>>>>>>>>> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps
>>>>>>>>>>> (%rsi,%r8,1),%xmm0
>>>>>>>>>>
>>>>>>>>>>> r9             0xdeadbeef00000080       -2401053092612145024
>>>>>>>>>>
>>>>>>>>>> Another case of a 32-bit int being used as part of a 64-bit
>>>>>>>>>> operation.
>>>>>>>>>
>>>>>>>>> I can't reproduce it on my ArchLinux x86_64 environment for some
>>>>>>>>> reason,
>>>>>>>>> but based on what you said i assume the attached patch should fix
>>>>>>>>> it.
>>>>>>>>
>>>>>>>> no crash occurs here with this, so it seems fixed
>>>>>>>
>>>>>>> Should i push the patchset or wait a little bit longer ?
>>>>>>
>>>>>> Patchset applied.
>>>>>
>>>>> it seems there's some issue still in this:
>>>>>
>>>>> checkasm: using random seed 3655967467
>>>>> MMX:
>>>>>  - audiodsp.audiodsp             [OK]
>>>>>  - blockdsp.blockdsp             [OK]
>>>>>  - h264dsp.idct                  [OK]
>>>>>  - h264pred.pred4x4              [OK]
>>>>>  - h264pred.pred8x8              [OK]
>>>>>  - h264pred.pred16x16            [OK]
>>>>>  - pixblockdsp.get_pixels        [OK]
>>>>>  - pixblockdsp.diff_pixels       [OK]
>>>>>  - vp8dsp.idct                   [OK]
>>>>>  - vp8dsp.mc                     [OK]
>>>>>  - vp9dsp.ipred                  [OK]
>>>>>  - vp9dsp.itxfm                  [OK]
>>>>>  - vp9dsp.mc                     [OK]
>>>>> MMXEXT:
>>>>>  - audiodsp.audiodsp             [OK]
>>>>>  - h264dsp.idct                  [OK]
>>>>>  - h264pred.pred4x4              [OK]
>>>>>  - h264pred.pred8x8              [OK]
>>>>>  - h264pred.pred16x16            [OK]
>>>>>  - h264pred.pred8x8l             [OK]
>>>>>  - h264qpel.put                  [OK]
>>>>>  - h264qpel.avg                  [OK]
>>>>>  - hevc_add_res.add_residual     [OK]
>>>>>  - hevc_idct.idct_dc             [OK]
>>>>>  - vp8dsp.mc                     [OK]
>>>>>  - vp9dsp.ipred                  [OK]
>>>>>  - vp9dsp.itxfm                  [OK]
>>>>>  - vp9dsp.loopfilter             [OK]
>>>>>  - vp9dsp.mc                     [OK]
>>>>> SSE:
>>>>>  - aacpsdsp.add_squares          [OK]
>>>>>  - aacpsdsp.mul_pair_single      [OK]
>>>>>  - aacpsdsp.hybrid_analysis      [OK]
>>>>>  - sbrdsp.sum64x5                [OK]
>>>>>  - sbrdsp.sum_square             [OK]
>>>>>  - sbrdsp.neg_odd_64             [OK]
>>>>>  - sbrdsp.qmf_post_shuffle       [OK]
>>>>>  - sbrdsp.qmf_deint_neg          [OK]
>>>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>>>  - sbrdsp.autocorrelate          [OK]
>>>>>  - sbrdsp.hf_gen                 [OK]
>>>>>  - sbrdsp.hf_g_filt              [OK]
>>>>>  - audiodsp.audiodsp             [OK]
>>>>>  - blockdsp.blockdsp             [OK]
>>>>>  - fmtconvert.fmtconvert         [OK]
>>>>>  - h264pred.pred16x16            [OK]
>>>>>  - vp8dsp.idct                   [OK]
>>>>>  - vp8dsp.mc                     [OK]
>>>>>  - vp9dsp.ipred                  [OK]
>>>>>  - vp9dsp.mc                     [OK]
>>>>>  - float_dsp.vector_fmul         [OK]
>>>>>  - float_dsp.vector_fmac         [OK]
>>>>>  - float_dsp.butterflies_float   [OK]
>>>>>  - float_dsp.scalarproduct_float [OK]
>>>>> SSE2:
>>>>>  - sbrdsp.qmf_pre_shuffle        [OK]
>>>>>  - sbrdsp.qmf_deint_bfly         [OK]
>>>>>
>>>>> Program received signal SIGSEGV, Segmentation fault.
>>>>> apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>>>> 418         movu       m7, [Yq + 2*count + mmsize]
>>>>> (gdb) bt
>>>>> Python Exception <type 'exceptions.ImportError'> No module named
>>>>> gdb.frames:
>>>>> #0  apply_noise_main.loop () at libavcodec/x86/sbrdsp.asm:418
>>>>> #1  0x000000000043659b in checkasm_checked_call () at
>>>>> tests/checkasm/x86/checkasm.asm:77
>>>>> #2  0xdeadbeefdeadbeef in ?? ()
>>>>> #3  0xdeadbeefdeadbeef in ?? ()
>>>>> #4  0xdeadbeefdeadbeef in ?? ()
>>>>> #5  0xdeadbeefdeadbeef in ?? ()
>>>>> #6  0xdeadbeefdeadbeef in ?? ()
>>>>> #7  0xdeadbeefdeadbeef in ?? ()
>>>>> #8  0xdeadbeefdeadbeef in ?? ()
>>>>> #9  0xdeadbeefdeadbeef in ?? ()
>>>>> #10 0xdeadbeefdeadbeef in ?? ()
>>>>> #11 0xdeadbeefdeadbeef in ?? ()
>>>>> #12 0xdeadbeefdeadbeef in ?? ()
>>>>> #13 0xdeadbeefdeadbeef in ?? ()
>>>>> #14 0xdeadbeefdeadbeef in ?? ()
>>>>> #15 0xdeadbeefdeadbeef in ?? ()
>>>>> #16 0xdeadbeefdeadbeef in ?? ()
>>>>> #17 0xdeadbeefdeadbeef in ?? ()
>>>>> #18 0xdeadbeefdeadbeef in ?? ()
>>>>> #19 0x00007fffffffd870 in ?? ()
>>>>> #20 0x00007fffffffcc70 in ?? ()
>>>>> #21 0x00007fffffffce70 in ?? ()
>>>>> #22 0x0000000000000000 in ?? ()
>>>>> (gdb) info all-registers
>>>>> rax            0x0      0
>>>>> rbx            0xed56bb2dcb3c7736       -1344681633365854410
>>>>> rcx            0x8e8    2280
>>>>> rdx            0x7ab77bbbffffd070       8842672440749314160
>>>>> rsi            0x7ab77bbbffffce70       8842672440749313648
>>>>> rdi            0xf56e7777ffffdc70       -761539929699263376
>>>>> rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
>>>>> rsp            0x7fffffffcae8   0x7fffffffcae8
>>>>> r8             0xdeadbeef00000000       -2401053092612145152
>>>>> r9             0x85490444000009c0       -8842531703260968512
>>>>> r10            0x684bf0 6835184
>>>>> r11            0x1      1
>>>>> r12            0x4a75479abd64e097       5365273261009854615
>>>>> r13            0x249214109d5d1c88       2635190793557318792
>>>>> r14            0xb64a9c9e5d318408       -5311260606547786744
>>>>> r15            0xdf9a54b303f1d3a3       -2334460328996121693
>>>>> rip            0x684cc9 0x684cc9 <apply_noise_main.loop+105>
>>>>> eflags         0x10206  [ PF IF RF ]
>>>>> cs             0x33     51
>>>>> ss             0x2b     43
>>>>> ds             0x0      0
>>>>> es             0x0      0
>>>>> fs             0x0      0
>>>>> gs             0x0      0
>>>>> st0            -nan(0x0fffb0005)        (raw 0xffff00000000fffb0005)
>>>>> st1            -nan(0x334fe50ff28fc84)  (raw 0xffff0334fe50ff28fc84)
>>>>> st2            -nan(0x0ff640150)        (raw 0xffff00000000ff640150)
>>>>> st3            -nan(0x0005e005a)        (raw 0xffff00000000005e005a)
>>>>> st4            -nan(0x0ff5bffe7)        (raw 0xffff00000000ff5bffe7)
>>>>> st5            -nan(0xff63fc2cfe94fee5) (raw 0xffffff63fc2cfe94fee5)
>>>>> st6            -nan(0x01c4df38a)        (raw 0xffff000000001c4df38a)
>>>>> st7            -nan(0x06215436f)        (raw 0xffff000000006215436f)
>>>>>
>>>>
>>>> Does the attached patch fix it?
>>>
>>> linux x86-64, seems so
>>
>> Applied then.
>>
>>>
>>> mingw-32 no
>>>
>>> SSE:
>>>  - aacpsdsp.add_squares          [OK]
>>>  - aacpsdsp.mul_pair_single      [OK]
>>>  - aacpsdsp.hybrid_analysis      [OK]
>>>  - sbrdsp.sum64x5                [OK]
>>>    sum_square_sse (failed to issue emms)
>>>  - sbrdsp.sum_square             [FAILED]
>>>  - sbrdsp.neg_odd_64             [OK]
>>>  - sbrdsp.qmf_post_shuffle       [OK]
>>>
>>>
>>> linux32:
>>>  - aacpsdsp.hybrid_analysis      [OK]
>>>  - sbrdsp.sum64x5                [OK]
>>>    sum_square_sse (failed to issue emms)
>>>  - sbrdsp.sum_square             [FAILED]
>>>  - sbrdsp.neg_odd_64             [OK]
>>>  - sbrdsp.qmf_post_shuffle       [OK]
>>>  - sbrdsp.qmf_deint_neg          [OK]
>>>
>>>  thx
>>
>> Fixed as well.
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
> I get segfaults in sbrdsp code after some recent commit.

Indeed, fate also shows almost all AAC tests failing on x86_64 systems.

- Hendrik
James Almer July 5, 2017, 1:34 p.m. UTC | #5
On 7/5/2017 7:52 AM, Hendrik Leppkes wrote:
> On Wed, Jul 5, 2017 at 11:40 AM, Paul B Mahol <onemda@gmail.com> wrote:
>>
>> I get segfaults in sbrdsp code after some recent commit.
> 
> Indeed, fate also shows almost all AAC tests failing on x86_64 systems.
> 
> - Hendrik

Yeah, I got a bit overzealous in an unrelated commit with some
micro-optimization that didn't break checkasm.
It should be reverted now; sorry for dropping the yellow paint bucket on fate.
diff mbox

Patch

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c716184b14..62bbe512ec 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -378,24 +378,24 @@  cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
 apply_noise_main:
 %if ARCH_X86_64 == 0 || WIN64
     mov       kxd, m_maxm
-%define count kxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, count
 %else
-%define count m_maxq
+    DEFINE_ARGS Y, s_m, q_filt, noise, kx, count
 %endif
     movsxdifnidn    noiseq, noised
     dec    noiseq
-    shl    count, 2
+    shl    countd, 2
 %ifdef PIC
     lea NOISE_TABLE, [sbr_noise_table]
 %endif
-    lea        Yq, [Yq + 2*count]
-    add      s_mq, count
-    add   q_filtq, count
+    lea        Yq, [Yq + 2*countq]
+    add      s_mq, countq
+    add   q_filtq, countq
     shl    noiseq, 3
     pxor       m5, m5
-    neg    count
+    neg    countq
 .loop:
-    mova       m1, [q_filtq + count]
+    mova       m1, [q_filtq + countq]
     movu       m3, [noiseq + NOISE_TABLE + 1*mmsize]
     movu       m4, [noiseq + NOISE_TABLE + 2*mmsize]
     add    noiseq, 2*mmsize
@@ -404,7 +404,7 @@  apply_noise_main:
     punpckldq  m1, m1
     mulps      m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
     mulps      m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
-    mova       m3, [s_mq + count]
+    mova       m3, [s_mq + countq]
     ; TODO: replace by a vpermd in AVX2
     punpckhdq  m4, m3, m3
     punpckldq  m3, m3
@@ -414,15 +414,15 @@  apply_noise_main:
     mulps      m4, m0 ; s_m[m] * phi_sign
     pand       m1, m6
     pand       m2, m7
-    movu       m6, [Yq + 2*count]
-    movu       m7, [Yq + 2*count + mmsize]
+    movu       m6, [Yq + 2*countq]
+    movu       m7, [Yq + 2*countq + mmsize]
     addps      m3, m1
     addps      m4, m2
     addps      m6, m3
     addps      m7, m4
-    movu    [Yq + 2*count], m6
-    movu    [Yq + 2*count + mmsize], m7
-    add    count, mmsize
+    movu    [Yq + 2*countq], m6
+    movu    [Yq + 2*countq + mmsize], m7
+    add    countq, mmsize
     jl      .loop
     RET