diff mbox

[FFmpeg-devel,1/2] checkasm: add sbrdsp tests

Message ID 20170623150135.GD26847@tsuri.lan
State New
Headers show

Commit Message

Matthieu Bouron June 23, 2017, 3:01 p.m. UTC
On Thu, Jun 22, 2017 at 12:53:52PM -0300, James Almer wrote:
> On 6/22/2017 9:56 AM, Matthieu Bouron wrote:
> > ---
> > 
> > The following patchset applies on top of Clément's aacpsdsp patchset.
> > 
> > ---
> >  tests/checkasm/Makefile   |   3 +-
> >  tests/checkasm/checkasm.c |   1 +
> >  tests/checkasm/checkasm.h |   1 +
> >  tests/checkasm/sbrdsp.c   | 297 ++++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 301 insertions(+), 1 deletion(-)
> >  create mode 100644 tests/checkasm/sbrdsp.c
> > 
> > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> > index 638e811931..60e80ab738 100644
> > --- a/tests/checkasm/Makefile
> > +++ b/tests/checkasm/Makefile
> > @@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
> >  AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
> >  
> >  # decoders/encoders
> > -AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
> > +AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
> > +                                           sbrdsp.o
> >  AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
> >  AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
> >  AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
> > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > index e66744b162..29f201b1b3 100644
> > --- a/tests/checkasm/checkasm.c
> > +++ b/tests/checkasm/checkasm.c
> > @@ -67,6 +67,7 @@ static const struct {
> >  #if CONFIG_AVCODEC
> >      #if CONFIG_AAC_DECODER
> >          { "aacpsdsp", checkasm_check_aacpsdsp },
> > +        { "sbrdsp",   checkasm_check_sbrdsp },
> >      #endif
> >      #if CONFIG_ALAC_DECODER
> >          { "alacdsp", checkasm_check_alacdsp },
> > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> > index dfb0ce561c..fa51e71e4b 100644
> > --- a/tests/checkasm/checkasm.h
> > +++ b/tests/checkasm/checkasm.h
> > @@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void);
> >  void checkasm_check_jpeg2000dsp(void);
> >  void checkasm_check_llviddsp(void);
> >  void checkasm_check_pixblockdsp(void);
> > +void checkasm_check_sbrdsp(void);
> >  void checkasm_check_synth_filter(void);
> >  void checkasm_check_v210enc(void);
> >  void checkasm_check_vp8dsp(void);
> > diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
> > new file mode 100644
> > index 0000000000..8333510c6b
> > --- /dev/null
> > +++ b/tests/checkasm/sbrdsp.c
> > @@ -0,0 +1,297 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License along
> > + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> > + */
> > +
> > +#include "libavcodec/sbrdsp.h"
> > +
> > +#include "checkasm.h"
> > +
> > +#define randomize(buf, len) do {                                \
> > +    int i;                                                      \
> > +    for (i = 0; i < len; i++) {                                 \
> > +        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
> > +        (buf)[i] = f;                                           \
> > +    }                                                           \
> > +} while (0)
> > +
> > +#define EPS 0.0001
> > +
> > +static void test_sum64x5(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
> > +
> > +    declare_func(void, INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)dst0, 64 + 256);
> > +    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_sum_square(void)
> > +{
> > +    INTFLOAT res0;
> > +    INTFLOAT res1;
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
> > +
> > +    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
> > +
> > +    randomize((INTFLOAT *)src, 256 * 2);
> > +    res0 = call_ref(src, 256);
> > +    res1 = call_new(src, 256);
> 
> This one is failing on x86 because the second argument has garbage in
> the upper 32 bits of the GPR.
> 
> The solution is to either change n from int to ptrdiff_t, or zero the
> upper bits of n in ff_sbr_sum_square_sse(), which can be done implicitly
> in the first instruction.
> 
> > +    if (!float_near_abs_eps(res0, res1, EPS))
> > +        fail();
> > +    bench_new(src, 256);
> > +}
> > +
> > +static void test_neg_odd_64(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > +
> > +    declare_func(void, INTFLOAT *x);
> > +
> > +    randomize((INTFLOAT *)dst0, 64);
> > +    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_qmf_pre_shuffle(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > +
> > +    declare_func(void, INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)dst0, 128);
> > +    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
> > +    call_ref(dst0);
> > +    call_new(dst1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > +        fail();
> > +    bench_new(dst1);
> > +}
> > +
> > +static void test_qmf_post_shuffle(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
> > +
> > +    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
> > +
> > +    randomize((INTFLOAT *)src, 64);
> > +    call_ref(dst0, src);
> > +    call_new(dst1, src);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
> > +        fail();
> > +    bench_new(dst1, src);
> > +}
> > +
> > +static void test_qmf_deint_neg(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > +
> > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
> > +
> > +    randomize((INTFLOAT *)src, 64);
> > +    call_ref(dst0, src);
> > +    call_new(dst1, src);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > +        fail();
> > +    bench_new(dst1, src);
> > +}
> > +
> > +static void test_qmf_deint_bfly(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > +
> > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
> > +
> > +    memset(dst0, 0, 128 * sizeof(INTFLOAT));
> > +    memset(dst1, 0, 128 * sizeof(INTFLOAT));
> > +
> > +    randomize((INTFLOAT *)src0, 64);
> > +    randomize((INTFLOAT *)src1, 64);
> > +    call_ref(dst0, src0, src1);
> > +    call_new(dst1, src0, src1);
> > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > +        fail();
> > +    bench_new(dst1, src0, src1);
> > +}
> > +
> > +static void test_autocorrelate(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
> > +
> > +    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
> > +
> > +    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > +    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > +
> > +    randomize((INTFLOAT *)src, 80);
> > +    call_ref(src, dst0);
> > +    call_new(src, dst1);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
> > +        fail();
> > +    bench_new(src, dst1);
> > +}
> > +
> > +static void test_hf_gen(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
> > +    int i;
> > +
> > +    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
> > +                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
> > +                       INTFLOAT bw, int start, int end);
> > +
> > +    randomize((INTFLOAT *)low, 128 * 2);
> > +    randomize((INTFLOAT *)alpha0, 2);
> > +    randomize((INTFLOAT *)alpha1, 2);
> > +    for (i = 2; i < 64; i += 2) {
> > +        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
> > +        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
> > +        call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
> > +        call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
> > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > +            fail();
> > +        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
> > +    }
> > +}
> > +
> > +static void test_hf_g_filt(void)
> > +{
> > +    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +
> > +    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
> > +                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
> > +
> > +    randomize((INTFLOAT *)high, 128 * 40 * 2);
> > +    randomize((INTFLOAT *)g_filt, 128);
> > +
> > +    call_ref(dst0, high, g_filt, 128, 20);
> > +    call_new(dst1, high, g_filt, 128, 20);
> > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > +        fail();
> > +    bench_new(dst1, high, g_filt, 128, 20);
> > +}
> > +
> > +static void test_hf_apply_noise(void)
> > +{
> > +    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
> > +    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > +    int noise = 0x2a;
> > +    int i;
> > +
> > +    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
> > +                       const AAC_FLOAT *q_filt, int noise,
> > +                       int kx, int m_max);
> > +
> > +    randomize((INTFLOAT *)ref, 128 * 2);
> > +    randomize((INTFLOAT *)s_m, 128);
> > +    randomize((INTFLOAT *)q_filt, 128);
> > +    for (i = 0; i < 2; i++) {
> > +        memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
> > +        memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
> > +        call_ref(dst0, s_m, q_filt, noise, i, 128);
> > +        call_new(dst1, s_m, q_filt, noise, i, 128);
> > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > +            fail();
> > +        bench_new(dst1, s_m, q_filt, noise, i, 128);
> > +    }
> > +}
> > +
> > +void checkasm_check_sbrdsp(void)
> > +{
> > +    int i;
> > +    SBRDSPContext sbrdsp;
> > +
> > +    ff_sbrdsp_init(&sbrdsp);
> > +
> > +    if (check_func(sbrdsp.sum64x5, "sum64x5"))
> > +        test_sum64x5();
> > +    report("sum64x5");
> > +
> > +    if (check_func(sbrdsp.sum_square, "sum_square"))
> > +        test_sum_square();
> > +    report("sum_square");
> > +
> > +    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
> > +        test_neg_odd_64();
> > +    report("neg_odd_64");
> > +
> > +    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
> > +        test_qmf_pre_shuffle();
> > +    report("qmf_pre_shuffle");
> > +
> > +    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
> > +        test_qmf_post_shuffle();
> > +    report("qmf_post_shuffle");
> > +
> > +    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
> > +        test_qmf_deint_neg();
> > +    report("qmf_deint_neg");
> > +
> > +    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
> > +        test_qmf_deint_bfly();
> > +    report("qmf_deint_bfly");
> > +
> > +    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
> > +        test_autocorrelate();
> > +    report("autocorrelate");
> > +
> > +    if (check_func(sbrdsp.hf_gen, "hf_gen"))
> > +        test_hf_gen();
> > +    report("hf_gen");
> > +
> > +    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
> > +        test_hf_g_filt();
> > +    report("hf_g_filt");
> > +
> > +    for (i = 0; i < 4; i++) {
> 
> You could instead add this loop inside test_hf_apply_noise(). It would
> save you a bunch of unnecessary calls to randomize().

Done in the following version of the patch.

Thanks.

Comments

Matthieu Bouron June 28, 2017, 12:48 p.m. UTC | #1
On Fri, Jun 23, 2017 at 05:01:35PM +0200, Matthieu Bouron wrote:
> On Thu, Jun 22, 2017 at 12:53:52PM -0300, James Almer wrote:
> > On 6/22/2017 9:56 AM, Matthieu Bouron wrote:
> > > ---
> > > 
> > > The following patchset applies on top of Clément's aacpsdsp patchset.
> > > 
> > > ---
> > >  tests/checkasm/Makefile   |   3 +-
> > >  tests/checkasm/checkasm.c |   1 +
> > >  tests/checkasm/checkasm.h |   1 +
> > >  tests/checkasm/sbrdsp.c   | 297 ++++++++++++++++++++++++++++++++++++++++++++++
> > >  4 files changed, 301 insertions(+), 1 deletion(-)
> > >  create mode 100644 tests/checkasm/sbrdsp.c
> > > 
> > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> > > index 638e811931..60e80ab738 100644
> > > --- a/tests/checkasm/Makefile
> > > +++ b/tests/checkasm/Makefile
> > > @@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
> > >  AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
> > >  
> > >  # decoders/encoders
> > > -AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
> > > +AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
> > > +                                           sbrdsp.o
> > >  AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
> > >  AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
> > >  AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
> > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > > index e66744b162..29f201b1b3 100644
> > > --- a/tests/checkasm/checkasm.c
> > > +++ b/tests/checkasm/checkasm.c
> > > @@ -67,6 +67,7 @@ static const struct {
> > >  #if CONFIG_AVCODEC
> > >      #if CONFIG_AAC_DECODER
> > >          { "aacpsdsp", checkasm_check_aacpsdsp },
> > > +        { "sbrdsp",   checkasm_check_sbrdsp },
> > >      #endif
> > >      #if CONFIG_ALAC_DECODER
> > >          { "alacdsp", checkasm_check_alacdsp },
> > > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> > > index dfb0ce561c..fa51e71e4b 100644
> > > --- a/tests/checkasm/checkasm.h
> > > +++ b/tests/checkasm/checkasm.h
> > > @@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void);
> > >  void checkasm_check_jpeg2000dsp(void);
> > >  void checkasm_check_llviddsp(void);
> > >  void checkasm_check_pixblockdsp(void);
> > > +void checkasm_check_sbrdsp(void);
> > >  void checkasm_check_synth_filter(void);
> > >  void checkasm_check_v210enc(void);
> > >  void checkasm_check_vp8dsp(void);
> > > diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
> > > new file mode 100644
> > > index 0000000000..8333510c6b
> > > --- /dev/null
> > > +++ b/tests/checkasm/sbrdsp.c
> > > @@ -0,0 +1,297 @@
> > > +/*
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or modify
> > > + * it under the terms of the GNU General Public License as published by
> > > + * the Free Software Foundation; either version 2 of the License, or
> > > + * (at your option) any later version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > + * GNU General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU General Public License along
> > > + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> > > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> > > + */
> > > +
> > > +#include "libavcodec/sbrdsp.h"
> > > +
> > > +#include "checkasm.h"
> > > +
> > > +#define randomize(buf, len) do {                                \
> > > +    int i;                                                      \
> > > +    for (i = 0; i < len; i++) {                                 \
> > > +        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
> > > +        (buf)[i] = f;                                           \
> > > +    }                                                           \
> > > +} while (0)
> > > +
> > > +#define EPS 0.0001
> > > +
> > > +static void test_sum64x5(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
> > > +
> > > +    declare_func(void, INTFLOAT *z);
> > > +
> > > +    randomize((INTFLOAT *)dst0, 64 + 256);
> > > +    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
> > > +    call_ref(dst0);
> > > +    call_new(dst1);
> > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
> > > +        fail();
> > > +    bench_new(dst1);
> > > +}
> > > +
> > > +static void test_sum_square(void)
> > > +{
> > > +    INTFLOAT res0;
> > > +    INTFLOAT res1;
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
> > > +
> > > +    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
> > > +
> > > +    randomize((INTFLOAT *)src, 256 * 2);
> > > +    res0 = call_ref(src, 256);
> > > +    res1 = call_new(src, 256);
> > 
> > This one is failing on x86 because the second argument has garbage in
> > the upper 32 bits of the GPR.
> > 
> > The solution is to either change n from int to ptrdiff_t, or zero the
> > upper bits of n in ff_sbr_sum_square_sse(), which can be done implicitly
> > in the first instruction.
> > 
> > > +    if (!float_near_abs_eps(res0, res1, EPS))
> > > +        fail();
> > > +    bench_new(src, 256);
> > > +}
> > > +
> > > +static void test_neg_odd_64(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > > +
> > > +    declare_func(void, INTFLOAT *x);
> > > +
> > > +    randomize((INTFLOAT *)dst0, 64);
> > > +    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
> > > +    call_ref(dst0);
> > > +    call_new(dst1);
> > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > > +        fail();
> > > +    bench_new(dst1);
> > > +}
> > > +
> > > +static void test_qmf_pre_shuffle(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > > +
> > > +    declare_func(void, INTFLOAT *z);
> > > +
> > > +    randomize((INTFLOAT *)dst0, 128);
> > > +    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
> > > +    call_ref(dst0);
> > > +    call_new(dst1);
> > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > > +        fail();
> > > +    bench_new(dst1);
> > > +}
> > > +
> > > +static void test_qmf_post_shuffle(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
> > > +
> > > +    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
> > > +
> > > +    randomize((INTFLOAT *)src, 64);
> > > +    call_ref(dst0, src);
> > > +    call_new(dst1, src);
> > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
> > > +        fail();
> > > +    bench_new(dst1, src);
> > > +}
> > > +
> > > +static void test_qmf_deint_neg(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > > +
> > > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
> > > +
> > > +    randomize((INTFLOAT *)src, 64);
> > > +    call_ref(dst0, src);
> > > +    call_new(dst1, src);
> > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > > +        fail();
> > > +    bench_new(dst1, src);
> > > +}
> > > +
> > > +static void test_qmf_deint_bfly(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > > +
> > > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
> > > +
> > > +    memset(dst0, 0, 128 * sizeof(INTFLOAT));
> > > +    memset(dst1, 0, 128 * sizeof(INTFLOAT));
> > > +
> > > +    randomize((INTFLOAT *)src0, 64);
> > > +    randomize((INTFLOAT *)src1, 64);
> > > +    call_ref(dst0, src0, src1);
> > > +    call_new(dst1, src0, src1);
> > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > > +        fail();
> > > +    bench_new(dst1, src0, src1);
> > > +}
> > > +
> > > +static void test_autocorrelate(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
> > > +
> > > +    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
> > > +
> > > +    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > > +    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > > +
> > > +    randomize((INTFLOAT *)src, 80);
> > > +    call_ref(src, dst0);
> > > +    call_new(src, dst1);
> > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
> > > +        fail();
> > > +    bench_new(src, dst1);
> > > +}
> > > +
> > > +static void test_hf_gen(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > +    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
> > > +    int i;
> > > +
> > > +    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
> > > +                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
> > > +                       INTFLOAT bw, int start, int end);
> > > +
> > > +    randomize((INTFLOAT *)low, 128 * 2);
> > > +    randomize((INTFLOAT *)alpha0, 2);
> > > +    randomize((INTFLOAT *)alpha1, 2);
> > > +    for (i = 2; i < 64; i += 2) {
> > > +        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
> > > +        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
> > > +        call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
> > > +        call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
> > > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > +            fail();
> > > +        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
> > > +    }
> > > +}
> > > +
> > > +static void test_hf_g_filt(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > +
> > > +    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
> > > +                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
> > > +
> > > +    randomize((INTFLOAT *)high, 128 * 40 * 2);
> > > +    randomize((INTFLOAT *)g_filt, 128);
> > > +
> > > +    call_ref(dst0, high, g_filt, 128, 20);
> > > +    call_new(dst1, high, g_filt, 128, 20);
> > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > +        fail();
> > > +    bench_new(dst1, high, g_filt, 128, 20);
> > > +}
> > > +
> > > +static void test_hf_apply_noise(void)
> > > +{
> > > +    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
> > > +    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > +    int noise = 0x2a;
> > > +    int i;
> > > +
> > > +    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
> > > +                       const AAC_FLOAT *q_filt, int noise,
> > > +                       int kx, int m_max);
> > > +
> > > +    randomize((INTFLOAT *)ref, 128 * 2);
> > > +    randomize((INTFLOAT *)s_m, 128);
> > > +    randomize((INTFLOAT *)q_filt, 128);
> > > +    for (i = 0; i < 2; i++) {
> > > +        memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
> > > +        memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
> > > +        call_ref(dst0, s_m, q_filt, noise, i, 128);
> > > +        call_new(dst1, s_m, q_filt, noise, i, 128);
> > > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > +            fail();
> > > +        bench_new(dst1, s_m, q_filt, noise, i, 128);
> > > +    }
> > > +}
> > > +
> > > +void checkasm_check_sbrdsp(void)
> > > +{
> > > +    int i;
> > > +    SBRDSPContext sbrdsp;
> > > +
> > > +    ff_sbrdsp_init(&sbrdsp);
> > > +
> > > +    if (check_func(sbrdsp.sum64x5, "sum64x5"))
> > > +        test_sum64x5();
> > > +    report("sum64x5");
> > > +
> > > +    if (check_func(sbrdsp.sum_square, "sum_square"))
> > > +        test_sum_square();
> > > +    report("sum_square");
> > > +
> > > +    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
> > > +        test_neg_odd_64();
> > > +    report("neg_odd_64");
> > > +
> > > +    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
> > > +        test_qmf_pre_shuffle();
> > > +    report("qmf_pre_shuffle");
> > > +
> > > +    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
> > > +        test_qmf_post_shuffle();
> > > +    report("qmf_post_shuffle");
> > > +
> > > +    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
> > > +        test_qmf_deint_neg();
> > > +    report("qmf_deint_neg");
> > > +
> > > +    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
> > > +        test_qmf_deint_bfly();
> > > +    report("qmf_deint_bfly");
> > > +
> > > +    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
> > > +        test_autocorrelate();
> > > +    report("autocorrelate");
> > > +
> > > +    if (check_func(sbrdsp.hf_gen, "hf_gen"))
> > > +        test_hf_gen();
> > > +    report("hf_gen");
> > > +
> > > +    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
> > > +        test_hf_g_filt();
> > > +    report("hf_g_filt");
> > > +
> > > +    for (i = 0; i < 4; i++) {
> > 
> > You could instead add this loop inside test_hf_apply_noise(). It would
> > save you a bunch of unnecessary calls to randomize().
> 
> Done in the following version of the patch.

I will apply the patchset in two days if there are no objections.

[...]
Michael Niedermayer June 29, 2017, 11:58 p.m. UTC | #2
On Wed, Jun 28, 2017 at 02:48:55PM +0200, Matthieu Bouron wrote:
> On Fri, Jun 23, 2017 at 05:01:35PM +0200, Matthieu Bouron wrote:
> > On Thu, Jun 22, 2017 at 12:53:52PM -0300, James Almer wrote:
> > > On 6/22/2017 9:56 AM, Matthieu Bouron wrote:
> > > > ---
> > > > 
> > > > The following patchset applies on top of Clément's aacpsdsp patchset.
> > > > 
> > > > ---
> > > >  tests/checkasm/Makefile   |   3 +-
> > > >  tests/checkasm/checkasm.c |   1 +
> > > >  tests/checkasm/checkasm.h |   1 +
> > > >  tests/checkasm/sbrdsp.c   | 297 ++++++++++++++++++++++++++++++++++++++++++++++
> > > >  4 files changed, 301 insertions(+), 1 deletion(-)
> > > >  create mode 100644 tests/checkasm/sbrdsp.c
> > > > 
> > > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> > > > index 638e811931..60e80ab738 100644
> > > > --- a/tests/checkasm/Makefile
> > > > +++ b/tests/checkasm/Makefile
> > > > @@ -13,7 +13,8 @@ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
> > > >  AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
> > > >  
> > > >  # decoders/encoders
> > > > -AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
> > > > +AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
> > > > +                                           sbrdsp.o
> > > >  AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
> > > >  AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
> > > >  AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
> > > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > > > index e66744b162..29f201b1b3 100644
> > > > --- a/tests/checkasm/checkasm.c
> > > > +++ b/tests/checkasm/checkasm.c
> > > > @@ -67,6 +67,7 @@ static const struct {
> > > >  #if CONFIG_AVCODEC
> > > >      #if CONFIG_AAC_DECODER
> > > >          { "aacpsdsp", checkasm_check_aacpsdsp },
> > > > +        { "sbrdsp",   checkasm_check_sbrdsp },
> > > >      #endif
> > > >      #if CONFIG_ALAC_DECODER
> > > >          { "alacdsp", checkasm_check_alacdsp },
> > > > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> > > > index dfb0ce561c..fa51e71e4b 100644
> > > > --- a/tests/checkasm/checkasm.h
> > > > +++ b/tests/checkasm/checkasm.h
> > > > @@ -50,6 +50,7 @@ void checkasm_check_hevc_idct(void);
> > > >  void checkasm_check_jpeg2000dsp(void);
> > > >  void checkasm_check_llviddsp(void);
> > > >  void checkasm_check_pixblockdsp(void);
> > > > +void checkasm_check_sbrdsp(void);
> > > >  void checkasm_check_synth_filter(void);
> > > >  void checkasm_check_v210enc(void);
> > > >  void checkasm_check_vp8dsp(void);
> > > > diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
> > > > new file mode 100644
> > > > index 0000000000..8333510c6b
> > > > --- /dev/null
> > > > +++ b/tests/checkasm/sbrdsp.c
> > > > @@ -0,0 +1,297 @@
> > > > +/*
> > > > + * This file is part of FFmpeg.
> > > > + *
> > > > + * FFmpeg is free software; you can redistribute it and/or modify
> > > > + * it under the terms of the GNU General Public License as published by
> > > > + * the Free Software Foundation; either version 2 of the License, or
> > > > + * (at your option) any later version.
> > > > + *
> > > > + * FFmpeg is distributed in the hope that it will be useful,
> > > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > > + * GNU General Public License for more details.
> > > > + *
> > > > + * You should have received a copy of the GNU General Public License along
> > > > + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> > > > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> > > > + */
> > > > +
> > > > +#include "libavcodec/sbrdsp.h"
> > > > +
> > > > +#include "checkasm.h"
> > > > +
> > > > +#define randomize(buf, len) do {                                \
> > > > +    int i;                                                      \
> > > > +    for (i = 0; i < len; i++) {                                 \
> > > > +        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
> > > > +        (buf)[i] = f;                                           \
> > > > +    }                                                           \
> > > > +} while (0)
> > > > +
> > > > +#define EPS 0.0001
> > > > +
> > > > +static void test_sum64x5(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
> > > > +
> > > > +    declare_func(void, INTFLOAT *z);
> > > > +
> > > > +    randomize((INTFLOAT *)dst0, 64 + 256);
> > > > +    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
> > > > +    call_ref(dst0);
> > > > +    call_new(dst1);
> > > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
> > > > +        fail();
> > > > +    bench_new(dst1);
> > > > +}
> > > > +
> > > > +static void test_sum_square(void)
> > > > +{
> > > > +    INTFLOAT res0;
> > > > +    INTFLOAT res1;
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
> > > > +
> > > > +    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
> > > > +
> > > > +    randomize((INTFLOAT *)src, 256 * 2);
> > > > +    res0 = call_ref(src, 256);
> > > > +    res1 = call_new(src, 256);
> > > 
> > > This one is failing on x86 because the second argument has garbage in
> > > the upper 32 bits of the GPR.
> > > 
> > > The solution is to either change n from int to ptrdiff_t, or zero the
> > > upper bits of n in ff_sbr_sum_square_sse(), which can be done implicitly
> > > in the first instruction.
> > > 
> > > > +    if (!float_near_abs_eps(res0, res1, EPS))
> > > > +        fail();
> > > > +    bench_new(src, 256);
> > > > +}
> > > > +
> > > > +static void test_neg_odd_64(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > > > +
> > > > +    declare_func(void, INTFLOAT *x);
> > > > +
> > > > +    randomize((INTFLOAT *)dst0, 64);
> > > > +    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
> > > > +    call_ref(dst0);
> > > > +    call_new(dst1);
> > > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > > > +        fail();
> > > > +    bench_new(dst1);
> > > > +}
> > > > +
> > > > +static void test_qmf_pre_shuffle(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > > > +
> > > > +    declare_func(void, INTFLOAT *z);
> > > > +
> > > > +    randomize((INTFLOAT *)dst0, 128);
> > > > +    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
> > > > +    call_ref(dst0);
> > > > +    call_new(dst1);
> > > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > > > +        fail();
> > > > +    bench_new(dst1);
> > > > +}
> > > > +
> > > > +static void test_qmf_post_shuffle(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
> > > > +
> > > > +    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
> > > > +
> > > > +    randomize((INTFLOAT *)src, 64);
> > > > +    call_ref(dst0, src);
> > > > +    call_new(dst1, src);
> > > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
> > > > +        fail();
> > > > +    bench_new(dst1, src);
> > > > +}
> > > > +
> > > > +static void test_qmf_deint_neg(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
> > > > +
> > > > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
> > > > +
> > > > +    randomize((INTFLOAT *)src, 64);
> > > > +    call_ref(dst0, src);
> > > > +    call_new(dst1, src);
> > > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
> > > > +        fail();
> > > > +    bench_new(dst1, src);
> > > > +}
> > > > +
> > > > +static void test_qmf_deint_bfly(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
> > > > +
> > > > +    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
> > > > +
> > > > +    memset(dst0, 0, 128 * sizeof(INTFLOAT));
> > > > +    memset(dst1, 0, 128 * sizeof(INTFLOAT));
> > > > +
> > > > +    randomize((INTFLOAT *)src0, 64);
> > > > +    randomize((INTFLOAT *)src1, 64);
> > > > +    call_ref(dst0, src0, src1);
> > > > +    call_new(dst1, src0, src1);
> > > > +    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
> > > > +        fail();
> > > > +    bench_new(dst1, src0, src1);
> > > > +}
> > > > +
> > > > +static void test_autocorrelate(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
> > > > +
> > > > +    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
> > > > +
> > > > +    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > > > +    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
> > > > +
> > > > +    randomize((INTFLOAT *)src, 80);
> > > > +    call_ref(src, dst0);
> > > > +    call_new(src, dst1);
> > > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
> > > > +        fail();
> > > > +    bench_new(src, dst1);
> > > > +}
> > > > +
> > > > +static void test_hf_gen(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > > +    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
> > > > +    int i;
> > > > +
> > > > +    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
> > > > +                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
> > > > +                       INTFLOAT bw, int start, int end);
> > > > +
> > > > +    randomize((INTFLOAT *)low, 128 * 2);
> > > > +    randomize((INTFLOAT *)alpha0, 2);
> > > > +    randomize((INTFLOAT *)alpha1, 2);
> > > > +    for (i = 2; i < 64; i += 2) {
> > > > +        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
> > > > +        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
> > > > +        call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
> > > > +        call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
> > > > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > > +            fail();
> > > > +        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
> > > > +    }
> > > > +}
> > > > +
> > > > +static void test_hf_g_filt(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > > +
> > > > +    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
> > > > +                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
> > > > +
> > > > +    randomize((INTFLOAT *)high, 128 * 40 * 2);
> > > > +    randomize((INTFLOAT *)g_filt, 128);
> > > > +
> > > > +    call_ref(dst0, high, g_filt, 128, 20);
> > > > +    call_new(dst1, high, g_filt, 128, 20);
> > > > +    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > > +        fail();
> > > > +    bench_new(dst1, high, g_filt, 128, 20);
> > > > +}
> > > > +
> > > > +static void test_hf_apply_noise(void)
> > > > +{
> > > > +    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
> > > > +    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
> > > > +    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
> > > > +    int noise = 0x2a;
> > > > +    int i;
> > > > +
> > > > +    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
> > > > +                       const AAC_FLOAT *q_filt, int noise,
> > > > +                       int kx, int m_max);
> > > > +
> > > > +    randomize((INTFLOAT *)ref, 128 * 2);
> > > > +    randomize((INTFLOAT *)s_m, 128);
> > > > +    randomize((INTFLOAT *)q_filt, 128);
> > > > +    for (i = 0; i < 2; i++) {
> > > > +        memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
> > > > +        memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
> > > > +        call_ref(dst0, s_m, q_filt, noise, i, 128);
> > > > +        call_new(dst1, s_m, q_filt, noise, i, 128);
> > > > +        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
> > > > +            fail();
> > > > +        bench_new(dst1, s_m, q_filt, noise, i, 128);
> > > > +    }
> > > > +}
> > > > +
> > > > +void checkasm_check_sbrdsp(void)
> > > > +{
> > > > +    int i;
> > > > +    SBRDSPContext sbrdsp;
> > > > +
> > > > +    ff_sbrdsp_init(&sbrdsp);
> > > > +
> > > > +    if (check_func(sbrdsp.sum64x5, "sum64x5"))
> > > > +        test_sum64x5();
> > > > +    report("sum64x5");
> > > > +
> > > > +    if (check_func(sbrdsp.sum_square, "sum_square"))
> > > > +        test_sum_square();
> > > > +    report("sum_square");
> > > > +
> > > > +    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
> > > > +        test_neg_odd_64();
> > > > +    report("neg_odd_64");
> > > > +
> > > > +    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
> > > > +        test_qmf_pre_shuffle();
> > > > +    report("qmf_pre_shuffle");
> > > > +
> > > > +    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
> > > > +        test_qmf_post_shuffle();
> > > > +    report("qmf_post_shuffle");
> > > > +
> > > > +    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
> > > > +        test_qmf_deint_neg();
> > > > +    report("qmf_deint_neg");
> > > > +
> > > > +    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
> > > > +        test_qmf_deint_bfly();
> > > > +    report("qmf_deint_bfly");
> > > > +
> > > > +    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
> > > > +        test_autocorrelate();
> > > > +    report("autocorrelate");
> > > > +
> > > > +    if (check_func(sbrdsp.hf_gen, "hf_gen"))
> > > > +        test_hf_gen();
> > > > +    report("hf_gen");
> > > > +
> > > > +    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
> > > > +        test_hf_g_filt();
> > > > +    report("hf_g_filt");
> > > > +
> > > > +    for (i = 0; i < 4; i++) {
> > > 
> > > You could instead add this loop inside test_hf_apply_noise(). It would
> > > save you a bunch of unnecessary calls to randomize().
> > 
> > Done in the following version of the patch.
> 
> I will apply the patchset in two days if there are no objections.

this seems to fail if checkasm is run directly

make -j12 fate-checkasm
TEST    checkasm-aacpsdsp
TEST    checkasm-alacdsp
TEST    checkasm-audiodsp
TEST    checkasm-blockdsp
TEST    checkasm-bswapdsp
TEST    checkasm-fixed_dsp
TEST    checkasm-flacdsp
TEST    checkasm-float_dsp
TEST    checkasm-fmtconvert
TEST    checkasm-h264dsp
TEST    checkasm-h264pred
TEST    checkasm-h264qpel
TEST    checkasm-hevc_add_res
TEST    checkasm-hevc_idct
TEST    checkasm-jpeg2000dsp
TEST    checkasm-llviddsp
TEST    checkasm-pixblockdsp
TEST    checkasm-synth_filter
TEST    checkasm-vf_blend
TEST    checkasm-v210enc
TEST    checkasm-vf_colorspace
TEST    checkasm-videodsp
TEST    checkasm-vp8dsp
TEST    checkasm-vp9dsp


tests/checkasm/checkasm   123
checkasm: using random seed 123
MMX:
 - audiodsp.audiodsp             [OK]
 - blockdsp.blockdsp             [OK]
 - h264dsp.idct                  [OK]
 - h264pred.pred4x4              [OK]
 - h264pred.pred8x8              [OK]
 - h264pred.pred16x16            [OK]
 - pixblockdsp.get_pixels        [OK]
 - pixblockdsp.diff_pixels       [OK]
 - vp8dsp.idct                   [OK]
 - vp8dsp.mc                     [OK]
 - vp9dsp.ipred                  [OK]
 - vp9dsp.itxfm                  [OK]
 - vp9dsp.mc                     [OK]
MMXEXT:
 - audiodsp.audiodsp             [OK]
 - h264dsp.idct                  [OK]
 - h264pred.pred4x4              [OK]
 - h264pred.pred8x8              [OK]
 - h264pred.pred16x16            [OK]
 - h264pred.pred8x8l             [OK]
 - h264qpel.put                  [OK]
 - h264qpel.avg                  [OK]
 - hevc_add_res.add_residual     [OK]
 - hevc_idct.idct_dc             [OK]
 - vp8dsp.mc                     [OK]
 - vp9dsp.ipred                  [OK]
 - vp9dsp.itxfm                  [OK]
 - vp9dsp.loopfilter             [OK]
 - vp9dsp.mc                     [OK]
SSE:
 - aacpsdsp.add_squares          [OK]
 - aacpsdsp.mul_pair_single      [OK]
 - aacpsdsp.hybrid_analysis      [OK]
 - sbrdsp.sum64x5                [OK]
 - sbrdsp.sum_square             [OK]
 - sbrdsp.neg_odd_64             [OK]
 - sbrdsp.qmf_post_shuffle       [OK]
 - sbrdsp.qmf_deint_neg          [OK]
 - sbrdsp.qmf_deint_bfly         [OK]
 - sbrdsp.autocorrelate          [OK]
Segmentation fault (core dumped)

Program received signal SIGSEGV, Segmentation fault.
0x0000000000684919 in ff_sbr_hf_gen_sse ()
(gdb) bt
Python Exception <type 'exceptions.ImportError'> No module named gdb.frames:
#0  0x0000000000684919 in ff_sbr_hf_gen_sse ()
#1  0x000000000043659b in checkasm_checked_call ()

disassemble $rip-32,$rip+32
Dump of assembler code from 0x6848f9 to 0x684939:
   0x00000000006848f9 <ff_sbr_hf_gen_sse+9>:    add    %cl,(%rdi)
   0x00000000006848fb <ff_sbr_hf_gen_sse+11>:   pop    %rcx
   0x00000000006848fc <ff_sbr_hf_gen_sse+12>:   rorb   (%rdi)
   0x00000000006848fe <ff_sbr_hf_gen_sse+14>:   pop    %rcx
   0x00000000006848ff <ff_sbr_hf_gen_sse+15>:   enterq $0x590f,$0xd0
   0x0000000000684903 <ff_sbr_hf_gen_sse+19>:   movaps %xmm1,%xmm3
   0x0000000000684906 <ff_sbr_hf_gen_sse+22>:   movaps %xmm2,%xmm4
   0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8
   0x000000000068490c <ff_sbr_hf_gen_sse+28>:   lea    (%rdi,%r9,8),%rdi
   0x0000000000684910 <ff_sbr_hf_gen_sse+32>:   lea    -0x10(%rsi,%r9,8),%rsi
   0x0000000000684915 <ff_sbr_hf_gen_sse+37>:   shl    $0x3,%r8
=> 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0
   0x000000000068491e <ff_sbr_hf_gen_sse+46>:   shufps $0x55,%xmm3,%xmm3
   0x0000000000684922 <ff_sbr_hf_gen_sse+50>:   shufps $0x55,%xmm4,%xmm4
   0x0000000000684926 <ff_sbr_hf_gen_sse+54>:   xorps  0x719030,%xmm3
   0x000000000068492e <ff_sbr_hf_gen_sse+62>:   shufps $0x0,%xmm1,%xmm1
   0x0000000000684932 <ff_sbr_hf_gen_sse+66>:   shufps $0x0,%xmm2,%xmm2
   0x0000000000684936 <ff_sbr_hf_gen_sse+70>:   xorps  0x719030,%xmm4
End of assembler dump.

rax            0x0      0
rbx            0xed56bb2dcb3c7736       -1344681633365854410
rcx            0x7fffffffd060   140737488343136
rdx            0x7fffffffd050   140737488343120
rsi            0xf56e7777ffffd460       -761539929699265440
rdi            0xf56e7777ffffdc70       -761539929699263376
rbp            0x8bda43d3fd1a7e06       0x8bda43d3fd1a7e06
rsp            0x7fffffffced8   0x7fffffffced8
r8             0xfffffffffffffc10       -1008
r9             0xdeadbeef00000080       -2401053092612145024
r10            0x6848f0 6834416
r11            0x201    513
r12            0x4a75479abd64e097       5365273261009854615
r13            0x249214109d5d1c88       2635190793557318792
r14            0xb64a9c9e5d318408       -5311260606547786744
r15            0xdf9a54b303f1d3a3       -2334460328996121693
rip            0x684919 0x684919 <ff_sbr_hf_gen_sse+41>

checkasm: using random seed 123
MMX:
 - audiodsp.audiodsp             [OK]
 - blockdsp.blockdsp             [OK]
 - h264dsp.idct                  [OK]
 - h264pred.pred4x4              [OK]
 - h264pred.pred8x8              [OK]
 - h264pred.pred16x16            [OK]
 - pixblockdsp.get_pixels        [OK]
 - pixblockdsp.diff_pixels       [OK]
 - vp8dsp.idct                   [OK]
 - vp8dsp.mc                     [OK]
 - vp9dsp.ipred                  [OK]
 - vp9dsp.itxfm                  [OK]
 - vp9dsp.mc                     [OK]
MMXEXT:
 - audiodsp.audiodsp             [OK]
 - h264dsp.idct                  [OK]
 - h264pred.pred4x4              [OK]
 - h264pred.pred8x8              [OK]
 - h264pred.pred16x16            [OK]
 - h264pred.pred8x8l             [OK]
 - h264qpel.put                  [OK]
 - h264qpel.avg                  [OK]
 - hevc_add_res.add_residual     [OK]
 - hevc_idct.idct_dc             [OK]
 - vp8dsp.mc                     [OK]
 - vp9dsp.ipred                  [OK]
 - vp9dsp.itxfm                  [OK]
 - vp9dsp.loopfilter             [OK]
 - vp9dsp.mc                     [OK]
SSE:
 - aacpsdsp.add_squares          [OK]
 - aacpsdsp.mul_pair_single      [OK]
 - aacpsdsp.hybrid_analysis      [OK]
 - sbrdsp.sum64x5                [OK]
 - sbrdsp.sum_square             [OK]
 - sbrdsp.neg_odd_64             [OK]
 - sbrdsp.qmf_post_shuffle       [OK]
 - sbrdsp.qmf_deint_neg          [OK]
 - sbrdsp.qmf_deint_bfly         [OK]
 - sbrdsp.autocorrelate          [OK]
==7217== Invalid read of size 8
==7217==    at 0x6B6789: ??? (libavcodec/x86/sbrdsp.asm:164)
==7217==    by 0x43489A: ??? (tests/checkasm/x86/checkasm.asm:77)
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==  Address 0xf56df77ffefff610 is not stack'd, malloc'd or (recently) free'd
==7217==
==7217==
==7217== Process terminating with default action of signal 11 (SIGSEGV)
==7217==  General Protection Fault
==7217==    at 0x6B6789: ??? (libavcodec/x86/sbrdsp.asm:164)
==7217==    by 0x43489A: ??? (tests/checkasm/x86/checkasm.asm:77)
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==    by 0xDEADBEEFDEADBEEE: ???
==7217==
==7217== HEAP SUMMARY:
==7217==     in use at exit: 140,213 bytes in 1,755 blocks
==7217==   total heap usage: 1,800 allocs, 45 frees, 3,697,837 bytes allocated
==7217==
==7217== LEAK SUMMARY:
==7217==    definitely lost: 0 bytes in 0 blocks
==7217==    indirectly lost: 0 bytes in 0 blocks
==7217==      possibly lost: 0 bytes in 0 blocks
==7217==    still reachable: 140,213 bytes in 1,755 blocks
==7217==         suppressed: 0 bytes in 0 blocks
==7217== Rerun with --leak-check=full to see details of leaked memory
==7217==
==7217== For counts of detected and suppressed errors, rerun with: -v
==7217== Use --track-origins=yes to see where uninitialised values come from
==7217== ERROR SUMMARY: 6 errors from 3 contexts (suppressed: 0 from 0)
Segmentation fault (core dumped)



[...]
Henrik Gramner June 30, 2017, 1:14 a.m. UTC | #3
On Fri, Jun 30, 2017 at 1:58 AM, Michael Niedermayer
<michael@niedermayer.cc> wrote:
> Program received signal SIGSEGV, Segmentation fault.
> 0x0000000000684919 in ff_sbr_hf_gen_sse ()

>    0x0000000000684909 <ff_sbr_hf_gen_sse+25>:   sub    %r9,%r8

> => 0x0000000000684919 <ff_sbr_hf_gen_sse+41>:   movaps (%rsi,%r8,1),%xmm0

> r9             0xdeadbeef00000080       -2401053092612145024

Another case of a 32-bit int being used as part of a 64-bit operation.
diff mbox

Patch

From 749b74d2146cd7ac4dd8e71bcf2a789b901590d8 Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bouron@gmail.com>
Date: Fri, 9 Jun 2017 09:34:12 +0000
Subject: [PATCH 1/3] checkasm: add sbrdsp tests

---
 tests/checkasm/Makefile   |   3 +-
 tests/checkasm/checkasm.c |   1 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/sbrdsp.c   | 298 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 302 insertions(+), 1 deletion(-)
 create mode 100644 tests/checkasm/sbrdsp.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 638e811931..60e80ab738 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -13,7 +13,8 @@  AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
 AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
 
 # decoders/encoders
-AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o
+AVCODECOBJS-$(CONFIG_AAC_DECODER)       += aacpsdsp.o \
+                                           sbrdsp.o
 AVCODECOBJS-$(CONFIG_ALAC_DECODER)      += alacdsp.o
 AVCODECOBJS-$(CONFIG_DCA_DECODER)       += synth_filter.o
 AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index e66744b162..29f201b1b3 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -67,6 +67,7 @@  static const struct {
 #if CONFIG_AVCODEC
     #if CONFIG_AAC_DECODER
         { "aacpsdsp", checkasm_check_aacpsdsp },
+        { "sbrdsp",   checkasm_check_sbrdsp },
     #endif
     #if CONFIG_ALAC_DECODER
         { "alacdsp", checkasm_check_alacdsp },
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index dfb0ce561c..fa51e71e4b 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -50,6 +50,7 @@  void checkasm_check_hevc_idct(void);
 void checkasm_check_jpeg2000dsp(void);
 void checkasm_check_llviddsp(void);
 void checkasm_check_pixblockdsp(void);
+void checkasm_check_sbrdsp(void);
 void checkasm_check_synth_filter(void);
 void checkasm_check_v210enc(void);
 void checkasm_check_vp8dsp(void);
diff --git a/tests/checkasm/sbrdsp.c b/tests/checkasm/sbrdsp.c
new file mode 100644
index 0000000000..038318e021
--- /dev/null
+++ b/tests/checkasm/sbrdsp.c
@@ -0,0 +1,298 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavcodec/sbrdsp.h"
+
+#include "checkasm.h"
+
+#define randomize(buf, len) do {                                \
+    int i;                                                      \
+    for (i = 0; i < len; i++) {                                 \
+        const INTFLOAT f = (INTFLOAT)rnd() / UINT_MAX;          \
+        (buf)[i] = f;                                           \
+    }                                                           \
+} while (0)
+
+#define EPS 0.0001
+
+static void test_sum64x5(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64 + 256]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64 + 256]);
+
+    declare_func(void, INTFLOAT *z);
+
+    randomize((INTFLOAT *)dst0, 64 + 256);
+    memcpy(dst1, dst0, (64 + 256) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64 + 256))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_sum_square(void)
+{
+    INTFLOAT res0;
+    INTFLOAT res1;
+    LOCAL_ALIGNED_16(INTFLOAT, src, [256], [2]);
+
+    declare_func(INTFLOAT, INTFLOAT (*x)[2], int n);
+
+    randomize((INTFLOAT *)src, 256 * 2);
+    res0 = call_ref(src, 256);
+    res1 = call_new(src, 256);
+    if (!float_near_abs_eps(res0, res1, EPS))
+        fail();
+    bench_new(src, 256);
+}
+
+static void test_neg_odd_64(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+
+    declare_func(void, INTFLOAT *x);
+
+    randomize((INTFLOAT *)dst0, 64);
+    memcpy(dst1, dst0, (64) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_pre_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+
+    declare_func(void, INTFLOAT *z);
+
+    randomize((INTFLOAT *)dst0, 128);
+    memcpy(dst1, dst0, (128) * sizeof(INTFLOAT));
+    call_ref(dst0);
+    call_new(dst1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1);
+}
+
+static void test_qmf_post_shuffle(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [32], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [32], [2]);
+
+    declare_func(void, INTFLOAT W[32][2], const INTFLOAT *z);
+
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 64))
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_neg(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [64]);
+
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src);
+
+    randomize((INTFLOAT *)src, 64);
+    call_ref(dst0, src);
+    call_new(dst1, src);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 64))
+        fail();
+    bench_new(dst1, src);
+}
+
+static void test_qmf_deint_bfly(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src0, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, src1, [64]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128]);
+
+    declare_func(void, INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
+
+    memset(dst0, 0, 128 * sizeof(INTFLOAT));
+    memset(dst1, 0, 128 * sizeof(INTFLOAT));
+
+    randomize((INTFLOAT *)src0, 64);
+    randomize((INTFLOAT *)src1, 64);
+    call_ref(dst0, src0, src1);
+    call_new(dst1, src0, src1);
+    if (!float_near_abs_eps_array(dst0, dst1, EPS, 128))
+        fail();
+    bench_new(dst1, src0, src1);
+}
+
+static void test_autocorrelate(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, src, [40], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [3], [2][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [3], [2][2]);
+
+    declare_func(void, const INTFLOAT x[40][2], INTFLOAT phi[3][2][2]);
+
+    memset(dst0, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
+    memset(dst1, 0, 3 * 2 * 2 * sizeof(INTFLOAT));
+
+    randomize((INTFLOAT *)src, 80);
+    call_ref(src, dst0);
+    call_new(src, dst1);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 3 * 2 * 2))
+        fail();
+    bench_new(src, dst1);
+}
+
+static void test_hf_gen(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, low, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha0, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, alpha1, [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    INTFLOAT bw = (INTFLOAT)rnd() / UINT_MAX;
+    int i;
+
+    declare_func(void, INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
+                       const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
+                       INTFLOAT bw, int start, int end);
+
+    randomize((INTFLOAT *)low, 128 * 2);
+    randomize((INTFLOAT *)alpha0, 2);
+    randomize((INTFLOAT *)alpha1, 2);
+    for (i = 2; i < 64; i += 2) {
+        memset(dst0, 0, 128 * 2 * sizeof(INTFLOAT));
+        memset(dst1, 0, 128 * 2 * sizeof(INTFLOAT));
+        call_ref(dst0, low, alpha0, alpha1, 0.0, i, 128);
+        call_new(dst1, low, alpha0, alpha1, 0.0, i, 128);
+        if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+            fail();
+        bench_new(dst1, low, alpha0, alpha1, bw, i, 128);
+    }
+}
+
+static void test_hf_g_filt(void)
+{
+    LOCAL_ALIGNED_16(INTFLOAT, high, [128], [40][2]);
+    LOCAL_ALIGNED_16(INTFLOAT, g_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+
+    declare_func(void, INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
+                       const INTFLOAT *g_filt, int m_max, intptr_t ixh);
+
+    randomize((INTFLOAT *)high, 128 * 40 * 2);
+    randomize((INTFLOAT *)g_filt, 128);
+
+    call_ref(dst0, high, g_filt, 128, 20);
+    call_new(dst1, high, g_filt, 128, 20);
+    if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+        fail();
+    bench_new(dst1, high, g_filt, 128, 20);
+}
+
+static void test_hf_apply_noise(const SBRDSPContext *sbrdsp)
+{
+    LOCAL_ALIGNED_16(AAC_FLOAT, s_m, [128]);
+    LOCAL_ALIGNED_16(AAC_FLOAT, q_filt, [128]);
+    LOCAL_ALIGNED_16(INTFLOAT, ref, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst0, [128], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, dst1, [128], [2]);
+    int noise = 0x2a;
+    int i, j;
+
+    declare_func(void, INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                       const AAC_FLOAT *q_filt, int noise,
+                       int kx, int m_max);
+
+    randomize((INTFLOAT *)ref, 128 * 2);
+    randomize((INTFLOAT *)s_m, 128);
+    randomize((INTFLOAT *)q_filt, 128);
+
+    for (i = 0; i < 4; i++) {
+        if (check_func(sbrdsp->hf_apply_noise[i], "hf_apply_noise_%d", i)) {
+            for (j = 0; j < 2; j++) {
+                memcpy(dst0, ref, 128 * 2 * sizeof(INTFLOAT));
+                memcpy(dst1, ref, 128 * 2 * sizeof(INTFLOAT));
+                call_ref(dst0, s_m, q_filt, noise, j, 128);
+                call_new(dst1, s_m, q_filt, noise, j, 128);
+                if (!float_near_abs_eps_array((INTFLOAT *)dst0, (INTFLOAT *)dst1, EPS, 128 * 2))
+                    fail();
+                bench_new(dst1, s_m, q_filt, noise, j, 128);
+            }
+        }
+    }
+}
+
+void checkasm_check_sbrdsp(void)
+{
+    SBRDSPContext sbrdsp;
+
+    ff_sbrdsp_init(&sbrdsp);
+
+    if (check_func(sbrdsp.sum64x5, "sum64x5"))
+        test_sum64x5();
+    report("sum64x5");
+
+    if (check_func(sbrdsp.sum_square, "sum_square"))
+        test_sum_square();
+    report("sum_square");
+
+    if (check_func(sbrdsp.neg_odd_64, "neg_odd_64"))
+        test_neg_odd_64();
+    report("neg_odd_64");
+
+    if (check_func(sbrdsp.qmf_pre_shuffle, "qmf_pre_shuffle"))
+        test_qmf_pre_shuffle();
+    report("qmf_pre_shuffle");
+
+    if (check_func(sbrdsp.qmf_post_shuffle, "qmf_post_shuffle"))
+        test_qmf_post_shuffle();
+    report("qmf_post_shuffle");
+
+    if (check_func(sbrdsp.qmf_deint_neg, "qmf_deint_neg"))
+        test_qmf_deint_neg();
+    report("qmf_deint_neg");
+
+    if (check_func(sbrdsp.qmf_deint_bfly, "qmf_deint_bfly"))
+        test_qmf_deint_bfly();
+    report("qmf_deint_bfly");
+
+    if (check_func(sbrdsp.autocorrelate, "autocorrelate"))
+        test_autocorrelate();
+    report("autocorrelate");
+
+    if (check_func(sbrdsp.hf_gen, "hf_gen"))
+        test_hf_gen();
+    report("hf_gen");
+
+    if (check_func(sbrdsp.hf_g_filt, "hf_g_filt"))
+        test_hf_g_filt();
+    report("hf_g_filt");
+
+    test_hf_apply_noise(&sbrdsp);
+    report("hf_apply_noise");
+}
-- 
2.13.1