Message ID | 20191210201040.22050-1-martin@martin.st |
---|---|
State | Accepted |
Commit | aad0e26f9312d380e613e312a3b307609296fe58 |
Headers | show |
On Tue, 10 Dec 2019, Martin Storsjö wrote: > The stereo_interpolate functions add h_step to the values h > BUF_SIZE times. Within the stereo_interpolate C functions, the > values h (h0-h3, h00-h13) are declared as local float variables, > but the compiler is free to keep them in a register with extra > precision. > > If the accumulation is rounded to 32 bit float precision after > each step, the less significant bits of h_step end up ignored > and the sum can deviate, affecting the end result more than > the currently set EPS. > > By clearing the log2(BUF_SIZE) lower bits of h_step, we make sure > that the accumulation shouldn't differ significantly, regardless > of any extra precision in the accmulating register/variable. > > This fixes the aacpsdsp checkasm test when built with clang for > mingw/x86_32. > --- > tests/checkasm/aacpsdsp.c | 18 ++++++++++++++++++ > 1 file changed, 18 insertions(+) > > diff --git a/tests/checkasm/aacpsdsp.c b/tests/checkasm/aacpsdsp.c > index ea68b39fa9..2ceef4341f 100644 > --- a/tests/checkasm/aacpsdsp.c > +++ b/tests/checkasm/aacpsdsp.c > @@ -17,6 +17,7 @@ > */ > > #include "libavcodec/aacpsdsp.h" > +#include "libavutil/intfloat.h" > > #include "checkasm.h" > > @@ -34,6 +35,16 @@ > > #define EPS 0.005 > > +static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) > +{ > + int i; > + for (i = 0; i < len; i++) { > + union av_intfloat32 u = { .f = buf[i] }; > + u.i &= (0xffffffff << bits); > + buf[i] = u.f; > + } > +} > + > static void test_add_squares(void) > { > LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); > @@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext *psdsp) > > randomize((INTFLOAT *)h, 2 * 4); > randomize((INTFLOAT *)h_step, 2 * 4); > + // Clear the least significant 14 bits of h_step, to avoid > + // divergence when accumulating h_step BUF_SIZE times into > + // a float variable which may or may not have extra intermediate > + // precision. Therefore clear roughly log2(BUF_SIZE) less > + // significant bits, to get the same result regardless of any > + // extra precision in the accumulator. > + clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); > > call_ref(l0, r0, h, h_step, BUF_SIZE); > call_new(l1, r1, h, h_step, BUF_SIZE); > -- > 2.17.1 Ping // Martin
On Sun, 15 Dec 2019, Martin Storsjö wrote: > On Tue, 10 Dec 2019, Martin Storsjö wrote: > >> The stereo_interpolate functions add h_step to the values h >> BUF_SIZE times. Within the stereo_interpolate C functions, the >> values h (h0-h3, h00-h13) are declared as local float variables, >> but the compiler is free to keep them in a register with extra >> precision. >> >> If the accumulation is rounded to 32 bit float precision after >> each step, the less significant bits of h_step end up ignored >> and the sum can deviate, affecting the end result more than >> the currently set EPS. >> >> By clearing the log2(BUF_SIZE) lower bits of h_step, we make sure >> that the accumulation shouldn't differ significantly, regardless >> of any extra precision in the accmulating register/variable. >> >> This fixes the aacpsdsp checkasm test when built with clang for >> mingw/x86_32. >> --- >> tests/checkasm/aacpsdsp.c | 18 ++++++++++++++++++ >> 1 file changed, 18 insertions(+) >> >> diff --git a/tests/checkasm/aacpsdsp.c b/tests/checkasm/aacpsdsp.c >> index ea68b39fa9..2ceef4341f 100644 >> --- a/tests/checkasm/aacpsdsp.c >> +++ b/tests/checkasm/aacpsdsp.c >> @@ -17,6 +17,7 @@ >> */ >> >> #include "libavcodec/aacpsdsp.h" >> +#include "libavutil/intfloat.h" >> >> #include "checkasm.h" >> >> @@ -34,6 +35,16 @@ >> >> #define EPS 0.005 >> >> +static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) >> +{ >> + int i; >> + for (i = 0; i < len; i++) { >> + union av_intfloat32 u = { .f = buf[i] }; >> + u.i &= (0xffffffff << bits); >> + buf[i] = u.f; >> + } >> +} >> + >> static void test_add_squares(void) >> { >> LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); >> @@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext > *psdsp) >> >> randomize((INTFLOAT *)h, 2 * 4); >> randomize((INTFLOAT *)h_step, 2 * 4); >> + // Clear the least significant 14 bits of h_step, to avoid >> + // divergence when accumulating h_step BUF_SIZE times into >> + // a float variable which may or may not have extra > intermediate >> + // precision. Therefore clear roughly log2(BUF_SIZE) less >> + // significant bits, to get the same result regardless of any >> + // extra precision in the accumulator. >> + clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); >> >> call_ref(l0, r0, h, h_step, BUF_SIZE); >> call_new(l1, r1, h, h_step, BUF_SIZE); >> -- >> 2.17.1 > > Ping If there's no opposition to this one, I'll push it tomorrow; it's been out for review for over a week. // Martin
diff --git a/tests/checkasm/aacpsdsp.c b/tests/checkasm/aacpsdsp.c index ea68b39fa9..2ceef4341f 100644 --- a/tests/checkasm/aacpsdsp.c +++ b/tests/checkasm/aacpsdsp.c @@ -17,6 +17,7 @@ */ #include "libavcodec/aacpsdsp.h" +#include "libavutil/intfloat.h" #include "checkasm.h" @@ -34,6 +35,16 @@ #define EPS 0.005 +static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits) +{ + int i; + for (i = 0; i < len; i++) { + union av_intfloat32 u = { .f = buf[i] }; + u.i &= (0xffffffff << bits); + buf[i] = u.f; + } +} + static void test_add_squares(void) { LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); @@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext *psdsp) randomize((INTFLOAT *)h, 2 * 4); randomize((INTFLOAT *)h_step, 2 * 4); + // Clear the least significant 14 bits of h_step, to avoid + // divergence when accumulating h_step BUF_SIZE times into + // a float variable which may or may not have extra intermediate + // precision. Therefore clear roughly log2(BUF_SIZE) less + // significant bits, to get the same result regardless of any + // extra precision in the accumulator. + clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14); call_ref(l0, r0, h, h_step, BUF_SIZE); call_new(l1, r1, h, h_step, BUF_SIZE);