diff mbox series

[FFmpeg-devel,PATCHv2,1/1] checkasm/lpc: test compute_autocorr

Message ID 20231217091343.34077-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv2,1/1] checkasm/lpc: test compute_autocorr | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Rémi Denis-Courmont Dec. 17, 2023, 9:13 a.m. UTC
---
 tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

Comments

James Almer Dec. 17, 2023, 4:09 p.m. UTC | #1
On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote:
> ---
>   tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c
> index 592e34c03d..9b33f8a3b0 100644
> --- a/tests/checkasm/lpc.c
> +++ b/tests/checkasm/lpc.c
> @@ -57,10 +57,46 @@ static void test_window(int len)
>       bench_new(src, len, dst1);
>   }
>   
> +static void test_compute_autocorr(ptrdiff_t len, int lag)
> +{
> +    LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]);
> +    LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]);
> +    LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]);
> +
> +    declare_func(void, const double *in, ptrdiff_t len, int lag, double *out);
> +
> +    av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER);
> +
> +    for (int i = 0; i < MAX_LPC_ORDER; i++)
> +        src[i] = 0.;
> +
> +    src += MAX_LPC_ORDER;
> +
> +    for (ptrdiff_t i = 0; i < len; i++) {
> +        src[i] = (double)rnd() / (double)UINT_MAX;
> +    }
> +
> +    call_ref(src, len, lag, dst0);
> +    call_new(src, len, lag, dst1);
> +
> +    for (size_t i = 0; i < lag; i++) {
> +        if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) {

checkasm: using random seed 2504816888
SSE2:
  - lpc.apply_welch_window_even [OK]
  - lpc.apply_welch_window_odd  [OK]
0:  770.224646270451 -  770.382378714191 = -0.15773244374
    autocorr_10_sse2 (lpc.c:86)
  - lpc.compute_autocorr_10     [FAILED]
0:  807.574416481743 -  807.732148925482 = -0.157732443739
    autocorr_30_sse2 (lpc.c:86)
  - lpc.compute_autocorr_30     [FAILED]
0:  787.329053288888 -  787.486785732628 = -0.15773244374
    autocorr_32_sse2 (lpc.c:86)
  - lpc.compute_autocorr_32     [FAILED]

checkasm: using random seed 827008587
SSE2:
  - lpc.apply_welch_window_even [OK]
  - lpc.apply_welch_window_odd  [OK]
  - lpc.compute_autocorr_10     [OK]
  - lpc.compute_autocorr_30     [OK]
  - lpc.compute_autocorr_32     [OK]

Some seeds work, others don't. So I guess EPS is too small.

> +            fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n",
> +                    i, dst0[i], dst1[i], dst0[i] - dst1[i]);
> +            fail();
> +            break;
> +        }
> +    }
> +
> +    bench_new(src, len, lag, dst1);
> +}
> +
>   void checkasm_check_lpc(void)
>   {
>       LPCContext ctx;
> -    int len = rnd() % 5000;
> +    int len = 2000 + (rnd() % 3000);
> +    static const int lags[] = { 10, 30, 32 };
> +
>       ff_lpc_init(&ctx, 32, 16, FF_LPC_TYPE_DEFAULT);
>   
>       if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) {
> @@ -72,6 +108,13 @@ void checkasm_check_lpc(void)
>           test_window(len | 1);
>       }
>       report("apply_welch_window_odd");
> -
>       ff_lpc_end(&ctx);
> +
> +    for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) {
> +        ff_lpc_init(&ctx, len, lags[i], FF_LPC_TYPE_DEFAULT);
> +        if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i]))
> +            test_compute_autocorr(len, lags[i]);
> +        report("compute_autocorr_%d", lags[i]);
> +        ff_lpc_end(&ctx);
> +    }
>   }
Rémi Denis-Courmont Dec. 17, 2023, 4:35 p.m. UTC | #2
Le sunnuntaina 17. joulukuuta 2023, 18.09.45 EET James Almer a écrit :
> On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote:
> > ---
> > 
> >   tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
> >   1 file changed, 45 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c
> > index 592e34c03d..9b33f8a3b0 100644
> > --- a/tests/checkasm/lpc.c
> > +++ b/tests/checkasm/lpc.c
> > @@ -57,10 +57,46 @@ static void test_window(int len)
> > 
> >       bench_new(src, len, dst1);
> >   
> >   }
> > 
> > +static void test_compute_autocorr(ptrdiff_t len, int lag)
> > +{
> > +    LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]);
> > +    LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]);
> > +    LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]);
> > +
> > +    declare_func(void, const double *in, ptrdiff_t len, int lag, double
> > *out); +
> > +    av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER);
> > +
> > +    for (int i = 0; i < MAX_LPC_ORDER; i++)
> > +        src[i] = 0.;
> > +
> > +    src += MAX_LPC_ORDER;
> > +
> > +    for (ptrdiff_t i = 0; i < len; i++) {
> > +        src[i] = (double)rnd() / (double)UINT_MAX;
> > +    }
> > +
> > +    call_ref(src, len, lag, dst0);
> > +    call_new(src, len, lag, dst1);
> > +
> > +    for (size_t i = 0; i < lag; i++) {
> > +        if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) {
> 
> checkasm: using random seed 2504816888
> SSE2:
>   - lpc.apply_welch_window_even [OK]
>   - lpc.apply_welch_window_odd  [OK]
> 0:  770.224646270451 -  770.382378714191 = -0.15773244374
>     autocorr_10_sse2 (lpc.c:86)
>   - lpc.compute_autocorr_10     [FAILED]
> 0:  807.574416481743 -  807.732148925482 = -0.157732443739
>     autocorr_30_sse2 (lpc.c:86)
>   - lpc.compute_autocorr_30     [FAILED]
> 0:  787.329053288888 -  787.486785732628 = -0.15773244374
>     autocorr_32_sse2 (lpc.c:86)
>   - lpc.compute_autocorr_32     [FAILED]
> 
> checkasm: using random seed 827008587
> SSE2:
>   - lpc.apply_welch_window_even [OK]
>   - lpc.apply_welch_window_odd  [OK]
>   - lpc.compute_autocorr_10     [OK]
>   - lpc.compute_autocorr_30     [OK]
>   - lpc.compute_autocorr_32     [OK]
> 
> Some seeds work, others don't. So i guess EPS is too small

Rounding errors would not cause a constant gap across the different test cases. 
This is most likely an off-by-one in the x86 code. I don't know if this is a 
bug in the x86 code, or the test case being a little loose with input 
parameters, and I have neither the time nor the motivation, not to mention the
skills, to figure that out, so there will be no test cases for this function
from me after all.

The RV loop has no such issue - always matches the C reference AFAICT.
Martin Storsjö Dec. 17, 2023, 9:57 p.m. UTC | #3
On Sun, 17 Dec 2023, Rémi Denis-Courmont wrote:

> Le sunnuntaina 17. joulukuuta 2023, 18.09.45 EET James Almer a écrit :
>> On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote:
>> > ---
>> > 
>> >   tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++--
>> >   1 file changed, 45 insertions(+), 2 deletions(-)
>> > 
>> > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c
>> > index 592e34c03d..9b33f8a3b0 100644
>> > --- a/tests/checkasm/lpc.c
>> > +++ b/tests/checkasm/lpc.c
>> > @@ -57,10 +57,46 @@ static void test_window(int len)
>> > 
>> >       bench_new(src, len, dst1);
>> > 
>> >   }
>> > 
>> > +static void test_compute_autocorr(ptrdiff_t len, int lag)
>> > +{
>> > +    LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]);
>> > +    LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]);
>> > +    LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]);
>> > +
>> > +    declare_func(void, const double *in, ptrdiff_t len, int lag, double
>> > *out); +
>> > +    av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER);
>> > +
>> > +    for (int i = 0; i < MAX_LPC_ORDER; i++)
>> > +        src[i] = 0.;
>> > +
>> > +    src += MAX_LPC_ORDER;
>> > +
>> > +    for (ptrdiff_t i = 0; i < len; i++) {
>> > +        src[i] = (double)rnd() / (double)UINT_MAX;
>> > +    }
>> > +
>> > +    call_ref(src, len, lag, dst0);
>> > +    call_new(src, len, lag, dst1);
>> > +
>> > +    for (size_t i = 0; i < lag; i++) {
>> > +        if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) {
>> 
>> checkasm: using random seed 2504816888
>> SSE2:
>>   - lpc.apply_welch_window_even [OK]
>>   - lpc.apply_welch_window_odd  [OK]
>> 0:  770.224646270451 -  770.382378714191 = -0.15773244374
>>     autocorr_10_sse2 (lpc.c:86)
>>   - lpc.compute_autocorr_10     [FAILED]
>> 0:  807.574416481743 -  807.732148925482 = -0.157732443739
>>     autocorr_30_sse2 (lpc.c:86)
>>   - lpc.compute_autocorr_30     [FAILED]
>> 0:  787.329053288888 -  787.486785732628 = -0.15773244374
>>     autocorr_32_sse2 (lpc.c:86)
>>   - lpc.compute_autocorr_32     [FAILED]
>> 
>> checkasm: using random seed 827008587
>> SSE2:
>>   - lpc.apply_welch_window_even [OK]
>>   - lpc.apply_welch_window_odd  [OK]
>>   - lpc.compute_autocorr_10     [OK]
>>   - lpc.compute_autocorr_30     [OK]
>>   - lpc.compute_autocorr_32     [OK]
>> 
>> Some seeds work, others don't. So i guess EPS is too small
>
> Rounding errors would not cause a constant gap across the different test cases. 
> This is most likely an off-by-one in the x86 code. I don't know if this is a 
> bug in the x86 code, or the test case being a little loose with input 
> parameters, and I have neither time, nor motivation not to mention skills to 
> figure that out, so there will be no test cases for this function from me 
> after all.

FWIW, we've had these situations elsewhere before as well, in swscale, 
where the existing x86 assembly mismatches the C code in nontrivial ways, 
and we have new assembly (aarch64 in that case) that is missing a test 
(even if one was written) due to this.

First I considered if we should collect these extra checkasm tests in some 
branch somewhere, so they aren't lost, as they are useful when working on 
assembly on other architectures.

But rather than having the code rot, forgotten in a stray branch 
somewhere, I wonder if we should just go ahead and merge it with an #if 
!ARCH_X86 or something, together with a notable FIXME comment.

That would keep the test coverage for new asm implementations, avoid code 
rot, and leave the opportunity to sort things out easily available for 
whoever wants to dissect the old existing x86 assembly implementations.

That's clearly not ideal, but would pragmatically be better than to just 
not merge the new checkasm test at all. What do others think?

// Martin
Rémi Denis-Courmont Dec. 18, 2023, 4:34 p.m. UTC | #4
Le sunnuntaina 17. joulukuuta 2023, 23.57.50 EET Martin Storsjö a écrit :
> > Rounding errors would not cause a constant gap across the different test
> > cases. This is most likely an off-by-one in the x86 code. I don't know if
> > this is a bug in the x86 code, or the test case being a little loose with
> > input parameters, and I have neither time, nor motivation not to mention
> > skills to figure that out, so there will be no test cases for this
> > function from me after all.
> 
> FWIW, we've had these situations elsewhere before as well, in swscale,
> where the existing x86 assembly mismatches the C code in nontrivial ways,
> and we have new assembly (aarch64 in that case) that is missing a test
> (even if one was written) due to this.
> 
> First I considered if we should collect these extra checkasm tests in some
> branch somewhere, so they aren't lost, as they are useful when working on
> assembly on other architectures.
> 
> But rather than having the code rot, forgotten in a stray branch
> somewhere, I wonder if we should just go ahead and merge it with an #if
> !ARCH_X86 or something, together with a notable FIXME comment.

I'd certainly welcome more checkasm that literally anyone other than me wrote. 
If the divergence in the X86 code is simply due to optimising an inexact 
algorithm differently, that seems fine. 

But if it is a case that the X86 code is demonstrably buggy, I think that it 
should be commented out or removed. That would not only fix a bug, but also put 
stronger incentives for X86 fanboys to actually fix it. Worst case, the 
optimisation has become meaningless and we have actually fixed a bug.

Though I don't know which of these cases this one, or your swscale tests, fall into.
FFmpeg Technical Committee Dec. 18, 2023, 4:58 p.m. UTC | #5
Quoting Martin Storsjö (2023-12-17 22:57:50)
> 
> FWIW, we've had these situations elsewhere before as well, in swscale, 
> where the existing x86 assembly mismatches the C code in nontrivial ways, 
> and we have new assembly (aarch64 in that case) that is missing a test 
> (even if one was written) due to this.
> 
> First I considered if we should collect these extra checkasm tests in some 
> branch somewhere, so they aren't lost, as they are useful when working on 
> assembly on other architectures.
> 
> But rather than having the code rot, forgotten in a stray branch 
> somewhere, I wonder if we should just go ahead and merge it with an #if 
> !ARCH_X86 or something, together with a notable FIXME comment.
> 
> That would keep the test coverage for new asm implementations, avoid code 
> rot, and leave the opportunity to sort things out easily available for 
> whoever wants to dissect the old existing x86 assembly implementations.
> 
> That's clearly not ideal, but would pragmatically be better than to just 
> not merge the new checkasm test at all. What do others think?

FWIW what you propose sounds good to me.
Michael Niedermayer Dec. 18, 2023, 5:21 p.m. UTC | #6
On Sun, Dec 17, 2023 at 11:57:50PM +0200, Martin Storsjö wrote:
[...]
> FWIW, we've had these situations elsewhere before as well, in swscale, where
> the existing x86 assembly mismatches the C code in nontrivial ways, and we
> have new assembly (aarch64 in that case) that is missing a test (even if one
> was written) due to this.
> 
> First I considered if we should collect these extra checkasm tests in some
> branch somewhere, so they aren't lost, as they are useful when working on
> assembly on other architectures.
> 

> But rather than having the code rot, forgotten in a stray branch somewhere,
> I wonder if we should just go ahead and merge it with an #if !ARCH_X86 or
> something, together with a notable FIXME comment.

+1
I suggest, if it's easy, adjusting the threshold rather than disabling the test

thx

[...]
James Almer Dec. 18, 2023, 5:25 p.m. UTC | #7
On 12/18/2023 1:34 PM, Rémi Denis-Courmont wrote:
> Le sunnuntaina 17. joulukuuta 2023, 23.57.50 EET Martin Storsjö a écrit :
>>> Rounding errors would not cause a constant gap across the different test
>>> cases. This is most likely an off-by-one in the x86 code. I don't know if
>>> this is a bug in the x86 code, or the test case being a little loose with
>>> input parameters, and I have neither time, nor motivation not to mention
>>> skills to figure that out, so there will be no test cases for this
>>> function from me after all.
>>
>> FWIW, we've had these situations elsewhere before as well, in swscale,
>> where the existing x86 assembly mismatches the C code in nontrivial ways,
>> and we have new assembly (aarch64 in that case) that is missing a test
>> (even if one was written) due to this.
>>
>> First I considered if we should collect these extra checkasm tests in some
>> branch somewhere, so they aren't lost, as they are useful when working on
>> assembly on other architectures.
>>
>> But rather than having the code rot, forgotten in a stray branch
>> somewhere, I wonder if we should just go ahead and merge it with an #if
>> !ARCH_X86 or something, together with a notable FIXME comment.
> 
> I'd certainly welcome more checkasm that literally anyone other than me wrote.
> If the divergence in the X86 code is simply due to optimising an inexact
> algorithm differently, that seems fine.
> 
> But if it is a case that the X86 code is demonstrably buggy, I think that it
> should be commented out or removed. That would not only fix a bug, but also put
> stronger incentives for X86 fanboys to actually fix it. Worst case, the
> optimisation has become meaningless and we have actually fixed a bug.

I looked at the sse2 implementation briefly and it may in fact be buggy.
diff mbox series

Patch

diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c
index 592e34c03d..9b33f8a3b0 100644
--- a/tests/checkasm/lpc.c
+++ b/tests/checkasm/lpc.c
@@ -57,10 +57,46 @@  static void test_window(int len)
     bench_new(src, len, dst1);
 }
 
+static void test_compute_autocorr(ptrdiff_t len, int lag)
+{
+    LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]);
+    LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]);
+    LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]);
+
+    declare_func(void, const double *in, ptrdiff_t len, int lag, double *out);
+
+    av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER);
+
+    for (int i = 0; i < MAX_LPC_ORDER; i++)
+        src[i] = 0.;
+
+    src += MAX_LPC_ORDER;
+
+    for (ptrdiff_t i = 0; i < len; i++) {
+        src[i] = (double)rnd() / (double)UINT_MAX;
+    }
+
+    call_ref(src, len, lag, dst0);
+    call_new(src, len, lag, dst1);
+
+    for (size_t i = 0; i < lag; i++) {
+        if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) {
+            fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n",
+                    i, dst0[i], dst1[i], dst0[i] - dst1[i]);
+            fail();
+            break;
+        }
+    }
+
+    bench_new(src, len, lag, dst1);
+}
+
 void checkasm_check_lpc(void)
 {
     LPCContext ctx;
-    int len = rnd() % 5000;
+    int len = 2000 + (rnd() % 3000);
+    static const int lags[] = { 10, 30, 32 };
+
     ff_lpc_init(&ctx, 32, 16, FF_LPC_TYPE_DEFAULT);
 
     if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) {
@@ -72,6 +108,13 @@  void checkasm_check_lpc(void)
         test_window(len | 1);
     }
     report("apply_welch_window_odd");
-
     ff_lpc_end(&ctx);
+
+    for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) {
+        ff_lpc_init(&ctx, len, lags[i], FF_LPC_TYPE_DEFAULT);
+        if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i]))
+            test_compute_autocorr(len, lags[i]);
+        report("compute_autocorr_%d", lags[i]);
+        ff_lpc_end(&ctx);
+    }
 }