diff mbox series

[FFmpeg-devel,PATCHv3] checkasm/lpc: test compute_autocorr

Message ID 20240527160159.750956-1-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,PATCHv3] checkasm/lpc: test compute_autocorr | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed

Commit Message

Rémi Denis-Courmont May 27, 2024, 4:01 p.m. UTC
---
Changes since v2:
- Scale the error factor to length since this computes sums.
- Check the last element from results.
- Use fixed vector size for benchmarks.

---
 tests/checkasm/lpc.c | 51 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

Comments

Rémi Denis-Courmont May 27, 2024, 4:04 p.m. UTC | #1
Le maanantaina 27. toukokuuta 2024, 19.01.59 EEST Rémi Denis-Courmont a écrit 
:
> ---
> Changes since v2:
> - Scale the error factor to length since this computes sums.
> - Check the last element from results.
> - Use fixed vector size for benchmarks.
- Test real use lag values (from FATE).
- Do not disable x86.
James Almer May 27, 2024, 7:10 p.m. UTC | #2
On 5/27/2024 1:01 PM, Rémi Denis-Courmont wrote:
> ---
> Changes since v2:
> - Scale the error factor to length since this computes sums.
> - Check the last element from results.
> - Use fixed vector size for benchmarks.
> 
> ---
>   tests/checkasm/lpc.c | 51 +++++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 48 insertions(+), 3 deletions(-)

checkasm: using random seed 883526087
checkasm: bench runs 1024 (1 << 10)
SSE2:
  - lpc.apply_welch_window_even [OK]
  - lpc.apply_welch_window_odd  [OK]
8:  666.011902576448 -  665.600444506565 =  0.411458069884
    autocorr_8_sse2 (lpc.c:88)
  - lpc.compute_autocorr        [FAILED]
James Almer May 27, 2024, 7:15 p.m. UTC | #3
On 5/27/2024 4:10 PM, James Almer wrote:
> On 5/27/2024 1:01 PM, Rémi Denis-Courmont wrote:
>> ---
>> Changes since v2:
>> - Scale the error factor to length since this computes sums.
>> - Check the last element from results.
>> - Use fixed vector size for benchmarks.
>>
>> ---
>>   tests/checkasm/lpc.c | 51 +++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 48 insertions(+), 3 deletions(-)
> 
> checkasm: using random seed 883526087
> checkasm: bench runs 1024 (1 << 10)
> SSE2:
>   - lpc.apply_welch_window_even [OK]
>   - lpc.apply_welch_window_odd  [OK]
> 8:  666.011902576448 -  665.600444506565 =  0.411458069884
>     autocorr_8_sse2 (lpc.c:88)
>   - lpc.compute_autocorr        [FAILED]

The following fixes it:

> diff --git a/libavcodec/x86/lpc_init.c b/libavcodec/x86/lpc_init.c
> index f2fca53799..9f41639feb 100644
> --- a/libavcodec/x86/lpc_init.c
> +++ b/libavcodec/x86/lpc_init.c
> @@ -99,6 +99,15 @@ static void lpc_compute_autocorr_sse2(const double *data, ptrdiff_t len, int lag
>              );
>          }
>      }
> +
> +    if(j==lag){
> +        double sum = 1.0;
> +        for(int i=j-1; i<len; i+=2){
> +            sum += data[i  ] * data[i-j  ]
> +                 + data[i+1] * data[i-j+1];
> +        }
> +        autoc[j] = sum;
> +    }
>  }
> 
>  #endif /* HAVE_SSE2_INLINE */

So the SSE2 version is effectively broken, and ideally should be ported 
to nasm as it's fixed.
Rémi Denis-Courmont May 27, 2024, 7:21 p.m. UTC | #4
Le maanantaina 27. toukokuuta 2024, 22.15.40 EEST James Almer a écrit :
> On 5/27/2024 4:10 PM, James Almer wrote:
> > On 5/27/2024 1:01 PM, Rémi Denis-Courmont wrote:
> >> ---
> >> Changes since v2:
> >> - Scale the error factor to length since this computes sums.
> >> - Check the last element from results.
> >> - Use fixed vector size for benchmarks.
> >> 
> >> ---
> >>   tests/checkasm/lpc.c | 51 +++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 48 insertions(+), 3 deletions(-)
> > 
> > checkasm: using random seed 883526087
> > checkasm: bench runs 1024 (1 << 10)
> > 
> > SSE2:
> >   - lpc.apply_welch_window_even [OK]
> >   - lpc.apply_welch_window_odd  [OK]
> > 
> > 8:  666.011902576448 -  665.600444506565 =  0.411458069884
> > 
> >     autocorr_8_sse2 (lpc.c:88)
> >   - lpc.compute_autocorr        [FAILED]
> 
> The following fixes it:
> > diff --git a/libavcodec/x86/lpc_init.c b/libavcodec/x86/lpc_init.c
> > index f2fca53799..9f41639feb 100644
> > --- a/libavcodec/x86/lpc_init.c
> > +++ b/libavcodec/x86/lpc_init.c
> > @@ -99,6 +99,15 @@ static void lpc_compute_autocorr_sse2(const double
> > *data, ptrdiff_t len, int lag> 
> >              );
> >          
> >          }
> >      
> >      }
> > 
> > +
> > +    if(j==lag){
> > +        double sum = 1.0;
> > +        for(int i=j-1; i<len; i+=2){
> > +            sum += data[i  ] * data[i-j  ]
> > +                 + data[i+1] * data[i-j+1];
> > +        }
> > +        autoc[j] = sum;
> > +    }
> > 
> >  }
> >  
> >  #endif /* HAVE_SSE2_INLINE */
> 
> So the SSE2 version is effectively broken, and ideally should be ported
> to nasm as it's fixed.

I also have my doubts about the C version. The `i += 2` looks a bit suspicious 
in the tail case.
James Almer May 28, 2024, 12:22 a.m. UTC | #5
On 5/27/2024 4:15 PM, James Almer wrote:
> On 5/27/2024 4:10 PM, James Almer wrote:
>> On 5/27/2024 1:01 PM, Rémi Denis-Courmont wrote:
>>> ---
>>> Changes since v2:
>>> - Scale the error factor to length since this computes sums.
>>> - Check the last element from results.
>>> - Use fixed vector size for benchmarks.
>>>
>>> ---
>>>   tests/checkasm/lpc.c | 51 +++++++++++++++++++++++++++++++++++++++++---
>>>   1 file changed, 48 insertions(+), 3 deletions(-)
>>
>> checkasm: using random seed 883526087
>> checkasm: bench runs 1024 (1 << 10)
>> SSE2:
>>   - lpc.apply_welch_window_even [OK]
>>   - lpc.apply_welch_window_odd  [OK]
>> 8:  666.011902576448 -  665.600444506565 =  0.411458069884
>>     autocorr_8_sse2 (lpc.c:88)
>>   - lpc.compute_autocorr        [FAILED]
> 
> The following fixes it:
> 
>> diff --git a/libavcodec/x86/lpc_init.c b/libavcodec/x86/lpc_init.c
>> index f2fca53799..9f41639feb 100644
>> --- a/libavcodec/x86/lpc_init.c
>> +++ b/libavcodec/x86/lpc_init.c
>> @@ -99,6 +99,15 @@ static void lpc_compute_autocorr_sse2(const double 
>> *data, ptrdiff_t len, int lag
>>              );
>>          }
>>      }
>> +
>> +    if(j==lag){
>> +        double sum = 1.0;
>> +        for(int i=j-1; i<len; i+=2){
>> +            sum += data[i  ] * data[i-j  ]
>> +                 + data[i+1] * data[i-j+1];
>> +        }
>> +        autoc[j] = sum;
>> +    }
>>  }
>>
>>  #endif /* HAVE_SSE2_INLINE */
> 
> So the SSE2 version is effectively broken, and ideally should be ported 
> to nasm as it's fixed.

Actually, that only fixes setting the last value. There are still 
failures in random places using several different seeds.
James Almer May 28, 2024, 2:55 a.m. UTC | #6
On 5/27/2024 9:22 PM, James Almer wrote:
> On 5/27/2024 4:15 PM, James Almer wrote:
>> On 5/27/2024 4:10 PM, James Almer wrote:
>>> On 5/27/2024 1:01 PM, Rémi Denis-Courmont wrote:
>>>> ---
>>>> Changes since v2:
>>>> - Scale the error factor to length since this computes sums.
>>>> - Check the last element from results.
>>>> - Use fixed vector size for benchmarks.
>>>>
>>>> ---
>>>>   tests/checkasm/lpc.c | 51 
>>>> +++++++++++++++++++++++++++++++++++++++++---
>>>>   1 file changed, 48 insertions(+), 3 deletions(-)
>>>
>>> checkasm: using random seed 883526087
>>> checkasm: bench runs 1024 (1 << 10)
>>> SSE2:
>>>   - lpc.apply_welch_window_even [OK]
>>>   - lpc.apply_welch_window_odd  [OK]
>>> 8:  666.011902576448 -  665.600444506565 =  0.411458069884
>>>     autocorr_8_sse2 (lpc.c:88)
>>>   - lpc.compute_autocorr        [FAILED]
>>
>> The following fixes it:
>>
>>> diff --git a/libavcodec/x86/lpc_init.c b/libavcodec/x86/lpc_init.c
>>> index f2fca53799..9f41639feb 100644
>>> --- a/libavcodec/x86/lpc_init.c
>>> +++ b/libavcodec/x86/lpc_init.c
>>> @@ -99,6 +99,15 @@ static void lpc_compute_autocorr_sse2(const double 
>>> *data, ptrdiff_t len, int lag
>>>              );
>>>          }
>>>      }
>>> +
>>> +    if(j==lag){
>>> +        double sum = 1.0;
>>> +        for(int i=j-1; i<len; i+=2){
>>> +            sum += data[i  ] * data[i-j  ]
>>> +                 + data[i+1] * data[i-j+1];
>>> +        }
>>> +        autoc[j] = sum;
>>> +    }
>>>  }
>>>
>>>  #endif /* HAVE_SSE2_INLINE */
>>
>> So the SSE2 version is effectively broken, and ideally should be 
>> ported to nasm as it's fixed.
> 
> Actually, that only fixes setting the last value. There are still 
> failures in random places using several different seeds.

So the failures occur only with odd input len values (with the change above 
to set the last output value applied, of course).
Maybe add tests for both even and odd values, same as we do for 
apply_welch_window, and disable the odd one until the sse2 function is fixed.

I guess odd values are never really used in actual encoding scenarios, 
seeing how FATE is unaffected.
diff mbox series

Patch

diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c
index 592e34c03d..22e5001ae8 100644
--- a/tests/checkasm/lpc.c
+++ b/tests/checkasm/lpc.c
@@ -16,6 +16,7 @@ 
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/mem_internal.h"
 
 #include "libavcodec/lpc.h"
@@ -30,7 +31,7 @@ 
         }                                                                 \
     } while (0)
 
-#define EPS 0.005
+#define EPS 0.0001
 
 static void test_window(int len)
 {
@@ -57,10 +58,47 @@  static void test_window(int len)
     bench_new(src, len, dst1);
 }
 
+static void test_compute_autocorr(ptrdiff_t len, int lag)
+{
+    const double eps = EPS * (double)len;
+    LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]);
+    LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]);
+    LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]);
+
+    declare_func(void, const double *in, ptrdiff_t len, int lag, double *out);
+
+    av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER);
+
+    for (int i = 0; i < MAX_LPC_ORDER; i++)
+        src[i] = 0.;
+
+    src += MAX_LPC_ORDER;
+
+    for (int i = 0; i < 5000 + 2; i++) {
+        src[i] = (double)rnd() / (double)UINT_MAX;
+    }
+
+    call_ref(src, len, lag, dst0);
+    call_new(src, len, lag, dst1);
+
+    for (size_t i = 0; i <= lag; i++) {
+        if (!double_near_abs_eps(dst0[i], dst1[i], eps)) {
+            fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n",
+                    i, dst0[i], dst1[i], dst0[i] - dst1[i]);
+            fail();
+            break;
+        }
+    }
+
+    bench_new(src, 4608, lag, dst1);
+}
+
 void checkasm_check_lpc(void)
 {
     LPCContext ctx;
-    int len = rnd() % 5000;
+    int len = 2000 + (rnd() % 3000);
+    static const int lags[] = { 8, 12, };
+
     ff_lpc_init(&ctx, 32, 16, FF_LPC_TYPE_DEFAULT);
 
     if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) {
@@ -72,6 +110,13 @@  void checkasm_check_lpc(void)
         test_window(len | 1);
     }
     report("apply_welch_window_odd");
-
     ff_lpc_end(&ctx);
+
+    for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) {
+        ff_lpc_init(&ctx, len, lags[i], FF_LPC_TYPE_DEFAULT);
+        if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i]))
+            test_compute_autocorr(len, lags[i]);
+        ff_lpc_end(&ctx);
+    }
+    report("compute_autocorr");
 }