Message ID | 20231217091343.34077-1-remi@remlab.net |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,PATCHv2,1/1] checkasm/lpc: test compute_autocorr | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | fail | Make fate failed |
On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote: > --- > tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 45 insertions(+), 2 deletions(-) > > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c > index 592e34c03d..9b33f8a3b0 100644 > --- a/tests/checkasm/lpc.c > +++ b/tests/checkasm/lpc.c > @@ -57,10 +57,46 @@ static void test_window(int len) > bench_new(src, len, dst1); > } > > +static void test_compute_autocorr(ptrdiff_t len, int lag) > +{ > + LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]); > + LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]); > + LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]); > + > + declare_func(void, const double *in, ptrdiff_t len, int lag, double *out); > + > + av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER); > + > + for (int i = 0; i < MAX_LPC_ORDER; i++) > + src[i] = 0.; > + > + src += MAX_LPC_ORDER; > + > + for (ptrdiff_t i = 0; i < len; i++) { > + src[i] = (double)rnd() / (double)UINT_MAX; > + } > + > + call_ref(src, len, lag, dst0); > + call_new(src, len, lag, dst1); > + > + for (size_t i = 0; i < lag; i++) { > + if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { checkasm: using random seed 2504816888 SSE2: - lpc.apply_welch_window_even [OK] - lpc.apply_welch_window_odd [OK] 0: 770.224646270451 - 770.382378714191 = -0.15773244374 autocorr_10_sse2 (lpc.c:86) - lpc.compute_autocorr_10 [FAILED] 0: 807.574416481743 - 807.732148925482 = -0.157732443739 autocorr_30_sse2 (lpc.c:86) - lpc.compute_autocorr_30 [FAILED] 0: 787.329053288888 - 787.486785732628 = -0.15773244374 autocorr_32_sse2 (lpc.c:86) - lpc.compute_autocorr_32 [FAILED] checkasm: using random seed 827008587 SSE2: - lpc.apply_welch_window_even [OK] - lpc.apply_welch_window_odd [OK] - lpc.compute_autocorr_10 [OK] - lpc.compute_autocorr_30 [OK] - lpc.compute_autocorr_32 [OK] Some seeds work, others don't. So i guess EPS is too small. 
> + fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n", > + i, dst0[i], dst1[i], dst0[i] - dst1[i]); > + fail(); > + break; > + } > + } > + > + bench_new(src, len, lag, dst1); > +} > + > void checkasm_check_lpc(void) > { > LPCContext ctx; > - int len = rnd() % 5000; > + int len = 2000 + (rnd() % 3000); > + static const int lags[] = { 10, 30, 32 }; > + > ff_lpc_init(&ctx, 32, 16, FF_LPC_TYPE_DEFAULT); > > if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) { > @@ -72,6 +108,13 @@ void checkasm_check_lpc(void) > test_window(len | 1); > } > report("apply_welch_window_odd"); > - > ff_lpc_end(&ctx); > + > + for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) { > + ff_lpc_init(&ctx, len, lags[i], FF_LPC_TYPE_DEFAULT); > + if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i])) > + test_compute_autocorr(len, lags[i]); > + report("compute_autocorr_%d", lags[i]); > + ff_lpc_end(&ctx); > + } > }
Le sunnuntaina 17. joulukuuta 2023, 18.09.45 EET James Almer a écrit : > On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote: > > --- > > > > tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- > > 1 file changed, 45 insertions(+), 2 deletions(-) > > > > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c > > index 592e34c03d..9b33f8a3b0 100644 > > --- a/tests/checkasm/lpc.c > > +++ b/tests/checkasm/lpc.c > > @@ -57,10 +57,46 @@ static void test_window(int len) > > > > bench_new(src, len, dst1); > > > > } > > > > +static void test_compute_autocorr(ptrdiff_t len, int lag) > > +{ > > + LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]); > > + LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]); > > + LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]); > > + > > + declare_func(void, const double *in, ptrdiff_t len, int lag, double > > *out); + > > + av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER); > > + > > + for (int i = 0; i < MAX_LPC_ORDER; i++) > > + src[i] = 0.; > > + > > + src += MAX_LPC_ORDER; > > + > > + for (ptrdiff_t i = 0; i < len; i++) { > > + src[i] = (double)rnd() / (double)UINT_MAX; > > + } > > + > > + call_ref(src, len, lag, dst0); > > + call_new(src, len, lag, dst1); > > + > > + for (size_t i = 0; i < lag; i++) { > > + if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { > > checkasm: using random seed 2504816888 > SSE2: > - lpc.apply_welch_window_even [OK] > - lpc.apply_welch_window_odd [OK] > 0: 770.224646270451 - 770.382378714191 = -0.15773244374 > autocorr_10_sse2 (lpc.c:86) > - lpc.compute_autocorr_10 [FAILED] > 0: 807.574416481743 - 807.732148925482 = -0.157732443739 > autocorr_30_sse2 (lpc.c:86) > - lpc.compute_autocorr_30 [FAILED] > 0: 787.329053288888 - 787.486785732628 = -0.15773244374 > autocorr_32_sse2 (lpc.c:86) > - lpc.compute_autocorr_32 [FAILED] > > checkasm: using random seed 827008587 > SSE2: > - lpc.apply_welch_window_even [OK] > - lpc.apply_welch_window_odd [OK] > - lpc.compute_autocorr_10 [OK] > - 
lpc.compute_autocorr_30 [OK] > - lpc.compute_autocorr_32 [OK] > > Some seeds work, others don't. So i guess EPS is too small Rounding errors would not cause a constant gap across the different test cases. This is most likely an off-by-one in the x86 code. I don't know if this is a bug in the x86 code, or the test case being a little loose with input parameters, and I have neither the time nor the motivation, not to mention the skills, to figure that out, so there will be no test cases for this function from me after all. The RV loop has no such issue - it always matches the C reference AFAICT.
On Sun, 17 Dec 2023, Rémi Denis-Courmont wrote: > Le sunnuntaina 17. joulukuuta 2023, 18.09.45 EET James Almer a écrit : >> On 12/17/2023 6:13 AM, Rémi Denis-Courmont wrote: >> > --- >> > >> > tests/checkasm/lpc.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- >> > 1 file changed, 45 insertions(+), 2 deletions(-) >> > >> > diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c >> > index 592e34c03d..9b33f8a3b0 100644 >> > --- a/tests/checkasm/lpc.c >> > +++ b/tests/checkasm/lpc.c >> > @@ -57,10 +57,46 @@ static void test_window(int len) >> > >> > bench_new(src, len, dst1); >> > >> > } >> > >> > +static void test_compute_autocorr(ptrdiff_t len, int lag) >> > +{ >> > + LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]); >> > + LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]); >> > + LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]); >> > + >> > + declare_func(void, const double *in, ptrdiff_t len, int lag, double >> > *out); + >> > + av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER); >> > + >> > + for (int i = 0; i < MAX_LPC_ORDER; i++) >> > + src[i] = 0.; >> > + >> > + src += MAX_LPC_ORDER; >> > + >> > + for (ptrdiff_t i = 0; i < len; i++) { >> > + src[i] = (double)rnd() / (double)UINT_MAX; >> > + } >> > + >> > + call_ref(src, len, lag, dst0); >> > + call_new(src, len, lag, dst1); >> > + >> > + for (size_t i = 0; i < lag; i++) { >> > + if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { >> >> checkasm: using random seed 2504816888 >> SSE2: >> - lpc.apply_welch_window_even [OK] >> - lpc.apply_welch_window_odd [OK] >> 0: 770.224646270451 - 770.382378714191 = -0.15773244374 >> autocorr_10_sse2 (lpc.c:86) >> - lpc.compute_autocorr_10 [FAILED] >> 0: 807.574416481743 - 807.732148925482 = -0.157732443739 >> autocorr_30_sse2 (lpc.c:86) >> - lpc.compute_autocorr_30 [FAILED] >> 0: 787.329053288888 - 787.486785732628 = -0.15773244374 >> autocorr_32_sse2 (lpc.c:86) >> - lpc.compute_autocorr_32 [FAILED] >> >> checkasm: using random seed 827008587 >> SSE2: >> - 
lpc.apply_welch_window_even [OK] >> - lpc.apply_welch_window_odd [OK] >> - lpc.compute_autocorr_10 [OK] >> - lpc.compute_autocorr_30 [OK] >> - lpc.compute_autocorr_32 [OK] >> >> Some seeds work, others don't. So i guess EPS is too small > > Rounding errors would not cause a constant gap across the different test cases. > This is most likely an off-by-one in the x86 code. I don't know if this is a > bug in the x86 code, or the test case being a little loose with input > parameters, and I have neither time, nor motivation not to mention skills to > figure that out, so there will be no test cases for this function form me > afterall. FWIW, we've had these situations elsewhere before as well, in swscale, where the existing x86 assembly mismatches the C code in nontrivial ways, and we have new assembly (aarch64 in that case) that is missing a test (even if one was written) due to this. First I considered if we should collect these extra checkasm tests in some branch somewhere, so they aren't lost, as they are useful when working on assembly on other architectures. But rather than having the code rot, forgotten in a stray branch somewhere, I wonder if we should just go ahead and merge it with an #if !ARCH_X86 or something, together with a notable FIXME comment. That would keep the test coverage for new asm implementations, avoid code rot, and leave the opportunity to sort things out easily available for whoever wants to dissect the old existing x86 assembly implementations. That's clearly not ideal, but would pragmatically be better than to just not merge the new checkasm test at all. What do others think? // Martin
Le sunnuntaina 17. joulukuuta 2023, 23.57.50 EET Martin Storsjö a écrit : > > Rounding errors would not cause a constant gap across the different test > > cases. This is most likely an off-by-one in the x86 code. I don't know if > > this is a bug in the x86 code, or the test case being a little loose with > > input parameters, and I have neither time, nor motivation not to mention > > skills to figure that out, so there will be no test cases for this > > function form me afterall. > > FWIW, we've had these situations elsewhere before as well, in swscale, > where the existing x86 assembly mismatches the C code in nontrivial ways, > and we have new assembly (aarch64 in that case) that is missing a test > (even if one was written) due to this. > > First I considered if we should collect these extra checkasm tests in some > branch somewhere, so they aren't lost, as they are useful when working on > assembly on other architectures. > > But rather than having the code rot, forgotten in a stray branch > somewhere, I wonder if we should just go ahead and merge it with an #if > !ARCH_X86 or something, together with a notable FIXME comment. I'd certainly welcome more checkasm tests written by literally anyone other than me. If the divergence in the X86 code is simply due to optimising an inexact algorithm differently, that seems fine. But if it is the case that the X86 code is demonstrably buggy, I think that it should be commented out or removed. That would not only fix a bug, but also give X86 fanboys a stronger incentive to actually fix it. Worst case, the optimisation has become meaningless and we have actually fixed a bug. Though I don't know which of these cases this one, or your swscale tests, falls into.
Quoting Martin Storsjö (2023-12-17 22:57:50) > > FWIW, we've had these situations elsewhere before as well, in swscale, > where the existing x86 assembly mismatches the C code in nontrivial ways, > and we have new assembly (aarch64 in that case) that is missing a test > (even if one was written) due to this. > > First I considered if we should collect these extra checkasm tests in some > branch somewhere, so they aren't lost, as they are useful when working on > assembly on other architectures. > > But rather than having the code rot, forgotten in a stray branch > somewhere, I wonder if we should just go ahead and merge it with an #if > !ARCH_X86 or something, together with a notable FIXME comment. > > That would keep the test coverage for new asm implementations, avoid code > rot, and leave the opportunity to sort things out easily available for > whoever wants to dissect the old existing x86 assembly implementations. > > That's clearly not ideal, but would pragmatically be better than to just > not merge the new checkasm test at all. What do others think? FWIW what you propose sounds good to me.
On Sun, Dec 17, 2023 at 11:57:50PM +0200, Martin Storsjö wrote: [...] > FWIW, we've had these situations elsewhere before as well, in swscale, where > the existing x86 assembly mismatches the C code in nontrivial ways, and we > have new assembly (aarch64 in that case) that is missing a test (even if one > was written) due to this. > > First I considered if we should collect these extra checkasm tests in some > branch somewhere, so they aren't lost, as they are useful when working on > assembly on other architectures. > > But rather than having the code rot, forgotten in a stray branch somewhere, > I wonder if we should just go ahead and merge it with an #if !ARCH_X86 or > something, together with a notable FIXME comment. +1. I suggest, if it's easy, adjusting the threshold rather than disabling the test. Thx [...]
On 12/18/2023 1:34 PM, Rémi Denis-Courmont wrote: > Le sunnuntaina 17. joulukuuta 2023, 23.57.50 EET Martin Storsjö a écrit : >>> Rounding errors would not cause a constant gap across the different test >>> cases. This is most likely an off-by-one in the x86 code. I don't know if >>> this is a bug in the x86 code, or the test case being a little loose with >>> input parameters, and I have neither time, nor motivation not to mention >>> skills to figure that out, so there will be no test cases for this >>> function form me afterall. >> >> FWIW, we've had these situations elsewhere before as well, in swscale, >> where the existing x86 assembly mismatches the C code in nontrivial ways, >> and we have new assembly (aarch64 in that case) that is missing a test >> (even if one was written) due to this. >> >> First I considered if we should collect these extra checkasm tests in some >> branch somewhere, so they aren't lost, as they are useful when working on >> assembly on other architectures. >> >> But rather than having the code rot, forgotten in a stray branch >> somewhere, I wonder if we should just go ahead and merge it with an #if >> !ARCH_X86 or something, together with a notable FIXME comment. > > I'd certainly welcome more checkasm that literally anyone other than me wrote. > If the divergence in the X86 code is simply due to optimising an inexact > algorithm differently, that seems fine. > > But if it is a case that the X86 code is demonstrably buggy, I think that it > should be commented out or removed. That would not only fix a bug, but also put > stronger incentives for X68 fanboys to actually fix it. Worst case, the > optimisation has become meaningless and we have actually fixed a bug. I looked at the sse2 implementation briefly and it may in fact be buggy.
diff --git a/tests/checkasm/lpc.c b/tests/checkasm/lpc.c index 592e34c03d..9b33f8a3b0 100644 --- a/tests/checkasm/lpc.c +++ b/tests/checkasm/lpc.c @@ -57,10 +57,46 @@ static void test_window(int len) bench_new(src, len, dst1); } +static void test_compute_autocorr(ptrdiff_t len, int lag) +{ + LOCAL_ALIGNED(32, double, src, [5000 + 2 + MAX_LPC_ORDER]); + LOCAL_ALIGNED(16, double, dst0, [MAX_LPC_ORDER + 1]); + LOCAL_ALIGNED(16, double, dst1, [MAX_LPC_ORDER + 1]); + + declare_func(void, const double *in, ptrdiff_t len, int lag, double *out); + + av_assert0(lag >= 0 && lag <= MAX_LPC_ORDER); + + for (int i = 0; i < MAX_LPC_ORDER; i++) + src[i] = 0.; + + src += MAX_LPC_ORDER; + + for (ptrdiff_t i = 0; i < len; i++) { + src[i] = (double)rnd() / (double)UINT_MAX; + } + + call_ref(src, len, lag, dst0); + call_new(src, len, lag, dst1); + + for (size_t i = 0; i < lag; i++) { + if (!double_near_abs_eps(dst0[i], dst1[i], EPS)) { + fprintf(stderr, "%zu: %- .12f - %- .12f = % .12g\n", + i, dst0[i], dst1[i], dst0[i] - dst1[i]); + fail(); + break; + } + } + + bench_new(src, len, lag, dst1); +} + void checkasm_check_lpc(void) { LPCContext ctx; - int len = rnd() % 5000; + int len = 2000 + (rnd() % 3000); + static const int lags[] = { 10, 30, 32 }; + ff_lpc_init(&ctx, 32, 16, FF_LPC_TYPE_DEFAULT); if (check_func(ctx.lpc_apply_welch_window, "apply_welch_window_even")) { @@ -72,6 +108,13 @@ void checkasm_check_lpc(void) test_window(len | 1); } report("apply_welch_window_odd"); - ff_lpc_end(&ctx); + + for (size_t i = 0; i < FF_ARRAY_ELEMS(lags); i++) { + ff_lpc_init(&ctx, len, lags[i], FF_LPC_TYPE_DEFAULT); + if (check_func(ctx.lpc_compute_autocorr, "autocorr_%d", lags[i])) + test_compute_autocorr(len, lags[i]); + report("compute_autocorr_%d", lags[i]); + ff_lpc_end(&ctx); + } }