Message ID | CAPYw7P4Set24dG+dZge_d97+hfTRaaEeyr41Md76Ht3bT=WY2Q@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] swresample: reuse DSP functions from avutil | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On Wed, May 10, 2023 at 04:25:06PM +0200, Paul B Mahol wrote: > Attached. > rematrix.c | 30 ++++++++++++++++++++++++++++-- > swresample.c | 5 +++++ > swresample_internal.h | 2 ++ > 3 files changed, 35 insertions(+), 2 deletions(-) > 2de854b7e4bacb3ea469a2fa6495053b679ae779 0001-swresample-reuse-DSP-functions-from-avutil.patch > From 1f6aded57aab1fd8c6c7ba6fa5261d7979666f66 Mon Sep 17 00:00:00 2001 > From: Paul B Mahol <onemda@gmail.com> > Date: Wed, 10 May 2023 15:41:01 +0200 > Subject: [PATCH] swresample: reuse DSP functions from avutil > > Improves generic mixing dramatically. Crashes libswresample/tests/swresample TEST: mono->quad, rate:16000->48000, fmt:dblp->dbl ==9257== ==9257== Process terminating with default action of signal 11 (SIGSEGV) ==9257== General Protection Fault ==9257== at 0x15552E: ??? (libavutil/x86/float_dsp.asm:290) ==9257== by 0x115B2A: swr_convert_internal (swresample.c:768) ==9257== by 0x116676: swr_convert (swresample.c:889) ==9257== by 0x11202E: main (swresample.c:355) ==9257== ==9257== HEAP SUMMARY: ==9257== in use at exit: 798,232 bytes in 59 blocks ==9257== total heap usage: 352 allocs, 293 frees, 21,612,392 bytes allocated ==9257== ==9257== LEAK SUMMARY: ==9257== definitely lost: 2,992 bytes in 34 blocks ==9257== indirectly lost: 0 bytes in 0 blocks ==9257== possibly lost: 0 bytes in 0 blocks ==9257== still reachable: 795,240 bytes in 25 blocks ==9257== suppressed: 0 bytes in 0 blocks ==9257== Rerun with --leak-check=full to see details of leaked memory ==9257== ==9257== For counts of detected and suppressed errors, rerun with: -v ==9257== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) Segmentation fault (core dumped) [...]
With fixed alignment requirements.
On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > With fixed alignment requirements. > rematrix.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++-- > swresample.c | 5 ++++ > swresample_internal.h | 2 + > 3 files changed, 59 insertions(+), 2 deletions(-) > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 0001-swresample-reuse-DSP-functions-from-avutil.patch > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 2001 > From: Paul B Mahol <onemda@gmail.com> > Date: Wed, 10 May 2023 15:41:01 +0200 > Subject: [PATCH] swresample: reuse DSP functions from avutil > > Improves generic mixing dramatically. > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > --- > libswresample/rematrix.c | 54 +++++++++++++++++++++++++++-- > libswresample/swresample.c | 5 +++ > libswresample/swresample_internal.h | 2 ++ > 3 files changed, 59 insertions(+), 2 deletions(-) > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > index 79e8a43eac..2133b0f90d 100644 > --- a/libswresample/rematrix.c > +++ b/libswresample/rematrix.c > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus > break;} > default: > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > - for(i=0; i<len; i++){ > + if (out->planar && in->planar) > + len1 = len & ~15; > + else > + len1 = 0; > + if ((intptr_t)out->ch[out_i] & 0x1f) > + len1 = 0; > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > 0; j++) { > + in_i = s->matrix_ch[out_i][1+j]; > + if ((intptr_t)in->ch[in_i] & 0x1f) { > + len1 = 0; > + break; > + } > + } Cant this be done outside the "inner" loop ? also this produces some new NaN values @@ -91810,16 +91810,16 @@ [e:0.246031 c:-nan max:0.988908] len: 936 [e:0.247006 c:-nan max:0.988908] len: 936 [e:0.247174 c:-nan max:0.988908] len: 936 -[e:0.197683 c:0.773693 max:0.825360] len: 936 -[e:0.192089 c:0.814010 max:0.820662] len: 936 +[e:0.245992 c:0.031094 max:0.988908] len: 936 +[e:0.246535 c:0.031025 max:0.988908] len: 936 [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 [...]
On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > With fixed alignment requirements. > > > rematrix.c | 54 > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > swresample.c | 5 ++++ > > swresample_internal.h | 2 + > > 3 files changed, 59 insertions(+), 2 deletions(-) > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 2001 > > From: Paul B Mahol <onemda@gmail.com> > > Date: Wed, 10 May 2023 15:41:01 +0200 > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > Improves generic mixing dramatically. > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > --- > > libswresample/rematrix.c | 54 +++++++++++++++++++++++++++-- > > libswresample/swresample.c | 5 +++ > > libswresample/swresample_internal.h | 2 ++ > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > index 79e8a43eac..2133b0f90d 100644 > > --- a/libswresample/rematrix.c > > +++ b/libswresample/rematrix.c > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData *out, > AudioData *in, int len, int mus > > break;} > > default: > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > - for(i=0; i<len; i++){ > > + if (out->planar && in->planar) > > + len1 = len & ~15; > > + else > > + len1 = 0; > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > + len1 = 0; > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > 0; > j++) { > > + in_i = s->matrix_ch[out_i][1+j]; > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > + len1 = 0; > > + break; > > + } > > + } > > Cant this be done outside the "inner" loop ? Sure. > > > also this produces some new NaN values > > @@ -91810,16 +91810,16 @@ > [e:0.246031 c:-nan max:0.988908] len: 936 > [e:0.247006 c:-nan max:0.988908] len: 936 > [e:0.247174 c:-nan max:0.988908] len: 936 > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > Is that really important to you? > > [...] > > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > No great genius has ever existed without some touch of madness. -- > Aristotle > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Wed, Jun 07, 2023 at 05:46:25PM +0200, Paul B Mahol wrote: > On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer <michael@niedermayer.cc> > wrote: > > > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > > With fixed alignment requirements. > > > > > rematrix.c | 54 > > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > > swresample.c | 5 ++++ > > > swresample_internal.h | 2 + > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 2001 > > > From: Paul B Mahol <onemda@gmail.com> > > > Date: Wed, 10 May 2023 15:41:01 +0200 > > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > > > Improves generic mixing dramatically. > > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > --- > > > libswresample/rematrix.c | 54 +++++++++++++++++++++++++++-- > > > libswresample/swresample.c | 5 +++ > > > libswresample/swresample_internal.h | 2 ++ > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > > index 79e8a43eac..2133b0f90d 100644 > > > --- a/libswresample/rematrix.c > > > +++ b/libswresample/rematrix.c > > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData *out, > > AudioData *in, int len, int mus > > > break;} > > > default: > > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > > - for(i=0; i<len; i++){ > > > + if (out->planar && in->planar) > > > + len1 = len & ~15; > > > + else > > > + len1 = 0; > > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > > + len1 = 0; > > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > 0; > > j++) { > > > + in_i = s->matrix_ch[out_i][1+j]; > > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > > + len1 = 0; > > > + break; > > > + } > > > + } > > > > Cant this be done outside the "inner" loop ? > > > Sure. > > > > > > > > > > also this produces some new NaN values > > > > @@ -91810,16 +91810,16 @@ > > [e:0.246031 c:-nan max:0.988908] len: 936 > > [e:0.247006 c:-nan max:0.988908] len: 936 > > [e:0.247174 c:-nan max:0.988908] len: 936 > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > > > > Is that really important to you? The important part is not what the tool displays. But that this points to a worsening of the tested code (or a bug in the tool) The other numbers also seem to worsen by non trivial amounts Thx [...]
On Wed, Jun 7, 2023 at 6:01 PM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Wed, Jun 07, 2023 at 05:46:25PM +0200, Paul B Mahol wrote: > > On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer < > michael@niedermayer.cc> > > wrote: > > > > > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > > > With fixed alignment requirements. > > > > > > > rematrix.c | 54 > > > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > swresample.c | 5 ++++ > > > > swresample_internal.h | 2 + > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > > > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 > 2001 > > > > From: Paul B Mahol <onemda@gmail.com> > > > > Date: Wed, 10 May 2023 15:41:01 +0200 > > > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > > > > > Improves generic mixing dramatically. > > > > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > > --- > > > > libswresample/rematrix.c | 54 > +++++++++++++++++++++++++++-- > > > > libswresample/swresample.c | 5 +++ > > > > libswresample/swresample_internal.h | 2 ++ > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > > > index 79e8a43eac..2133b0f90d 100644 > > > > --- a/libswresample/rematrix.c > > > > +++ b/libswresample/rematrix.c > > > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData *out, > > > AudioData *in, int len, int mus > > > > break;} > > > > default: > > > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > > > - for(i=0; i<len; i++){ > > > > + if (out->planar && in->planar) > > > > + len1 = len & ~15; > > > > + else > > > > + len1 = 0; > > > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > > > + len1 = 0; > > > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > 0; > > > j++) { > > > > + in_i = s->matrix_ch[out_i][1+j]; > > > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > > > + len1 = 0; > > > > + break; > > > > + } > > > > + } > > > > > > Cant this be done outside the "inner" loop ? > > > > > > Sure. > > > > > > > > > > > > > > > > > also this produces some new NaN values > > > > > > @@ -91810,16 +91810,16 @@ > > > [e:0.246031 c:-nan max:0.988908] len: 936 > > > [e:0.247006 c:-nan max:0.988908] len: 936 > > > [e:0.247174 c:-nan max:0.988908] len: 936 > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > > > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > > > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > > > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > > > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > > > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > > > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > > > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > > > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > > > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > > > > > > > Is that really important to you? > > The important part is not what the tool displays. But that this > points to a worsening of the tested code (or a bug in the tool) > The other numbers also seem to worsen by non trivial amounts > Can you elaborate your reasoning. Otherwise I will not take this into serious account for very slow library wasting CPU cycles. > > > Thx > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Asymptotically faster algorithms should always be preferred if you have > asymptotical amounts of data > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Wed, Jun 07, 2023 at 07:29:13PM +0200, Paul B Mahol wrote: > On Wed, Jun 7, 2023 at 6:01 PM Michael Niedermayer <michael@niedermayer.cc> > wrote: > > > On Wed, Jun 07, 2023 at 05:46:25PM +0200, Paul B Mahol wrote: > > > On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer < > > michael@niedermayer.cc> > > > wrote: > > > > > > > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > > > > With fixed alignment requirements. > > > > > > > > > rematrix.c | 54 > > > > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > swresample.c | 5 ++++ > > > > > swresample_internal.h | 2 + > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > > > > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > > > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 > > 2001 > > > > > From: Paul B Mahol <onemda@gmail.com> > > > > > Date: Wed, 10 May 2023 15:41:01 +0200 > > > > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > > > > > > > Improves generic mixing dramatically. > > > > > > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > > > --- > > > > > libswresample/rematrix.c | 54 > > +++++++++++++++++++++++++++-- > > > > > libswresample/swresample.c | 5 +++ > > > > > libswresample/swresample_internal.h | 2 ++ > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > > > > index 79e8a43eac..2133b0f90d 100644 > > > > > --- a/libswresample/rematrix.c > > > > > +++ b/libswresample/rematrix.c > > > > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData *out, > > > > AudioData *in, int len, int mus > > > > > break;} > > > > > default: > > > > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > > > > - for(i=0; i<len; i++){ > > > > > + if (out->planar && in->planar) > > > > > + len1 = len & ~15; > > > > > + else > > > > > + len1 = 0; > > > > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > > > > + len1 = 0; > > > > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > 0; > > > > j++) { > > > > > + in_i = s->matrix_ch[out_i][1+j]; > > > > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > > > > + len1 = 0; > > > > > + break; > > > > > + } > > > > > + } > > > > > > > > Cant this be done outside the "inner" loop ? > > > > > > > > > Sure. > > > > > > > > > > > > > > > > > > > > > > > > also this produces some new NaN values > > > > > > > > @@ -91810,16 +91810,16 @@ > > > > [e:0.246031 c:-nan max:0.988908] len: 936 > > > > [e:0.247006 c:-nan max:0.988908] len: 936 > > > > [e:0.247174 c:-nan max:0.988908] len: 936 > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > > > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > > > > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > > > > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > > > > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > > > > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > > > > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > > > > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > > > > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > > > > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > > > > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > > > > > > > > > > Is that really important to you? > > > > The important part is not what the tool displays. But that this > > points to a worsening of the tested code (or a bug in the tool) > > The other numbers also seem to worsen by non trivial amounts > > > > Can you elaborate your reasoning. Otherwise I will not take this into > serious > account for very slow library wasting CPU cycles. if you do care that the output resembles the input then this from above should be concerning: > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 before there is 0.81 correlation and afterwards there is 0.03 correlation I mean the tool is telling us, we go kind of from 81% similarity to 3% similarity if thats not a bug in the tool thats very odd The NaN could mean that the resampler turned a non zero signal into all zeros or a all zero signal into non zero. Or maybe it returned NaN directly. Not sure these are the only ways you get a NaN there If both signals are all zero the max difference would be 0 and its not. This tool tries to excercise corner cases, so if it changes theres a good chance theres either a new bug in a corner case or one fixed. I dont think this should be ignored. thx [...]
On Wed, Jun 7, 2023 at 10:17 PM Michael Niedermayer <michael@niedermayer.cc> wrote: > On Wed, Jun 07, 2023 at 07:29:13PM +0200, Paul B Mahol wrote: > > On Wed, Jun 7, 2023 at 6:01 PM Michael Niedermayer < > michael@niedermayer.cc> > > wrote: > > > > > On Wed, Jun 07, 2023 at 05:46:25PM +0200, Paul B Mahol wrote: > > > > On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer < > > > michael@niedermayer.cc> > > > > wrote: > > > > > > > > > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > > > > > With fixed alignment requirements. > > > > > > > > > > > rematrix.c | 54 > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > > swresample.c | 5 ++++ > > > > > > swresample_internal.h | 2 + > > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > > > > > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > > > > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 > > > 2001 > > > > > > From: Paul B Mahol <onemda@gmail.com> > > > > > > Date: Wed, 10 May 2023 15:41:01 +0200 > > > > > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > > > > > > > > > Improves generic mixing dramatically. > > > > > > > > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > > > > --- > > > > > > libswresample/rematrix.c | 54 > > > +++++++++++++++++++++++++++-- > > > > > > libswresample/swresample.c | 5 +++ > > > > > > libswresample/swresample_internal.h | 2 ++ > > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > > > > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > > > > > index 79e8a43eac..2133b0f90d 100644 > > > > > > --- a/libswresample/rematrix.c > > > > > > +++ b/libswresample/rematrix.c > > > > > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData > *out, > > > > > AudioData *in, int len, int mus > > > > > > break;} > > > > > > default: > > > > > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > > > > > - for(i=0; i<len; i++){ > > > > > > + if (out->planar && in->planar) > > > > > > + len1 = len & ~15; > > > > > > + else > > > > > > + len1 = 0; > > > > > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > > > > > + len1 = 0; > > > > > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > > 0; > > > > > j++) { > > > > > > + in_i = s->matrix_ch[out_i][1+j]; > > > > > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > > > > > + len1 = 0; > > > > > > + break; > > > > > > + } > > > > > > + } > > > > > > > > > > Cant this be done outside the "inner" loop ? > > > > > > > > > > > > Sure. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > also this produces some new NaN values > > > > > > > > > > @@ -91810,16 +91810,16 @@ > > > > > [e:0.246031 c:-nan max:0.988908] len: 936 > > > > > [e:0.247006 c:-nan max:0.988908] len: 936 > > > > > [e:0.247174 c:-nan max:0.988908] len: 936 > > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > > > > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > > > > > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > > > > > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > > > > > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > > > > > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > > > > > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > > > > > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > > > > > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > > > > > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > > > > > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > > > > > > > > > > > > > Is that really important to you? > > > > > > The important part is not what the tool displays. But that this > > > points to a worsening of the tested code (or a bug in the tool) > > > The other numbers also seem to worsen by non trivial amounts > > > > > > > Can you elaborate your reasoning. Otherwise I will not take this into > > serious > > account for very slow library wasting CPU cycles. > > > if you do care that the output resembles the input then this from > above should be concerning: > > > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > before there is 0.81 correlation and afterwards there is > 0.03 correlation > > I mean the tool is telling us, we go kind of from 81% similarity to 3% > similarity > if thats not a bug in the tool thats very odd > > The NaN could mean that the resampler turned a non zero signal into all > zeros > or a all zero signal into non zero. Or maybe it returned NaN directly. Not > sure > these are the only ways you get a NaN there > If both signals are all zero the max difference would be 0 and its not. > > This tool tries to excercise corner cases, so if it changes theres a good > chance theres either a new bug in a corner case or one fixed. I dont think > this should be ignored. > The tool have not been touched for ages and is invalid and poorly designed. Its results should not be considered seriously, > > thx > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Observe your enemies, for they first find out your faults. -- Antisthenes > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
Gonna apply soon.
On Mon, Jun 12, 2023 at 06:08:47PM +0200, Paul B Mahol wrote: > On Wed, Jun 7, 2023 at 10:17 PM Michael Niedermayer <michael@niedermayer.cc> > wrote: > > > On Wed, Jun 07, 2023 at 07:29:13PM +0200, Paul B Mahol wrote: > > > On Wed, Jun 7, 2023 at 6:01 PM Michael Niedermayer < > > michael@niedermayer.cc> > > > wrote: > > > > > > > On Wed, Jun 07, 2023 at 05:46:25PM +0200, Paul B Mahol wrote: > > > > > On Sat, May 13, 2023 at 3:09 AM Michael Niedermayer < > > > > michael@niedermayer.cc> > > > > > wrote: > > > > > > > > > > > On Fri, May 12, 2023 at 07:28:08PM +0200, Paul B Mahol wrote: > > > > > > > With fixed alignment requirements. > > > > > > > > > > > > > rematrix.c | 54 > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++-- > > > > > > > swresample.c | 5 ++++ > > > > > > > swresample_internal.h | 2 + > > > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > > 3b99c9eb2e2f1f17d1f306e37ddd7107405fede4 > > > > > > 0001-swresample-reuse-DSP-functions-from-avutil.patch > > > > > > > From 771bc1414b737475bc42c7263fd7f21b4d9cc9b7 Mon Sep 17 00:00:00 > > > > 2001 > > > > > > > From: Paul B Mahol <onemda@gmail.com> > > > > > > > Date: Wed, 10 May 2023 15:41:01 +0200 > > > > > > > Subject: [PATCH] swresample: reuse DSP functions from avutil > > > > > > > > > > > > > > Improves generic mixing dramatically. > > > > > > > > > > > > > > Signed-off-by: Paul B Mahol <onemda@gmail.com> > > > > > > > --- > > > > > > > libswresample/rematrix.c | 54 > > > > +++++++++++++++++++++++++++-- > > > > > > > libswresample/swresample.c | 5 +++ > > > > > > > libswresample/swresample_internal.h | 2 ++ > > > > > > > 3 files changed, 59 insertions(+), 2 deletions(-) > > > > > > > > > > > > > > diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c > > > > > > > index 79e8a43eac..2133b0f90d 100644 > > > > > > > --- a/libswresample/rematrix.c > > > > > > > +++ b/libswresample/rematrix.c > > > > > > > @@ -652,7 +652,32 @@ int swri_rematrix(SwrContext *s, AudioData > > *out, > > > > > > AudioData *in, int len, int mus > > > > > > > break;} > > > > > > > default: > > > > > > > if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ > > > > > > > - for(i=0; i<len; i++){ > > > > > > > + if (out->planar && in->planar) > > > > > > > + len1 = len & ~15; > > > > > > > + else > > > > > > > + len1 = 0; > > > > > > > + if ((intptr_t)out->ch[out_i] & 0x1f) > > > > > > > + len1 = 0; > > > > > > > + for (j = 0; j < s->matrix_ch[out_i][0] && len1 > > > 0; > > > > > > j++) { > > > > > > > + in_i = s->matrix_ch[out_i][1+j]; > > > > > > > + if ((intptr_t)in->ch[in_i] & 0x1f) { > > > > > > > + len1 = 0; > > > > > > > + break; > > > > > > > + } > > > > > > > + } > > > > > > > > > > > > Cant this be done outside the "inner" loop ? > > > > > > > > > > > > > > > Sure. > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > also this produces some new NaN values > > > > > > > > > > > > @@ -91810,16 +91810,16 @@ > > > > > > [e:0.246031 c:-nan max:0.988908] len: 936 > > > > > > [e:0.247006 c:-nan max:0.988908] len: 936 > > > > > > [e:0.247174 c:-nan max:0.988908] len: 936 > > > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > > > > > [e:0.013306 c:0.996638 max:0.037320] len: 32 F: 2 > > > > > > [e:0.049179 c:0.909927 max:0.081071] len: 32 F: 2 > > > > > > [e:0.159079 c:-nan max:0.299026] len: 32 F: 2 > > > > > > [e:0.116819 c:-nan max:0.297598] len: 32 F: 2 > > > > > > [e:0.159382 c:-nan max:0.299980] len: 32 F: 2 > > > > > > [e:0.115993 c:-nan max:0.296648] len: 32 F: 2 > > > > > > -[e:0.099115 c:0.996999 max:0.189015] len: 32 F: 2 > > > > > > -[e:0.071657 c:0.998728 max:0.187209] len: 32 F: 2 > > > > > > +[e:0.159577 c:-nan max:0.299503] len: 32 F: 2 > > > > > > +[e:0.115367 c:-nan max:0.299503] len: 32 F: 2 > > > > > > > > > > > > > > > > > Is that really important to you? > > > > > > > > The important part is not what the tool displays. But that this > > > > points to a worsening of the tested code (or a bug in the tool) > > > > The other numbers also seem to worsen by non trivial amounts > > > > > > > > > > Can you elaborate your reasoning. Otherwise I will not take this into > > > serious > > > account for very slow library wasting CPU cycles. > > > > > > if you do care that the output resembles the input then this from > > above should be concerning: > > > > > > > > -[e:0.197683 c:0.773693 max:0.825360] len: 936 > > > > > > -[e:0.192089 c:0.814010 max:0.820662] len: 936 > > > > > > +[e:0.245992 c:0.031094 max:0.988908] len: 936 > > > > > > +[e:0.246535 c:0.031025 max:0.988908] len: 936 > > > > before there is 0.81 correlation and afterwards there is > > 0.03 correlation > > > > I mean the tool is telling us, we go kind of from 81% similarity to 3% > > similarity > > if thats not a bug in the tool thats very odd > > > > The NaN could mean that the resampler turned a non zero signal into all > > zeros > > or a all zero signal into non zero. Or maybe it returned NaN directly. Not > > sure > > these are the only ways you get a NaN there > > If both signals are all zero the max difference would be 0 and its not. > > > > This tool tries to excercise corner cases, so if it changes theres a good > > chance theres either a new bug in a corner case or one fixed. I dont think > > this should be ignored. > > > > The tool have not been touched for ages and is invalid and poorly designed. why do you think it is invalid ? please elaborate on what design is poor so it can be improved > Its results should not be considered seriously, [...]
From 1f6aded57aab1fd8c6c7ba6fa5261d7979666f66 Mon Sep 17 00:00:00 2001 From: Paul B Mahol <onemda@gmail.com> Date: Wed, 10 May 2023 15:41:01 +0200 Subject: [PATCH] swresample: reuse DSP functions from avutil Improves generic mixing dramatically. Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libswresample/rematrix.c | 30 +++++++++++++++++++++++++++-- libswresample/swresample.c | 5 +++++ libswresample/swresample_internal.h | 2 ++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c index 79e8a43eac..0d234bc97e 100644 --- a/libswresample/rematrix.c +++ b/libswresample/rematrix.c @@ -652,7 +652,20 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus break;} default: if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ - for(i=0; i<len; i++){ + len1 = len & ~15; + if (len1 > 0) { + in_i = s->matrix_ch[out_i][1+0]; + s->fdsp->vector_fmul_scalar((float *)out->ch[out_i], + (const float *)in->ch[in_i], + s->matrix_flt[out_i][in_i], len1); + for (j = 1; j < s->matrix_ch[out_i][0]; j++) { + in_i = s->matrix_ch[out_i][1+j]; + s->fdsp->vector_fmac_scalar((float *)out->ch[out_i], + (const float *)in->ch[in_i], + s->matrix_flt[out_i][in_i], len1); + } + } + for (i = len1; i < len; i++) { float v=0; for(j=0; j<s->matrix_ch[out_i][0]; j++){ in_i= s->matrix_ch[out_i][1+j]; @@ -661,7 +674,20 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus ((float*)out->ch[out_i])[i]= v; } }else if(s->int_sample_fmt == AV_SAMPLE_FMT_DBLP){ - for(i=0; i<len; i++){ + len1 = len & ~15; + if (len1 > 0) { + in_i = s->matrix_ch[out_i][1+0]; + s->fdsp->vector_dmul_scalar((double *)out->ch[out_i], + (const double *)in->ch[in_i], + s->matrix[out_i][in_i], len1); + for (j = 1; j < s->matrix_ch[out_i][0]; j++) { + in_i = s->matrix_ch[out_i][1+j]; + s->fdsp->vector_dmac_scalar((double *)out->ch[out_i], + (const double *)in->ch[in_i], + s->matrix[out_i][in_i], len1); + } + } + for (i = len1; i < len; i++) { double v=0; for(j=0; j<s->matrix_ch[out_i][0]; j++){ in_i= s->matrix_ch[out_i][1+j]; diff --git a/libswresample/swresample.c b/libswresample/swresample.c index 6dc329a9d0..b0b6f6a5c5 100644 --- a/libswresample/swresample.c +++ b/libswresample/swresample.c @@ -181,6 +181,7 @@ av_cold void swr_free(SwrContext **ss){ if (s->resampler) s->resampler->free(&s->resample); + av_freep(&s->fdsp); } av_freep(ss); @@ -504,6 +505,10 @@ av_assert0(s->out.ch_count); goto fail; } + s->fdsp = avpriv_float_dsp_alloc(0); + if (!s->fdsp) + goto fail; + return 0; fail: swr_close(s); diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h index ad902d73fa..dba678c502 100644 --- a/libswresample/swresample_internal.h +++ b/libswresample/swresample_internal.h @@ -22,6 +22,7 @@ #define SWRESAMPLE_SWRESAMPLE_INTERNAL_H #include "swresample.h" +#include "libavutil/float_dsp.h" #include "libavutil/channel_layout.h" #include "config.h" @@ -189,6 +190,7 @@ struct SwrContext { mix_any_func_type *mix_any_f; + AVFloatDSPContext *fdsp; /* TODO: callbacks for ASM optimizations */ }; -- 2.39.1