
[FFmpeg-devel] avfilter/af_atempo: switch to rdft from lavu/tx

Message ID 20220206112515.1421701-1-onemda@gmail.com
State New
Series [FFmpeg-devel] avfilter/af_atempo: switch to rdft from lavu/tx

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/make_aarch64_jetson success Make finished
andriy/make_fate_aarch64_jetson success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Paul B Mahol Feb. 6, 2022, 11:25 a.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 configure               |   3 -
 libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
 2 files changed, 64 insertions(+), 65 deletions(-)

Comments

Pavel Koshevoy Feb. 6, 2022, 5:15 p.m. UTC | #1
On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  configure               |   3 -
>  libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
>  2 files changed, 64 insertions(+), 65 deletions(-)
>
> diff --git a/configure b/configure
> index 5a8b52c77d..6ec25dd622 100755
> --- a/configure
> +++ b/configure
> @@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
>  aresample_filter_deps="swresample"
>  asr_filter_deps="pocketsphinx"
>  ass_filter_deps="libass"
> -atempo_filter_deps="avcodec"
> -atempo_filter_select="rdft"
>  avgblur_opencl_filter_deps="opencl"
>  avgblur_vulkan_filter_deps="vulkan spirv_compiler"
>  azmq_filter_deps="libzmq"
> @@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
>  # conditional library dependencies, in any order
>  enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
>  enabled aresample_filter    && prepend avfilter_deps "swresample"
> -enabled atempo_filter       && prepend avfilter_deps "avcodec"
>  enabled bm3d_filter         && prepend avfilter_deps "avcodec"
>  enabled cover_rect_filter   && prepend avfilter_deps "avformat avcodec"
>  enabled ebur128_filter && enabled swresample && prepend avfilter_deps
> "swresample"
> diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> index e9a6da7970..27f2f6daa0 100644
> --- a/libavfilter/af_atempo.c
> +++ b/libavfilter/af_atempo.c
> @@ -39,13 +39,13 @@
>   */
>
>  #include <float.h>
> -#include "libavcodec/avfft.h"
>  #include "libavutil/avassert.h"
>  #include "libavutil/avstring.h"
>  #include "libavutil/channel_layout.h"
>  #include "libavutil/eval.h"
>  #include "libavutil/opt.h"
>  #include "libavutil/samplefmt.h"
> +#include "libavutil/tx.h"
>  #include "avfilter.h"
>  #include "audio.h"
>  #include "internal.h"
> @@ -67,7 +67,8 @@ typedef struct AudioFragment {
>
>      // rDFT transform of the down-mixed mono fragment, used for
>      // fast waveform alignment via correlation in frequency domain:
> -    FFTSample *xdat;
> +    float *xdat_in;
> +    float *xdat;
>  } AudioFragment;
>
>
Is the old API being removed or deprecated?
Just wondering why this change is necessary.




>  /**
> @@ -140,9 +141,11 @@ typedef struct ATempoContext {
>      FilterState state;
>
>      // for fast correlation calculation in frequency domain:
> -    RDFTContext *real_to_complex;
> -    RDFTContext *complex_to_real;
> -    FFTSample *correlation;
> +    AVTXContext *real_to_complex;
> +    AVTXContext *complex_to_real;
> +    av_tx_fn r2c_fn, c2r_fn;
> +    float *correlation_in;
> +    float *correlation;
>
>      // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
>      AVFrame *dst_buffer;
> @@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext
> *atempo)
>
>      av_freep(&atempo->frag[0].data);
>      av_freep(&atempo->frag[1].data);
> +    av_freep(&atempo->frag[0].xdat_in);
> +    av_freep(&atempo->frag[1].xdat_in);
>      av_freep(&atempo->frag[0].xdat);
>      av_freep(&atempo->frag[1].xdat);
>
>      av_freep(&atempo->buffer);
>      av_freep(&atempo->hann);
> +    av_freep(&atempo->correlation_in);
>      av_freep(&atempo->correlation);
>
> -    av_rdft_end(atempo->real_to_complex);
> -    atempo->real_to_complex = NULL;
> -
> -    av_rdft_end(atempo->complex_to_real);
> -    atempo->complex_to_real = NULL;
> +    av_tx_uninit(&atempo->real_to_complex);
> +    av_tx_uninit(&atempo->complex_to_real);
>  }
>
>  /* av_realloc is not aligned enough; fortunately, the data does not need
> to
> @@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext *atempo)
>  #define RE_MALLOC_OR_FAIL(field, field_size)                    \
>      do {                                                        \
>          av_freep(&field);                                       \
> -        field = av_malloc(field_size);                          \
> +        field = av_calloc(field_size, 1);                       \
>          if (!field) {                                           \
>              yae_release_buffers(atempo);                        \
>              return AVERROR(ENOMEM);                             \
> @@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
>  {
>      const int sample_size = av_get_bytes_per_sample(format);
>      uint32_t nlevels  = 0;
> +    float scale = 1.f, iscale = 1.f;
>      uint32_t pot;
>      int i;
>
> @@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
>      // initialize audio fragment buffers:
>      RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window *
> atempo->stride);
>      RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window *
> atempo->stride);
> -    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window *
> sizeof(FFTComplex));
> -    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window *
> sizeof(FFTComplex));
> +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) *
> sizeof(AVComplexFloat));
> +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) *
> sizeof(AVComplexFloat));
> +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) *
> sizeof(AVComplexFloat));
> +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) *
> sizeof(AVComplexFloat));
>
>      // initialize rDFT contexts:
> -    av_rdft_end(atempo->real_to_complex);
> -    atempo->real_to_complex = NULL;
> -
> -    av_rdft_end(atempo->complex_to_real);
> -    atempo->complex_to_real = NULL;
> +    av_tx_uninit(&atempo->real_to_complex);
> +    av_tx_uninit(&atempo->complex_to_real);
>
> -    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
> +    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn,
> AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
>      if (!atempo->real_to_complex) {
>          yae_release_buffers(atempo);
>          return AVERROR(ENOMEM);
>      }
>
> -    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
> +    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn,
> AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
>      if (!atempo->complex_to_real) {
>          yae_release_buffers(atempo);
>          return AVERROR(ENOMEM);
>      }
>
> -    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> sizeof(FFTComplex));
> +    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) *
> sizeof(AVComplexFloat));
> +    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> sizeof(AVComplexFloat));
>
>      atempo->ring = atempo->window * 3;
>      RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
> @@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
>          const uint8_t *src_end = src +                                  \
>              frag->nsamples * atempo->channels * sizeof(scalar_type);    \
>                                                                          \
> -        FFTSample *xdat = frag->xdat;                                   \
> +        float *xdat = frag->xdat_in;                                    \
>          scalar_type tmp;                                                \
>                                                                          \
>          if (atempo->channels == 1) {                                    \
> @@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx)
>                  tmp = *(const scalar_type *)src;                        \
>                  src += sizeof(scalar_type);                             \
>                                                                          \
> -                *xdat = (FFTSample)tmp;                                 \
> +                *xdat = (float)tmp;                                     \
>              }                                                           \
>          } else {                                                        \
> -            FFTSample s, max, ti, si;                                   \
> +            float s, max, ti, si;                                       \
>              int i;                                                      \
>                                                                          \
>              for (; src < src_end; xdat++) {                             \
>                  tmp = *(const scalar_type *)src;                        \
>                  src += sizeof(scalar_type);                             \
>                                                                          \
> -                max = (FFTSample)tmp;                                   \
> -                s = FFMIN((FFTSample)scalar_max,                        \
> -                          (FFTSample)fabsf(max));                       \
> +                max = (float)tmp;                                       \
> +                s = FFMIN((float)scalar_max,                            \
> +                          (float)fabsf(max));                           \
>                                                                          \
>                  for (i = 1; i < atempo->channels; i++) {                \
>                      tmp = *(const scalar_type *)src;                    \
>                      src += sizeof(scalar_type);                         \
>                                                                          \
> -                    ti = (FFTSample)tmp;                                \
> -                    si = FFMIN((FFTSample)scalar_max,                   \
> -                               (FFTSample)fabsf(ti));                   \
> +                    ti = (float)tmp;                                    \
> +                    si = FFMIN((float)scalar_max,                       \
> +                               (float)fabsf(ti));                       \
>                                                                          \
>                      if (s < si) {                                       \
>                          s   = si;                                       \
> @@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo,
> AudioFragment *frag)
>      const uint8_t *src = frag->data;
>
>      // init complex data buffer used for FFT and Correlation:
> -    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
> +    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
>
>      if (atempo->format == AV_SAMPLE_FMT_U8) {
>          yae_init_xdat(uint8_t, 127);
> @@ -598,32 +602,24 @@ static void yae_advance_to_next_frag(ATempoContext
> *atempo)
>   * Multiply two vectors of complex numbers (result of real_to_complex
> rDFT)
>   * and transform back via complex_to_real rDFT.
>   */
> -static void yae_xcorr_via_rdft(FFTSample *xcorr,
> -                               RDFTContext *complex_to_real,
> -                               const FFTComplex *xa,
> -                               const FFTComplex *xb,
> +static void yae_xcorr_via_rdft(float *xcorr_in,
> +                               float *xcorr,
> +                               AVTXContext *complex_to_real,
> +                               av_tx_fn c2r_fn,
> +                               const AVComplexFloat *xa,
> +                               const AVComplexFloat *xb,
>                                 const int window)
>  {
> -    FFTComplex *xc = (FFTComplex *)xcorr;
> +    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
>      int i;
>
> -    // NOTE: first element requires special care -- Given Y = rDFT(X),
> -    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> -    // stores Re(Y[N/2]) in place of Im(Y[0]).
> -
> -    xc->re = xa->re * xb->re;
> -    xc->im = xa->im * xb->im;
> -    xa++;
> -    xb++;
> -    xc++;
> -
> -    for (i = 1; i < window; i++, xa++, xb++, xc++) {
> +    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
>

This used to iterate over [1, window - 1] elements.
Now it iterates over [0, window] elements.
Is this correct?  That's 2 additional elements.



>          xc->re = (xa->re * xb->re + xa->im * xb->im);
>          xc->im = (xa->im * xb->re - xa->re * xb->im);
>      }
>
>      // apply inverse rDFT:
> -    av_rdft_calc(complex_to_real, xcorr);
> +    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
>  }
>
>  /**
> @@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
>                       const int window,
>                       const int delta_max,
>                       const int drift,
> -                     FFTSample *correlation,
> -                     RDFTContext *complex_to_real)
> +                     float *correlation_in,
> +                     float *correlation,
> +                     AVTXContext *complex_to_real,
> +                     av_tx_fn c2r_fn)
>  {
>      int       best_offset = -drift;
> -    FFTSample best_metric = -FLT_MAX;
> -    FFTSample *xcorr;
> +    float     best_metric = -FLT_MAX;
> +    float    *xcorr;
>
>      int i0;
>      int i1;
>      int i;
>
> -    yae_xcorr_via_rdft(correlation,
> +    yae_xcorr_via_rdft(correlation_in,
> +                       correlation,
>                         complex_to_real,
> -                       (const FFTComplex *)prev->xdat,
> -                       (const FFTComplex *)frag->xdat,
> +                       c2r_fn,
> +                       (const AVComplexFloat *)prev->xdat,
> +                       (const AVComplexFloat *)frag->xdat,
>                         window);
>
>      // identify search window boundaries:
> @@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
>      xcorr = correlation + i0;
>
>      for (i = i0; i < i1; i++, xcorr++) {
> -        FFTSample metric = *xcorr;
> +        float metric = *xcorr;
>
>          // normalize:
> -        FFTSample drifti = (FFTSample)(drift + i);
> -        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
> +        float drifti = (float)(drift + i);
> +        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
>
>          if (metric > best_metric) {
>              best_metric = metric;
> @@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext *atempo)
>                                       atempo->window,
>                                       delta_max,
>                                       drift,
> +                                     atempo->correlation_in,
>                                       atempo->correlation,
> -                                     atempo->complex_to_real);
> +                                     atempo->complex_to_real,
> +                                     atempo->c2r_fn);
>
>      if (correction) {
>          // adjust fragment position:
> @@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
>              yae_downmix(atempo, yae_curr_frag(atempo));
>
>              // apply rDFT:
> -            av_rdft_calc(atempo->real_to_complex,
> yae_curr_frag(atempo)->xdat);
> +            atempo->r2c_fn(atempo->real_to_complex,
> yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
>
>              // must load the second fragment before alignment can start:
>              if (!atempo->nfrag) {
> @@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
>              yae_downmix(atempo, yae_curr_frag(atempo));
>
>              // apply rDFT:
> -            av_rdft_calc(atempo->real_to_complex,
> yae_curr_frag(atempo)->xdat);
> +            atempo->r2c_fn(atempo->real_to_complex,
> yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
>
>              atempo->state = YAE_OUTPUT_OVERLAP_ADD;
>          }
> @@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
>              yae_downmix(atempo, frag);
>
>              // apply rDFT:
> -            av_rdft_calc(atempo->real_to_complex, frag->xdat);
> +            atempo->r2c_fn(atempo->real_to_complex, frag->xdat,
> frag->xdat_in, sizeof(float));
>
>              // align current fragment to previous fragment:
>              if (yae_adjust_position(atempo)) {
> --
> 2.33.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Paul B Mahol Feb. 6, 2022, 5:24 p.m. UTC | #2
On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:

> On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  configure               |   3 -
> >  libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
> >  2 files changed, 64 insertions(+), 65 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 5a8b52c77d..6ec25dd622 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
> >  aresample_filter_deps="swresample"
> >  asr_filter_deps="pocketsphinx"
> >  ass_filter_deps="libass"
> > -atempo_filter_deps="avcodec"
> > -atempo_filter_select="rdft"
> >  avgblur_opencl_filter_deps="opencl"
> >  avgblur_vulkan_filter_deps="vulkan spirv_compiler"
> >  azmq_filter_deps="libzmq"
> > @@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
> >  # conditional library dependencies, in any order
> >  enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
> >  enabled aresample_filter    && prepend avfilter_deps "swresample"
> > -enabled atempo_filter       && prepend avfilter_deps "avcodec"
> >  enabled bm3d_filter         && prepend avfilter_deps "avcodec"
> >  enabled cover_rect_filter   && prepend avfilter_deps "avformat avcodec"
> >  enabled ebur128_filter && enabled swresample && prepend avfilter_deps
> > "swresample"
> > diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> > index e9a6da7970..27f2f6daa0 100644
> > --- a/libavfilter/af_atempo.c
> > +++ b/libavfilter/af_atempo.c
> > @@ -39,13 +39,13 @@
> >   */
> >
> >  #include <float.h>
> > -#include "libavcodec/avfft.h"
> >  #include "libavutil/avassert.h"
> >  #include "libavutil/avstring.h"
> >  #include "libavutil/channel_layout.h"
> >  #include "libavutil/eval.h"
> >  #include "libavutil/opt.h"
> >  #include "libavutil/samplefmt.h"
> > +#include "libavutil/tx.h"
> >  #include "avfilter.h"
> >  #include "audio.h"
> >  #include "internal.h"
> > @@ -67,7 +67,8 @@ typedef struct AudioFragment {
> >
> >      // rDFT transform of the down-mixed mono fragment, used for
> >      // fast waveform alignment via correlation in frequency domain:
> > -    FFTSample *xdat;
> > +    float *xdat_in;
> > +    float *xdat;
> >  } AudioFragment;
> >
> >
> Is the old API being removed or deprecated?
> Just wondering why this change is necessary.
>

New api is faster.

>
>
>
>
> >  /**
> > @@ -140,9 +141,11 @@ typedef struct ATempoContext {
> >      FilterState state;
> >
> >      // for fast correlation calculation in frequency domain:
> > -    RDFTContext *real_to_complex;
> > -    RDFTContext *complex_to_real;
> > -    FFTSample *correlation;
> > +    AVTXContext *real_to_complex;
> > +    AVTXContext *complex_to_real;
> > +    av_tx_fn r2c_fn, c2r_fn;
> > +    float *correlation_in;
> > +    float *correlation;
> >
> >      // for managing AVFilterPad.request_frame and
> AVFilterPad.filter_frame
> >      AVFrame *dst_buffer;
> > @@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext
> > *atempo)
> >
> >      av_freep(&atempo->frag[0].data);
> >      av_freep(&atempo->frag[1].data);
> > +    av_freep(&atempo->frag[0].xdat_in);
> > +    av_freep(&atempo->frag[1].xdat_in);
> >      av_freep(&atempo->frag[0].xdat);
> >      av_freep(&atempo->frag[1].xdat);
> >
> >      av_freep(&atempo->buffer);
> >      av_freep(&atempo->hann);
> > +    av_freep(&atempo->correlation_in);
> >      av_freep(&atempo->correlation);
> >
> > -    av_rdft_end(atempo->real_to_complex);
> > -    atempo->real_to_complex = NULL;
> > -
> > -    av_rdft_end(atempo->complex_to_real);
> > -    atempo->complex_to_real = NULL;
> > +    av_tx_uninit(&atempo->real_to_complex);
> > +    av_tx_uninit(&atempo->complex_to_real);
> >  }
> >
> >  /* av_realloc is not aligned enough; fortunately, the data does not need
> > to
> > @@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext
> *atempo)
> >  #define RE_MALLOC_OR_FAIL(field, field_size)                    \
> >      do {                                                        \
> >          av_freep(&field);                                       \
> > -        field = av_malloc(field_size);                          \
> > +        field = av_calloc(field_size, 1);                       \
> >          if (!field) {                                           \
> >              yae_release_buffers(atempo);                        \
> >              return AVERROR(ENOMEM);                             \
> > @@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
> >  {
> >      const int sample_size = av_get_bytes_per_sample(format);
> >      uint32_t nlevels  = 0;
> > +    float scale = 1.f, iscale = 1.f;
> >      uint32_t pot;
> >      int i;
> >
> > @@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
> >      // initialize audio fragment buffers:
> >      RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window *
> > atempo->stride);
> >      RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window *
> > atempo->stride);
> > -    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window *
> > sizeof(FFTComplex));
> > -    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window *
> > sizeof(FFTComplex));
> > +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) *
> > sizeof(AVComplexFloat));
> > +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) *
> > sizeof(AVComplexFloat));
> > +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) *
> > sizeof(AVComplexFloat));
> > +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) *
> > sizeof(AVComplexFloat));
> >
> >      // initialize rDFT contexts:
> > -    av_rdft_end(atempo->real_to_complex);
> > -    atempo->real_to_complex = NULL;
> > -
> > -    av_rdft_end(atempo->complex_to_real);
> > -    atempo->complex_to_real = NULL;
> > +    av_tx_uninit(&atempo->real_to_complex);
> > +    av_tx_uninit(&atempo->complex_to_real);
> >
> > -    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
> > +    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn,
> > AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
> >      if (!atempo->real_to_complex) {
> >          yae_release_buffers(atempo);
> >          return AVERROR(ENOMEM);
> >      }
> >
> > -    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
> > +    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn,
> > AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
> >      if (!atempo->complex_to_real) {
> >          yae_release_buffers(atempo);
> >          return AVERROR(ENOMEM);
> >      }
> >
> > -    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> > sizeof(FFTComplex));
> > +    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) *
> > sizeof(AVComplexFloat));
> > +    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> > sizeof(AVComplexFloat));
> >
> >      atempo->ring = atempo->window * 3;
> >      RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
> > @@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
> >          const uint8_t *src_end = src +
> \
> >              frag->nsamples * atempo->channels * sizeof(scalar_type);
> \
> >
> \
> > -        FFTSample *xdat = frag->xdat;
>  \
> > +        float *xdat = frag->xdat_in;
> \
> >          scalar_type tmp;
> \
> >
> \
> >          if (atempo->channels == 1) {
> \
> > @@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx)
> >                  tmp = *(const scalar_type *)src;
> \
> >                  src += sizeof(scalar_type);
>  \
> >
> \
> > -                *xdat = (FFTSample)tmp;
>  \
> > +                *xdat = (float)tmp;
>  \
> >              }
>  \
> >          } else {
> \
> > -            FFTSample s, max, ti, si;
>  \
> > +            float s, max, ti, si;
>  \
> >              int i;
> \
> >
> \
> >              for (; src < src_end; xdat++) {
>  \
> >                  tmp = *(const scalar_type *)src;
> \
> >                  src += sizeof(scalar_type);
>  \
> >
> \
> > -                max = (FFTSample)tmp;
>  \
> > -                s = FFMIN((FFTSample)scalar_max,
> \
> > -                          (FFTSample)fabsf(max));
>  \
> > +                max = (float)tmp;
>  \
> > +                s = FFMIN((float)scalar_max,
> \
> > +                          (float)fabsf(max));
>  \
> >
> \
> >                  for (i = 1; i < atempo->channels; i++) {
> \
> >                      tmp = *(const scalar_type *)src;
> \
> >                      src += sizeof(scalar_type);
>  \
> >
> \
> > -                    ti = (FFTSample)tmp;
> \
> > -                    si = FFMIN((FFTSample)scalar_max,
>  \
> > -                               (FFTSample)fabsf(ti));
>  \
> > +                    ti = (float)tmp;
> \
> > +                    si = FFMIN((float)scalar_max,
>  \
> > +                               (float)fabsf(ti));
>  \
> >
> \
> >                      if (s < si) {
>  \
> >                          s   = si;
>  \
> > @@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo,
> > AudioFragment *frag)
> >      const uint8_t *src = frag->data;
> >
> >      // init complex data buffer used for FFT and Correlation:
> > -    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
> > +    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
> >
> >      if (atempo->format == AV_SAMPLE_FMT_U8) {
> >          yae_init_xdat(uint8_t, 127);
> > @@ -598,32 +602,24 @@ static void yae_advance_to_next_frag(ATempoContext
> > *atempo)
> >   * Multiply two vectors of complex numbers (result of real_to_complex
> > rDFT)
> >   * and transform back via complex_to_real rDFT.
> >   */
> > -static void yae_xcorr_via_rdft(FFTSample *xcorr,
> > -                               RDFTContext *complex_to_real,
> > -                               const FFTComplex *xa,
> > -                               const FFTComplex *xb,
> > +static void yae_xcorr_via_rdft(float *xcorr_in,
> > +                               float *xcorr,
> > +                               AVTXContext *complex_to_real,
> > +                               av_tx_fn c2r_fn,
> > +                               const AVComplexFloat *xa,
> > +                               const AVComplexFloat *xb,
> >                                 const int window)
> >  {
> > -    FFTComplex *xc = (FFTComplex *)xcorr;
> > +    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
> >      int i;
> >
> > -    // NOTE: first element requires special care -- Given Y = rDFT(X),
> > -    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> > -    // stores Re(Y[N/2]) in place of Im(Y[0]).
> > -
> > -    xc->re = xa->re * xb->re;
> > -    xc->im = xa->im * xb->im;
> > -    xa++;
> > -    xb++;
> > -    xc++;
> > -
> > -    for (i = 1; i < window; i++, xa++, xb++, xc++) {
> > +    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
> >
>
> This used to iterate over [1, window - 1] elements.
> Now it iterates over [0, window] elements.
> Is this correct?  That's 2 additional elements.
>

Yes, the newer API does not use the previous API's hack of packing the
output into n/2 complex numbers; it produces n/2 + 1 unpacked complex
values instead.
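
(A minimal sketch of the two output layouts, for reference; the names and
the WINDOW constant below are illustrative, not taken from the patch. The
rDFT length here is N = 2 * window.)

    #include "libavutil/tx.h"

    #define WINDOW 512          /* illustrative half-size, i.e. atempo->window */
    #define N      (2 * WINDOW) /* rDFT transform length                       */

    /* avfft / av_rdft_calc (DFT_R2C): N/2 complex bins packed into N floats,
     * with Re(Y[N/2]) stored in place of Im(Y[0]); bin 0 needed the special
     * case and the correlation loop ran over i = 1 .. WINDOW - 1.            */
    static float          packed_out[N];

    /* lavu/tx AV_TX_FLOAT_RDFT (forward): WINDOW + 1 unpacked AVComplexFloat
     * bins; Y[0] and Y[WINDOW] are ordinary complex values, so the loop can
     * simply run over i = 0 .. WINDOW inclusive.                             */
    static AVComplexFloat unpacked_out[WINDOW + 1];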

>
>
>
> >          xc->re = (xa->re * xb->re + xa->im * xb->im);
> >          xc->im = (xa->im * xb->re - xa->re * xb->im);
> >      }
> >
> >      // apply inverse rDFT:
> > -    av_rdft_calc(complex_to_real, xcorr);
> > +    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
> >  }
> >
> >  /**
> > @@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
> >                       const int window,
> >                       const int delta_max,
> >                       const int drift,
> > -                     FFTSample *correlation,
> > -                     RDFTContext *complex_to_real)
> > +                     float *correlation_in,
> > +                     float *correlation,
> > +                     AVTXContext *complex_to_real,
> > +                     av_tx_fn c2r_fn)
> >  {
> >      int       best_offset = -drift;
> > -    FFTSample best_metric = -FLT_MAX;
> > -    FFTSample *xcorr;
> > +    float     best_metric = -FLT_MAX;
> > +    float    *xcorr;
> >
> >      int i0;
> >      int i1;
> >      int i;
> >
> > -    yae_xcorr_via_rdft(correlation,
> > +    yae_xcorr_via_rdft(correlation_in,
> > +                       correlation,
> >                         complex_to_real,
> > -                       (const FFTComplex *)prev->xdat,
> > -                       (const FFTComplex *)frag->xdat,
> > +                       c2r_fn,
> > +                       (const AVComplexFloat *)prev->xdat,
> > +                       (const AVComplexFloat *)frag->xdat,
> >                         window);
> >
> >      // identify search window boundaries:
> > @@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
> >      xcorr = correlation + i0;
> >
> >      for (i = i0; i < i1; i++, xcorr++) {
> > -        FFTSample metric = *xcorr;
> > +        float metric = *xcorr;
> >
> >          // normalize:
> > -        FFTSample drifti = (FFTSample)(drift + i);
> > -        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
> > +        float drifti = (float)(drift + i);
> > +        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
> >
> >          if (metric > best_metric) {
> >              best_metric = metric;
> > @@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext
> *atempo)
> >                                       atempo->window,
> >                                       delta_max,
> >                                       drift,
> > +                                     atempo->correlation_in,
> >                                       atempo->correlation,
> > -                                     atempo->complex_to_real);
> > +                                     atempo->complex_to_real,
> > +                                     atempo->c2r_fn);
> >
> >      if (correction) {
> >          // adjust fragment position:
> > @@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
> >              yae_downmix(atempo, yae_curr_frag(atempo));
> >
> >              // apply rDFT:
> > -            av_rdft_calc(atempo->real_to_complex,
> > yae_curr_frag(atempo)->xdat);
> > +            atempo->r2c_fn(atempo->real_to_complex,
> > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in,
> sizeof(float));
> >
> >              // must load the second fragment before alignment can start:
> >              if (!atempo->nfrag) {
> > @@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
> >              yae_downmix(atempo, yae_curr_frag(atempo));
> >
> >              // apply rDFT:
> > -            av_rdft_calc(atempo->real_to_complex,
> > yae_curr_frag(atempo)->xdat);
> > +            atempo->r2c_fn(atempo->real_to_complex,
> > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in,
> sizeof(float));
> >
> >              atempo->state = YAE_OUTPUT_OVERLAP_ADD;
> >          }
> > @@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
> >              yae_downmix(atempo, frag);
> >
> >              // apply rDFT:
> > -            av_rdft_calc(atempo->real_to_complex, frag->xdat);
> > +            atempo->r2c_fn(atempo->real_to_complex, frag->xdat,
> > frag->xdat_in, sizeof(float));
> >
> >              // align current fragment to previous fragment:
> >              if (yae_adjust_position(atempo)) {
> > --
> > 2.33.0
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Pavel Koshevoy Feb. 6, 2022, 6:04 p.m. UTC | #3
On Sun, Feb 6, 2022 at 10:24 AM Paul B Mahol <onemda@gmail.com> wrote:

> On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:
>
> > On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
> >
> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > > ---
> > >  configure               |   3 -
> > >  libavfilter/af_atempo.c | 126 ++++++++++++++++++++--------------------
> > >  2 files changed, 64 insertions(+), 65 deletions(-)
> > >
> > > diff --git a/configure b/configure
> > > index 5a8b52c77d..6ec25dd622 100755
> > > --- a/configure
> > > +++ b/configure
> > > @@ -3610,8 +3610,6 @@ amovie_filter_deps="avcodec avformat"
> > >  aresample_filter_deps="swresample"
> > >  asr_filter_deps="pocketsphinx"
> > >  ass_filter_deps="libass"
> > > -atempo_filter_deps="avcodec"
> > > -atempo_filter_select="rdft"
> > >  avgblur_opencl_filter_deps="opencl"
> > >  avgblur_vulkan_filter_deps="vulkan spirv_compiler"
> > >  azmq_filter_deps="libzmq"
> > > @@ -7387,7 +7385,6 @@ enabled zlib && add_cppflags -DZLIB_CONST
> > >  # conditional library dependencies, in any order
> > >  enabled amovie_filter       && prepend avfilter_deps "avformat
> avcodec"
> > >  enabled aresample_filter    && prepend avfilter_deps "swresample"
> > > -enabled atempo_filter       && prepend avfilter_deps "avcodec"
> > >  enabled bm3d_filter         && prepend avfilter_deps "avcodec"
> > >  enabled cover_rect_filter   && prepend avfilter_deps "avformat
> avcodec"
> > >  enabled ebur128_filter && enabled swresample && prepend avfilter_deps
> > > "swresample"
> > > diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> > > index e9a6da7970..27f2f6daa0 100644
> > > --- a/libavfilter/af_atempo.c
> > > +++ b/libavfilter/af_atempo.c
> > > @@ -39,13 +39,13 @@
> > >   */
> > >
> > >  #include <float.h>
> > > -#include "libavcodec/avfft.h"
> > >  #include "libavutil/avassert.h"
> > >  #include "libavutil/avstring.h"
> > >  #include "libavutil/channel_layout.h"
> > >  #include "libavutil/eval.h"
> > >  #include "libavutil/opt.h"
> > >  #include "libavutil/samplefmt.h"
> > > +#include "libavutil/tx.h"
> > >  #include "avfilter.h"
> > >  #include "audio.h"
> > >  #include "internal.h"
> > > @@ -67,7 +67,8 @@ typedef struct AudioFragment {
> > >
> > >      // rDFT transform of the down-mixed mono fragment, used for
> > >      // fast waveform alignment via correlation in frequency domain:
> > > -    FFTSample *xdat;
> > > +    float *xdat_in;
> > > +    float *xdat;
> > >  } AudioFragment;
> > >
> > >
> > Is the old API being removed or deprecated?
> > Just wondering why this change is necessary.
> >
>
> New api is faster.
>
> >
> >
> >
> >
> > >  /**
> > > @@ -140,9 +141,11 @@ typedef struct ATempoContext {
> > >      FilterState state;
> > >
> > >      // for fast correlation calculation in frequency domain:
> > > -    RDFTContext *real_to_complex;
> > > -    RDFTContext *complex_to_real;
> > > -    FFTSample *correlation;
> > > +    AVTXContext *real_to_complex;
> > > +    AVTXContext *complex_to_real;
> > > +    av_tx_fn r2c_fn, c2r_fn;
> > > +    float *correlation_in;
> > > +    float *correlation;
> > >
> > >      // for managing AVFilterPad.request_frame and
> > AVFilterPad.filter_frame
> > >      AVFrame *dst_buffer;
> > > @@ -228,18 +231,18 @@ static void yae_release_buffers(ATempoContext
> > > *atempo)
> > >
> > >      av_freep(&atempo->frag[0].data);
> > >      av_freep(&atempo->frag[1].data);
> > > +    av_freep(&atempo->frag[0].xdat_in);
> > > +    av_freep(&atempo->frag[1].xdat_in);
> > >      av_freep(&atempo->frag[0].xdat);
> > >      av_freep(&atempo->frag[1].xdat);
> > >
> > >      av_freep(&atempo->buffer);
> > >      av_freep(&atempo->hann);
> > > +    av_freep(&atempo->correlation_in);
> > >      av_freep(&atempo->correlation);
> > >
> > > -    av_rdft_end(atempo->real_to_complex);
> > > -    atempo->real_to_complex = NULL;
> > > -
> > > -    av_rdft_end(atempo->complex_to_real);
> > > -    atempo->complex_to_real = NULL;
> > > +    av_tx_uninit(&atempo->real_to_complex);
> > > +    av_tx_uninit(&atempo->complex_to_real);
> > >  }
> > >
> > >  /* av_realloc is not aligned enough; fortunately, the data does not
> need
> > > to
> > > @@ -247,7 +250,7 @@ static void yae_release_buffers(ATempoContext
> > *atempo)
> > >  #define RE_MALLOC_OR_FAIL(field, field_size)                    \
> > >      do {                                                        \
> > >          av_freep(&field);                                       \
> > > -        field = av_malloc(field_size);                          \
> > > +        field = av_calloc(field_size, 1);                       \
> > >          if (!field) {                                           \
> > >              yae_release_buffers(atempo);                        \
> > >              return AVERROR(ENOMEM);                             \
> > > @@ -265,6 +268,7 @@ static int yae_reset(ATempoContext *atempo,
> > >  {
> > >      const int sample_size = av_get_bytes_per_sample(format);
> > >      uint32_t nlevels  = 0;
> > > +    float scale = 1.f, iscale = 1.f;
> > >      uint32_t pot;
> > >      int i;
> > >
> > > @@ -288,29 +292,29 @@ static int yae_reset(ATempoContext *atempo,
> > >      // initialize audio fragment buffers:
> > >      RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window *
> > > atempo->stride);
> > >      RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window *
> > > atempo->stride);
> > > -    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window *
> > > sizeof(FFTComplex));
> > > -    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window *
> > > sizeof(FFTComplex));
> > > +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) *
> > > sizeof(AVComplexFloat));
> > > +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) *
> > > sizeof(AVComplexFloat));
> > > +    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) *
> > > sizeof(AVComplexFloat));
> > > +    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) *
> > > sizeof(AVComplexFloat));
> > >
> > >      // initialize rDFT contexts:
> > > -    av_rdft_end(atempo->real_to_complex);
> > > -    atempo->real_to_complex = NULL;
> > > -
> > > -    av_rdft_end(atempo->complex_to_real);
> > > -    atempo->complex_to_real = NULL;
> > > +    av_tx_uninit(&atempo->real_to_complex);
> > > +    av_tx_uninit(&atempo->complex_to_real);
> > >
> > > -    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
> > > +    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn,
> > > AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
> > >      if (!atempo->real_to_complex) {
> > >          yae_release_buffers(atempo);
> > >          return AVERROR(ENOMEM);
> > >      }
> > >
> > > -    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
> > > +    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn,
> > > AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
> > >      if (!atempo->complex_to_real) {
> > >          yae_release_buffers(atempo);
> > >          return AVERROR(ENOMEM);
> > >      }
> > >
> > > -    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> > > sizeof(FFTComplex));
> > > +    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) *
> > > sizeof(AVComplexFloat));
> > > +    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window *
> > > sizeof(AVComplexFloat));
> > >
> > >      atempo->ring = atempo->window * 3;
> > >      RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
> > > @@ -348,7 +352,7 @@ static int yae_update(AVFilterContext *ctx)
> > >          const uint8_t *src_end = src +
> > \
> > >              frag->nsamples * atempo->channels * sizeof(scalar_type);
> > \
> > >
> > \
> > > -        FFTSample *xdat = frag->xdat;
> >  \
> > > +        float *xdat = frag->xdat_in;
> > \
> > >          scalar_type tmp;
> > \
> > >
> > \
> > >          if (atempo->channels == 1) {
> > \
> > > @@ -356,27 +360,27 @@ static int yae_update(AVFilterContext *ctx)
> > >                  tmp = *(const scalar_type *)src;
> > \
> > >                  src += sizeof(scalar_type);
> >  \
> > >
> > \
> > > -                *xdat = (FFTSample)tmp;
> >  \
> > > +                *xdat = (float)tmp;
> >  \
> > >              }
> >  \
> > >          } else {
> > \
> > > -            FFTSample s, max, ti, si;
> >  \
> > > +            float s, max, ti, si;
> >  \
> > >              int i;
> > \
> > >
> > \
> > >              for (; src < src_end; xdat++) {
> >  \
> > >                  tmp = *(const scalar_type *)src;
> > \
> > >                  src += sizeof(scalar_type);
> >  \
> > >
> > \
> > > -                max = (FFTSample)tmp;
> >  \
> > > -                s = FFMIN((FFTSample)scalar_max,
> > \
> > > -                          (FFTSample)fabsf(max));
> >  \
> > > +                max = (float)tmp;
> >  \
> > > +                s = FFMIN((float)scalar_max,
> > \
> > > +                          (float)fabsf(max));
> >  \
> > >
> > \
> > >                  for (i = 1; i < atempo->channels; i++) {
> > \
> > >                      tmp = *(const scalar_type *)src;
> > \
> > >                      src += sizeof(scalar_type);
> >  \
> > >
> > \
> > > -                    ti = (FFTSample)tmp;
> > \
> > > -                    si = FFMIN((FFTSample)scalar_max,
> >  \
> > > -                               (FFTSample)fabsf(ti));
> >  \
> > > +                    ti = (float)tmp;
> > \
> > > +                    si = FFMIN((float)scalar_max,
> >  \
> > > +                               (float)fabsf(ti));
> >  \
> > >
> > \
> > >                      if (s < si) {
> >  \
> > >                          s   = si;
> >  \
> > > @@ -399,7 +403,7 @@ static void yae_downmix(ATempoContext *atempo,
> > > AudioFragment *frag)
> > >      const uint8_t *src = frag->data;
> > >
> > >      // init complex data buffer used for FFT and Correlation:
> > > -    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
> > > +    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
> > >
> > >      if (atempo->format == AV_SAMPLE_FMT_U8) {
> > >          yae_init_xdat(uint8_t, 127);
> > > @@ -598,32 +602,24 @@ static void
> yae_advance_to_next_frag(ATempoContext
> > > *atempo)
> > >   * Multiply two vectors of complex numbers (result of real_to_complex
> > > rDFT)
> > >   * and transform back via complex_to_real rDFT.
> > >   */
> > > -static void yae_xcorr_via_rdft(FFTSample *xcorr,
> > > -                               RDFTContext *complex_to_real,
> > > -                               const FFTComplex *xa,
> > > -                               const FFTComplex *xb,
> > > +static void yae_xcorr_via_rdft(float *xcorr_in,
> > > +                               float *xcorr,
> > > +                               AVTXContext *complex_to_real,
> > > +                               av_tx_fn c2r_fn,
> > > +                               const AVComplexFloat *xa,
> > > +                               const AVComplexFloat *xb,
> > >                                 const int window)
> > >  {
> > > -    FFTComplex *xc = (FFTComplex *)xcorr;
> > > +    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
> > >      int i;
> > >
> > > -    // NOTE: first element requires special care -- Given Y = rDFT(X),
> > > -    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
> > > -    // stores Re(Y[N/2]) in place of Im(Y[0]).
> > > -
> > > -    xc->re = xa->re * xb->re;
> > > -    xc->im = xa->im * xb->im;
> > > -    xa++;
> > > -    xb++;
> > > -    xc++;
> > > -
> > > -    for (i = 1; i < window; i++, xa++, xb++, xc++) {
> > > +    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
> > >
> >
> > This used to iterate over [1, window - 1] elements.
> > Now it iterates over [0, window] elements.
> > Is this correct?  That's 2 additional elements.
> >
>
> Yes, the newer API does not use the previous API's hack of packing the
> output into n/2 complex numbers; it produces n/2 + 1 unpacked complex
> values instead.
>

cool, thanks ... lgtm if it still works





>
> >
> >
> >
> > >          xc->re = (xa->re * xb->re + xa->im * xb->im);
> > >          xc->im = (xa->im * xb->re - xa->re * xb->im);
> > >      }
> > >
> > >      // apply inverse rDFT:
> > > -    av_rdft_calc(complex_to_real, xcorr);
> > > +    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
> > >  }
> > >
> > >  /**
> > > @@ -637,21 +633,25 @@ static int yae_align(AudioFragment *frag,
> > >                       const int window,
> > >                       const int delta_max,
> > >                       const int drift,
> > > -                     FFTSample *correlation,
> > > -                     RDFTContext *complex_to_real)
> > > +                     float *correlation_in,
> > > +                     float *correlation,
> > > +                     AVTXContext *complex_to_real,
> > > +                     av_tx_fn c2r_fn)
> > >  {
> > >      int       best_offset = -drift;
> > > -    FFTSample best_metric = -FLT_MAX;
> > > -    FFTSample *xcorr;
> > > +    float     best_metric = -FLT_MAX;
> > > +    float    *xcorr;
> > >
> > >      int i0;
> > >      int i1;
> > >      int i;
> > >
> > > -    yae_xcorr_via_rdft(correlation,
> > > +    yae_xcorr_via_rdft(correlation_in,
> > > +                       correlation,
> > >                         complex_to_real,
> > > -                       (const FFTComplex *)prev->xdat,
> > > -                       (const FFTComplex *)frag->xdat,
> > > +                       c2r_fn,
> > > +                       (const AVComplexFloat *)prev->xdat,
> > > +                       (const AVComplexFloat *)frag->xdat,
> > >                         window);
> > >
> > >      // identify search window boundaries:
> > > @@ -665,11 +665,11 @@ static int yae_align(AudioFragment *frag,
> > >      xcorr = correlation + i0;
> > >
> > >      for (i = i0; i < i1; i++, xcorr++) {
> > > -        FFTSample metric = *xcorr;
> > > +        float metric = *xcorr;
> > >
> > >          // normalize:
> > > -        FFTSample drifti = (FFTSample)(drift + i);
> > > -        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
> > > +        float drifti = (float)(drift + i);
> > > +        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
> > >
> > >          if (metric > best_metric) {
> > >              best_metric = metric;
> > > @@ -706,8 +706,10 @@ static int yae_adjust_position(ATempoContext
> > *atempo)
> > >                                       atempo->window,
> > >                                       delta_max,
> > >                                       drift,
> > > +                                     atempo->correlation_in,
> > >                                       atempo->correlation,
> > > -                                     atempo->complex_to_real);
> > > +                                     atempo->complex_to_real,
> > > +                                     atempo->c2r_fn);
> > >
> > >      if (correction) {
> > >          // adjust fragment position:
> > > @@ -833,7 +835,7 @@ yae_apply(ATempoContext *atempo,
> > >              yae_downmix(atempo, yae_curr_frag(atempo));
> > >
> > >              // apply rDFT:
> > > -            av_rdft_calc(atempo->real_to_complex,
> > > yae_curr_frag(atempo)->xdat);
> > > +            atempo->r2c_fn(atempo->real_to_complex,
> > > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in,
> > sizeof(float));
> > >
> > >              // must load the second fragment before alignment can
> start:
> > >              if (!atempo->nfrag) {
> > > @@ -865,7 +867,7 @@ yae_apply(ATempoContext *atempo,
> > >              yae_downmix(atempo, yae_curr_frag(atempo));
> > >
> > >              // apply rDFT:
> > > -            av_rdft_calc(atempo->real_to_complex,
> > > yae_curr_frag(atempo)->xdat);
> > > +            atempo->r2c_fn(atempo->real_to_complex,
> > > yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in,
> > sizeof(float));
> > >
> > >              atempo->state = YAE_OUTPUT_OVERLAP_ADD;
> > >          }
> > > @@ -929,7 +931,7 @@ static int yae_flush(ATempoContext *atempo,
> > >              yae_downmix(atempo, frag);
> > >
> > >              // apply rDFT:
> > > -            av_rdft_calc(atempo->real_to_complex, frag->xdat);
> > > +            atempo->r2c_fn(atempo->real_to_complex, frag->xdat,
> > > frag->xdat_in, sizeof(float));
> > >
> > >              // align current fragment to previous fragment:
> > >              if (yae_adjust_position(atempo)) {
> > > --
> > > 2.33.0
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> > >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Lynne Feb. 6, 2022, 7:02 p.m. UTC | #4
6 Feb 2022, 19:04 by pkoshevoy@gmail.com:

> On Sun, Feb 6, 2022 at 10:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>
>> On Sun, Feb 6, 2022 at 6:16 PM Pavel Koshevoy <pkoshevoy@gmail.com> wrote:
>>
>> > On Sun, Feb 6, 2022 at 4:24 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>> > >
>> > >
>> > Is the old API being removed or deprecated?
>> > Just wondering why this change is necessary.
>> >
>>
>> New api is faster.
>>

The old API will be deprecated soon, but we have to replace its use
in our code first.
The new API is faster and supports non-power-of-two lengths.
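
For context, a minimal usage sketch of the lavu/tx RDFT API along the lines
of what this patch does (the function and buffer names are illustrative,
and error handling is trimmed down to the essentials):

    #include "libavutil/error.h"
    #include "libavutil/mem.h"
    #include "libavutil/tx.h"

    /* Forward and inverse real DFT of length len with the lavu/tx API. */
    static int rdft_roundtrip_sketch(int len)
    {
        AVTXContext *r2c = NULL, *c2r = NULL;
        av_tx_fn r2c_fn, c2r_fn;
        float scale = 1.f, iscale = 1.f;
        float *in            = av_calloc(len,         sizeof(*in));
        AVComplexFloat *freq = av_calloc(len / 2 + 1, sizeof(*freq));
        float *out           = av_calloc(len,         sizeof(*out));
        int ret = AVERROR(ENOMEM);

        if (!in || !freq || !out)
            goto end;

        /* inv = 0: real -> complex, inv = 1: complex -> real */
        if ((ret = av_tx_init(&r2c, &r2c_fn, AV_TX_FLOAT_RDFT, 0, len, &scale,  0)) < 0)
            goto end;
        if ((ret = av_tx_init(&c2r, &c2r_fn, AV_TX_FLOAT_RDFT, 1, len, &iscale, 0)) < 0)
            goto end;

        r2c_fn(r2c, freq, in,   sizeof(float)); /* len reals -> len/2 + 1 complex bins */
        c2r_fn(c2r, out,  freq, sizeof(float)); /* len/2 + 1 complex bins -> len reals */
        ret = 0;

    end:
        av_tx_uninit(&r2c);
        av_tx_uninit(&c2r);
        av_freep(&in);
        av_freep(&freq);
        av_freep(&out);
        return ret;
    }

In the patch the same pattern drives xdat_in -> xdat for the forward
transform and correlation_in -> correlation for the inverse one.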

Patch

diff --git a/configure b/configure
index 5a8b52c77d..6ec25dd622 100755
--- a/configure
+++ b/configure
@@ -3610,8 +3610,6 @@  amovie_filter_deps="avcodec avformat"
 aresample_filter_deps="swresample"
 asr_filter_deps="pocketsphinx"
 ass_filter_deps="libass"
-atempo_filter_deps="avcodec"
-atempo_filter_select="rdft"
 avgblur_opencl_filter_deps="opencl"
 avgblur_vulkan_filter_deps="vulkan spirv_compiler"
 azmq_filter_deps="libzmq"
@@ -7387,7 +7385,6 @@  enabled zlib && add_cppflags -DZLIB_CONST
 # conditional library dependencies, in any order
 enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
 enabled aresample_filter    && prepend avfilter_deps "swresample"
-enabled atempo_filter       && prepend avfilter_deps "avcodec"
 enabled bm3d_filter         && prepend avfilter_deps "avcodec"
 enabled cover_rect_filter   && prepend avfilter_deps "avformat avcodec"
 enabled ebur128_filter && enabled swresample && prepend avfilter_deps "swresample"
diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index e9a6da7970..27f2f6daa0 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c
@@ -39,13 +39,13 @@ 
  */
 
 #include <float.h>
-#include "libavcodec/avfft.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "libavutil/samplefmt.h"
+#include "libavutil/tx.h"
 #include "avfilter.h"
 #include "audio.h"
 #include "internal.h"
@@ -67,7 +67,8 @@  typedef struct AudioFragment {
 
     // rDFT transform of the down-mixed mono fragment, used for
     // fast waveform alignment via correlation in frequency domain:
-    FFTSample *xdat;
+    float *xdat_in;
+    float *xdat;
 } AudioFragment;
 
 /**
@@ -140,9 +141,11 @@  typedef struct ATempoContext {
     FilterState state;
 
     // for fast correlation calculation in frequency domain:
-    RDFTContext *real_to_complex;
-    RDFTContext *complex_to_real;
-    FFTSample *correlation;
+    AVTXContext *real_to_complex;
+    AVTXContext *complex_to_real;
+    av_tx_fn r2c_fn, c2r_fn;
+    float *correlation_in;
+    float *correlation;
 
     // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
     AVFrame *dst_buffer;
@@ -228,18 +231,18 @@  static void yae_release_buffers(ATempoContext *atempo)
 
     av_freep(&atempo->frag[0].data);
     av_freep(&atempo->frag[1].data);
+    av_freep(&atempo->frag[0].xdat_in);
+    av_freep(&atempo->frag[1].xdat_in);
     av_freep(&atempo->frag[0].xdat);
     av_freep(&atempo->frag[1].xdat);
 
     av_freep(&atempo->buffer);
     av_freep(&atempo->hann);
+    av_freep(&atempo->correlation_in);
     av_freep(&atempo->correlation);
 
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 }
 
 /* av_realloc is not aligned enough; fortunately, the data does not need to
@@ -247,7 +250,7 @@  static void yae_release_buffers(ATempoContext *atempo)
 #define RE_MALLOC_OR_FAIL(field, field_size)                    \
     do {                                                        \
         av_freep(&field);                                       \
-        field = av_malloc(field_size);                          \
+        field = av_calloc(field_size, 1);                       \
         if (!field) {                                           \
             yae_release_buffers(atempo);                        \
             return AVERROR(ENOMEM);                             \
@@ -265,6 +268,7 @@  static int yae_reset(ATempoContext *atempo,
 {
     const int sample_size = av_get_bytes_per_sample(format);
     uint32_t nlevels  = 0;
+    float scale = 1.f, iscale = 1.f;
     uint32_t pot;
     int i;
 
@@ -288,29 +292,29 @@  static int yae_reset(ATempoContext *atempo,
     // initialize audio fragment buffers:
     RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window * atempo->stride);
     RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window * atempo->stride);
-    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, atempo->window * sizeof(FFTComplex));
-    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1) * sizeof(AVComplexFloat));
 
     // initialize rDFT contexts:
-    av_rdft_end(atempo->real_to_complex);
-    atempo->real_to_complex = NULL;
-
-    av_rdft_end(atempo->complex_to_real);
-    atempo->complex_to_real = NULL;
+    av_tx_uninit(&atempo->real_to_complex);
+    av_tx_uninit(&atempo->complex_to_real);
 
-    atempo->real_to_complex = av_rdft_init(nlevels + 1, DFT_R2C);
+    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
     if (!atempo->real_to_complex) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    atempo->complex_to_real = av_rdft_init(nlevels + 1, IDFT_C2R);
+    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
     if (!atempo->complex_to_real) {
         yae_release_buffers(atempo);
         return AVERROR(ENOMEM);
     }
 
-    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(FFTComplex));
+    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1) * sizeof(AVComplexFloat));
+    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window * sizeof(AVComplexFloat));
 
     atempo->ring = atempo->window * 3;
     RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring * atempo->stride);
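
For context, the lavu/tx calls above follow this shape: each direction gets its own AVTXContext from av_tx_init(), the transform is driven through the returned av_tx_fn callback, and it is called out of place here, hence the window + 1 sized spectral buffers and the new *_in input companions. A minimal sketch of the same setup and call pattern (illustrative only; the function name, len and the buffer names are invented, not taken from af_atempo.c):

    /* Minimal lavu/tx RDFT round trip mirroring the calls above.
     * Illustrative sketch, not patch content; error handling abbreviated. */
    #include "libavutil/error.h"
    #include "libavutil/mem.h"
    #include "libavutil/tx.h"

    static int rdft_roundtrip_example(int len /* real samples, a power of two */)
    {
        AVTXContext *r2c = NULL, *c2r = NULL;
        av_tx_fn r2c_fn, c2r_fn;
        float scale = 1.f, iscale = 1.f;
        int ret = AVERROR(ENOMEM);

        /* out of place: len real samples in, len/2 + 1 complex bins out */
        float          *re_in  = av_calloc(len,         sizeof(*re_in));
        AVComplexFloat *spec   = av_calloc(len / 2 + 1, sizeof(*spec));
        float          *re_out = av_calloc(len,         sizeof(*re_out));

        if (!re_in || !spec || !re_out)
            goto fail;

        if ((ret = av_tx_init(&r2c, &r2c_fn, AV_TX_FLOAT_RDFT, 0, len, &scale,  0)) < 0 ||
            (ret = av_tx_init(&c2r, &c2r_fn, AV_TX_FLOAT_RDFT, 1, len, &iscale, 0)) < 0)
            goto fail;

        r2c_fn(r2c, spec,   re_in, sizeof(float)); /* forward: real -> complex          */
        c2r_fn(c2r, re_out, spec,  sizeof(float)); /* inverse: complex -> real          */
                                                   /* (same stride argument as the patch) */
        ret = 0;
    fail:
        av_tx_uninit(&r2c);
        av_tx_uninit(&c2r);
        av_freep(&re_in);
        av_freep(&spec);
        av_freep(&re_out);
        return ret;
    }
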
@@ -348,7 +352,7 @@  static int yae_update(AVFilterContext *ctx)
         const uint8_t *src_end = src +                                  \
             frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                         \
-        FFTSample *xdat = frag->xdat;                                   \
+        float *xdat = frag->xdat_in;                                    \
         scalar_type tmp;                                                \
                                                                         \
         if (atempo->channels == 1) {                                    \
@@ -356,27 +360,27 @@  static int yae_update(AVFilterContext *ctx)
                 tmp = *(const scalar_type *)src;                        \
                 src += sizeof(scalar_type);                             \
                                                                         \
-                *xdat = (FFTSample)tmp;                                 \
+                *xdat = (float)tmp;                                     \
             }                                                           \
         } else {                                                        \
-            FFTSample s, max, ti, si;                                   \
+            float s, max, ti, si;                                       \
             int i;                                                      \
                                                                         \
             for (; src < src_end; xdat++) {                             \
                 tmp = *(const scalar_type *)src;                        \
                 src += sizeof(scalar_type);                             \
                                                                         \
-                max = (FFTSample)tmp;                                   \
-                s = FFMIN((FFTSample)scalar_max,                        \
-                          (FFTSample)fabsf(max));                       \
+                max = (float)tmp;                                       \
+                s = FFMIN((float)scalar_max,                            \
+                          (float)fabsf(max));                           \
                                                                         \
                 for (i = 1; i < atempo->channels; i++) {                \
                     tmp = *(const scalar_type *)src;                    \
                     src += sizeof(scalar_type);                         \
                                                                         \
-                    ti = (FFTSample)tmp;                                \
-                    si = FFMIN((FFTSample)scalar_max,                   \
-                               (FFTSample)fabsf(ti));                   \
+                    ti = (float)tmp;                                    \
+                    si = FFMIN((float)scalar_max,                       \
+                               (float)fabsf(ti));                       \
                                                                         \
                     if (s < si) {                                       \
                         s   = si;                                       \
@@ -399,7 +403,7 @@  static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
     const uint8_t *src = frag->data;
 
     // init complex data buffer used for FFT and Correlation:
-    memset(frag->xdat, 0, sizeof(FFTComplex) * atempo->window);
+    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * atempo->window);
 
     if (atempo->format == AV_SAMPLE_FMT_U8) {
         yae_init_xdat(uint8_t, 127);
@@ -598,32 +602,24 @@  static void yae_advance_to_next_frag(ATempoContext *atempo)
  * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
  * and transform back via complex_to_real rDFT.
  */
-static void yae_xcorr_via_rdft(FFTSample *xcorr,
-                               RDFTContext *complex_to_real,
-                               const FFTComplex *xa,
-                               const FFTComplex *xb,
+static void yae_xcorr_via_rdft(float *xcorr_in,
+                               float *xcorr,
+                               AVTXContext *complex_to_real,
+                               av_tx_fn c2r_fn,
+                               const AVComplexFloat *xa,
+                               const AVComplexFloat *xb,
                                const int window)
 {
-    FFTComplex *xc = (FFTComplex *)xcorr;
+    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
     int i;
 
-    // NOTE: first element requires special care -- Given Y = rDFT(X),
-    // Im(Y[0]) and Im(Y[N/2]) are always zero, therefore av_rdft_calc
-    // stores Re(Y[N/2]) in place of Im(Y[0]).
-
-    xc->re = xa->re * xb->re;
-    xc->im = xa->im * xb->im;
-    xa++;
-    xb++;
-    xc++;
-
-    for (i = 1; i < window; i++, xa++, xb++, xc++) {
+    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
         xc->re = (xa->re * xb->re + xa->im * xb->im);
         xc->im = (xa->im * xb->re - xa->re * xb->im);
     }
 
     // apply inverse rDFT:
-    av_rdft_calc(complex_to_real, xcorr);
+    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(float));
 }
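
For context: the NOTE removed above described av_rdft_calc's packed layout, where Re(Y[N/2]) was stored in place of Im(Y[0]). The AV_TX_FLOAT_RDFT forward transform instead emits all N/2 + 1 complex bins explicitly (DC and Nyquist simply carry zero imaginary parts); with the transform length being twice atempo->window, that is window + 1 bins, which is why the multiply loop now runs to i <= window and why the xdat and correlation_in buffers above hold window + 1 AVComplexFloat. The loop itself is the usual frequency-domain cross-correlation, in effect xcorr = IDFT(DFT(prev) * conj(DFT(frag))).
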
 
 /**
@@ -637,21 +633,25 @@  static int yae_align(AudioFragment *frag,
                      const int window,
                      const int delta_max,
                      const int drift,
-                     FFTSample *correlation,
-                     RDFTContext *complex_to_real)
+                     float *correlation_in,
+                     float *correlation,
+                     AVTXContext *complex_to_real,
+                     av_tx_fn c2r_fn)
 {
     int       best_offset = -drift;
-    FFTSample best_metric = -FLT_MAX;
-    FFTSample *xcorr;
+    float     best_metric = -FLT_MAX;
+    float    *xcorr;
 
     int i0;
     int i1;
     int i;
 
-    yae_xcorr_via_rdft(correlation,
+    yae_xcorr_via_rdft(correlation_in,
+                       correlation,
                        complex_to_real,
-                       (const FFTComplex *)prev->xdat,
-                       (const FFTComplex *)frag->xdat,
+                       c2r_fn,
+                       (const AVComplexFloat *)prev->xdat,
+                       (const AVComplexFloat *)frag->xdat,
                        window);
 
     // identify search window boundaries:
@@ -665,11 +665,11 @@  static int yae_align(AudioFragment *frag,
     xcorr = correlation + i0;
 
     for (i = i0; i < i1; i++, xcorr++) {
-        FFTSample metric = *xcorr;
+        float metric = *xcorr;
 
         // normalize:
-        FFTSample drifti = (FFTSample)(drift + i);
-        metric *= drifti * (FFTSample)(i - i0) * (FFTSample)(i1 - i);
+        float drifti = (float)(drift + i);
+        metric *= drifti * (float)(i - i0) * (float)(i1 - i);
 
         if (metric > best_metric) {
             best_metric = metric;
@@ -706,8 +706,10 @@  static int yae_adjust_position(ATempoContext *atempo)
                                      atempo->window,
                                      delta_max,
                                      drift,
+                                     atempo->correlation_in,
                                      atempo->correlation,
-                                     atempo->complex_to_real);
+                                     atempo->complex_to_real,
+                                     atempo->c2r_fn);
 
     if (correction) {
         // adjust fragment position:
@@ -833,7 +835,7 @@  yae_apply(ATempoContext *atempo,
             yae_downmix(atempo, yae_curr_frag(atempo));
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
             // must load the second fragment before alignment can start:
             if (!atempo->nfrag) {
@@ -865,7 +867,7 @@  yae_apply(ATempoContext *atempo,
             yae_downmix(atempo, yae_curr_frag(atempo));
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, yae_curr_frag(atempo)->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));
 
             atempo->state = YAE_OUTPUT_OVERLAP_ADD;
         }
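
The same in-place to out-of-place change repeats at every forward-transform site (both branches of yae_apply here and yae_flush below): yae_downmix() now fills frag->xdat_in with the down-mixed mono signal and the spectrum lands in frag->xdat, so the alignment code keeps reading xdat exactly as before. Schematically (mirroring the surrounding lines, not additional patch content):

    yae_downmix(atempo, frag);               /* writes frag->xdat_in   */
    atempo->r2c_fn(atempo->real_to_complex,  /* spectrum -> frag->xdat */
                   frag->xdat, frag->xdat_in, sizeof(float));
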
@@ -929,7 +931,7 @@  static int yae_flush(ATempoContext *atempo,
             yae_downmix(atempo, frag);
 
             // apply rDFT:
-            av_rdft_calc(atempo->real_to_complex, frag->xdat);
+            atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));
 
             // align current fragment to previous fragment:
             if (yae_adjust_position(atempo)) {