[FFmpeg-devel] avfilter/af_atempo: fix sound shake when change speed

Submitted by Steven Liu on Feb. 17, 2017, 8:55 a.m.

Details

Message ID 20170217085551.1291-1-lq@chinaffmpeg.org
State New
Headers show

Commit Message

Steven Liu Feb. 17, 2017, 8:55 a.m.
commandline:
./ffmpeg -i ~/Downloads/test.wav -af atempo=1.5 -acodec aac -y
output.aac

When playing output.aac, the sound shakes badly and is terrible to listen to.
After this patch,
the sound plays back smoothly.

Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
---
 libavfilter/af_atempo.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

Comments

Pavel Koshevoy Feb. 18, 2017, 2:28 a.m.
On Fri, Feb 17, 2017 at 1:55 AM, Steven Liu <lq@chinaffmpeg.org> wrote:
> commandline:
> ./ffmpeg -i ~/Downloads/test.wav -af atempo=1.5 -acodec aac -y
> output.aac
>
> play the output.aac, the sound is very shake, terrible.
> after this patch,
> play the sound is smooth
>
> Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> ---
>  libavfilter/af_atempo.c | 17 +++++++----------
>  1 file changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> index a487882..db2f981 100644
> --- a/libavfilter/af_atempo.c
> +++ b/libavfilter/af_atempo.c
> @@ -123,6 +123,8 @@ typedef struct {
>      // tempo scaling factor:
>      double tempo;
>
> +    int drift;
> +
>      // a snapshot of previous fragment input and output position values
>      // captured when the tempo scale factor was set most recently:
>      int64_t origin[2];
> @@ -179,6 +181,7 @@ static void yae_clear(ATempoContext *atempo)
>      atempo->head = 0;
>      atempo->tail = 0;
>
> +    atempo->drift = 0;
>      atempo->nfrag = 0;
>      atempo->state = YAE_LOAD_FRAGMENT;
>
> @@ -696,21 +699,12 @@ static int yae_adjust_position(ATempoContext *atempo)
>      const AudioFragment *prev = yae_prev_frag(atempo);
>      AudioFragment       *frag = yae_curr_frag(atempo);
>
> -    const double prev_output_position =
> -        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2);
> -
> -    const double ideal_output_position =
> -        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2) /
> -        atempo->tempo;
> -
> -    const int drift = (int)(prev_output_position - ideal_output_position);
> -
>      const int delta_max  = atempo->window / 2;
>      const int correction = yae_align(frag,
>                                       prev,
>                                       atempo->window,
>                                       delta_max,
> -                                     drift,
> +                                     atempo->drift,
>                                       atempo->correlation,
>                                       atempo->complex_to_real);
>
> @@ -722,6 +716,9 @@ static int yae_adjust_position(ATempoContext *atempo)
>          frag->nsamples = 0;
>      }
>
> +    // update cumulative correction drift counter:
> +    atempo->drift += correction;
> +
>      return correction;
>  }
>


Essentially, what you are doing here is reverting
0c77cdb491de7a178e4142cb6a24538eec6b4f64

    libavfilter/af_atempo: Avoid round-off error build-up, ticket #2484

    Current method for constraining fragment position drift suffers from
    round-off error build up.

    Instead of calculating cumulative drift as a sum of input fragment
    position corrections, it is more accurate to calculate drift as the
    difference between current fragment position and the ideal position
    specified by the tempo scale factor.


Can you open another ticket and attach your sample file to it so I can
see how terrible it really sounds, and attempt a less destructive
solution perhaps?  Please attach your output file as well.

Thank you,
    Pavel.
Steven Liu Feb. 18, 2017, 6:38 a.m.
2017-02-18 10:28 GMT+08:00 Pavel Koshevoy <pkoshevoy@gmail.com>:

> On Fri, Feb 17, 2017 at 1:55 AM, Steven Liu <lq@chinaffmpeg.org> wrote:
> > commandline:
> > ./ffmpeg -i ~/Downloads/test.wav -af atempo=1.5 -acodec aac -y
> > output.aac
> >
> > play the output.aac, the sound is very shake, terrible.
> > after this patch,
> > play the sound is smooth
> >
> > Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> > ---
> >  libavfilter/af_atempo.c | 17 +++++++----------
> >  1 file changed, 7 insertions(+), 10 deletions(-)
> >
> > diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> > index a487882..db2f981 100644
> > --- a/libavfilter/af_atempo.c
> > +++ b/libavfilter/af_atempo.c
> > @@ -123,6 +123,8 @@ typedef struct {
> >      // tempo scaling factor:
> >      double tempo;
> >
> > +    int drift;
> > +
> >      // a snapshot of previous fragment input and output position values
> >      // captured when the tempo scale factor was set most recently:
> >      int64_t origin[2];
> > @@ -179,6 +181,7 @@ static void yae_clear(ATempoContext *atempo)
> >      atempo->head = 0;
> >      atempo->tail = 0;
> >
> > +    atempo->drift = 0;
> >      atempo->nfrag = 0;
> >      atempo->state = YAE_LOAD_FRAGMENT;
> >
> > @@ -696,21 +699,12 @@ static int yae_adjust_position(ATempoContext
> *atempo)
> >      const AudioFragment *prev = yae_prev_frag(atempo);
> >      AudioFragment       *frag = yae_curr_frag(atempo);
> >
> > -    const double prev_output_position =
> > -        (double)(prev->position[1] - atempo->origin[1] + atempo->window
> / 2);
> > -
> > -    const double ideal_output_position =
> > -        (double)(prev->position[0] - atempo->origin[0] + atempo->window
> / 2) /
> > -        atempo->tempo;
> > -
> > -    const int drift = (int)(prev_output_position -
> ideal_output_position);
> > -
> >      const int delta_max  = atempo->window / 2;
> >      const int correction = yae_align(frag,
> >                                       prev,
> >                                       atempo->window,
> >                                       delta_max,
> > -                                     drift,
> > +                                     atempo->drift,
> >                                       atempo->correlation,
> >                                       atempo->complex_to_real);
> >
> > @@ -722,6 +716,9 @@ static int yae_adjust_position(ATempoContext
> *atempo)
> >          frag->nsamples = 0;
> >      }
> >
> > +    // update cumulative correction drift counter:
> > +    atempo->drift += correction;
> > +
> >      return correction;
> >  }
> >
>
>
> Essentially, what you are doing here is reverting
> 0c77cdb491de7a178e4142cb6a24538eec6b4f64
>
>     libavfilter/af_atempo: Avoid round-off error build-up, ticket #2484
>
>     Current method for constraining fragment position drift suffers from
>     round-off error build up.
>
>     Instead of calculating cumulative drift as a sum of input fragment
>     position corrections, it is more accurate to calculate drift as the
>     difference between current fragment position and the ideal position
>     specified by the tempo scale factor.
>
>
> Can you open another ticket and attach your sample file to it so I can
> see how terrible it really sounds, and attempt a less destructive
> solution perhaps?  Please attach your output file as well.
>
> Thank you,
>     Pavel.




OK, a ticket has been created:
https://trac.ffmpeg.org/ticket/6157

Patch hide | download patch | download mbox

diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index a487882..db2f981 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c
@@ -123,6 +123,8 @@  typedef struct {
     // tempo scaling factor:
     double tempo;
 
+    int drift;
+
     // a snapshot of previous fragment input and output position values
     // captured when the tempo scale factor was set most recently:
     int64_t origin[2];
@@ -179,6 +181,7 @@  static void yae_clear(ATempoContext *atempo)
     atempo->head = 0;
     atempo->tail = 0;
 
+    atempo->drift = 0;
     atempo->nfrag = 0;
     atempo->state = YAE_LOAD_FRAGMENT;
 
@@ -696,21 +699,12 @@  static int yae_adjust_position(ATempoContext *atempo)
     const AudioFragment *prev = yae_prev_frag(atempo);
     AudioFragment       *frag = yae_curr_frag(atempo);
 
-    const double prev_output_position =
-        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2);
-
-    const double ideal_output_position =
-        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2) /
-        atempo->tempo;
-
-    const int drift = (int)(prev_output_position - ideal_output_position);
-
     const int delta_max  = atempo->window / 2;
     const int correction = yae_align(frag,
                                      prev,
                                      atempo->window,
                                      delta_max,
-                                     drift,
+                                     atempo->drift,
                                      atempo->correlation,
                                      atempo->complex_to_real);
 
@@ -722,6 +716,9 @@  static int yae_adjust_position(ATempoContext *atempo)
         frag->nsamples = 0;
     }
 
+    // update cumulative correction drift counter:
+    atempo->drift += correction;
+
     return correction;
 }