diff mbox

[FFmpeg-devel] avfilter/af_silenceremove: add optional tone when silence is removed

Message ID BN6PR10MB12505361FE3D3161F34E8B8AC3DC0@BN6PR10MB1250.namprd10.prod.outlook.com
State Superseded
Headers show

Commit Message

Greg Rowe Oct. 13, 2016, 8:48 p.m. UTC
The attached patch adds two optional parameters to af_silenceremove for inserting a tone where silence was removed.  This alerts the user that silence has been trimmed from the original stream.  The parameters are tone_duration, which defaults to 0.0 (disabling the feature), and tone_hz, which specifies the frequency of the tone.


Thanks,

Greg


--

Greg Rowe

www.shoretel.com
From 41405e90cb2fb41441a6cf29c7a0d14362fd1b1f Mon Sep 17 00:00:00 2001
From: Greg Rowe <growe@shoretel.com>
Date: Fri, 7 Oct 2016 13:39:58 -0400
Subject: [PATCH] avfilter/af_silenceremove: add optional tone when silence is
 removed

This commit adds two options to the af_silenceremove filter.  It adds
tone_duration and tone_hz making it possible to insert a tone when
silence is removed.  Tone insertion is disabled by default (by using a
tone_duration of 0.0 seconds).

Signed-off-by: Greg Rowe <growe@shoretel.com>
---
 Changelog                      |   1 +
 doc/filters.texi               |  11 ++-
 libavfilter/af_silenceremove.c | 161 +++++++++++++++++++++++++++++++++++------
 libavfilter/version.h          |   2 +-
 4 files changed, 151 insertions(+), 24 deletions(-)

Comments

Michael Niedermayer Oct. 14, 2016, 1:43 a.m. UTC | #1
On Thu, Oct 13, 2016 at 08:48:11PM +0000, Greg Rowe wrote:
> The attached patch adds two optional parameters to af_silenceremove for inserting a tone where silence was removed.  This alerts the user that silence has been trimmed from the original stream.  The parameters are tone_duration, which defaults to 0.0 (disabling the feature), and tone_hz, which specifies the frequency of the tone.
> 
> 
> Thanks,
> 
> Greg
> 
> 
> --
> 
> Greg Rowe
> 
> www.shoretel.com

> From 41405e90cb2fb41441a6cf29c7a0d14362fd1b1f Mon Sep 17 00:00:00 2001
> From: Greg Rowe <growe@shoretel.com>
> Date: Fri, 7 Oct 2016 13:39:58 -0400
> Subject: [PATCH] avfilter/af_silenceremove: add optional tone when silence is
>  removed
> 
> This commit adds two options to the af_silenceremove filter.  It adds
> tone_duration and tone_hz making it possible to insert a tone when
> silence is removed.  Tone insertion is disabled by default (by using a
> tone_duration of 0.0 seconds).
> 
> Signed-off-by: Greg Rowe <growe@shoretel.com>
> ---
>  Changelog                      |   1 +
>  doc/filters.texi               |  11 ++-
>  libavfilter/af_silenceremove.c | 161 +++++++++++++++++++++++++++++++++++------
>  libavfilter/version.h          |   2 +-
>  4 files changed, 151 insertions(+), 24 deletions(-)
> 
> diff --git a/Changelog b/Changelog
> index 0da009c..86e031c 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
>  releases are sorted from youngest to oldest.
>  
>  version <next>:
> +- Added optional tone insertion in af_silenceremove
>  - libopenmpt demuxer
>  - tee protocol
>  - Changed metadata print option to accept general urls
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 4b2f7bf..e09a303 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -3340,7 +3340,8 @@ ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null -
>  
>  @section silenceremove
>  
> -Remove silence from the beginning, middle or end of the audio.
> +Remove silence from the beginning, middle or end of the audio while
> +optionally inserting a tone where silence was removed.
>  
>  The filter accepts the following options:
>  
> @@ -3401,6 +3402,14 @@ Default value is @code{rms}.
>  @item window
>  Set ratio used to calculate size of window for detecting silence.
>  Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
> +
> +@item tone_duration
> +Set the duration of the tone inserted in the stream when silence is removed.  A value of @code{0} disables tone insertion.
> +Default value is @code{0.0}.
> +
> +@item tone_hz
> +Set the frequency of the tone inserted in the stream when silence is removed.
> +Default value is @code{1000.0}.
>  @end table
>  
>  @subsection Examples
> diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
> index f156d18..07cf428 100644
> --- a/libavfilter/af_silenceremove.c
> +++ b/libavfilter/af_silenceremove.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2001 Chris Bagwell
>   * Copyright (c) 2003 Donnie Smith
>   * Copyright (c) 2014 Paul B Mahol
> + * Copyright (c) 2016 Shoretel <growe@shoretel.com>
>   *
>   * This file is part of FFmpeg.
>   *

> @@ -31,11 +32,20 @@
>  #include "internal.h"
>  
>  enum SilenceMode {
> -    SILENCE_TRIM,
> +    SILENCE_TRIM = 0,

unrelated change and thus should not be in this patch


>      SILENCE_TRIM_FLUSH,
>      SILENCE_COPY,
>      SILENCE_COPY_FLUSH,
> -    SILENCE_STOP
> +    SILENCE_STOP,
> +    SILENCE_END_MARKER
> +};
> +
> +static const char* SILENCE_MODE_NAMES[] = {
> +    NULL_IF_CONFIG_SMALL("TRIM"),
> +    NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("COPY"),
> +    NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("STOP")
>  };
>  
>  typedef struct SilenceRemoveContext {
> @@ -75,6 +85,10 @@ typedef struct SilenceRemoveContext {
>      int detection;
>      void (*update)(struct SilenceRemoveContext *s, double sample);
>      double(*compute)(struct SilenceRemoveContext *s, double sample);
> +
> +    double last_pts_seconds;
> +    double tone_duration;
> +    double tone_hz;
>  } SilenceRemoveContext;
>  
>  #define OFFSET(x) offsetof(SilenceRemoveContext, x)
> @@ -91,11 +105,51 @@ static const AVOption silenceremove_options[] = {
>      {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
>      {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
>      { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
> +    {
> +        .name = "tone_duration",
> +        .help = "length of tone inserted when silence is detected (0 to disable)",
> +        .offset = OFFSET(tone_duration),
> +        .type = AV_OPT_TYPE_DOUBLE,
> +        .default_val = {.dbl=0.0},
> +        .min = 0.0,
> +        .max = DBL_MAX,
> +        .flags = FLAGS,
> +        .unit = "tone",
> +    },
> +    {
> +        .name = "tone_hz",
> +        .help = "frequency of tone inserted when silence is removed, 1 kHz default",
> +        .offset = OFFSET(tone_hz),
> +        .type = AV_OPT_TYPE_DOUBLE,
> +        .default_val = {.dbl=1000.0},
> +        .min = 0.0,
> +        .max = DBL_MAX,
> +        .flags = FLAGS,
> +        .unit = "tone",
> +    },

> -    { NULL }
> +    {NULL}

unrelated


>  };
>  
>  AVFILTER_DEFINE_CLASS(silenceremove);
>  
> +static const char* mode_to_string(enum SilenceMode mode)
> +{
> +    if (mode >= SILENCE_END_MARKER) {
> +        return "";
> +    }
> +    /* This can be null if the config is small.  */
> +    return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:"";
> +}
> +
> +
> +static void set_mode(AVFilterContext *ctx, enum SilenceMode new)
> +{
> +    SilenceRemoveContext *s = ctx->priv;
> +    av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n",
> +           mode_to_string(s->mode), mode_to_string(new));
> +    s->mode = new;
> +}

looks unneeded


> +
>  static double compute_peak(SilenceRemoveContext *s, double sample)
>  {
>      double new_sum;
> @@ -209,14 +263,46 @@ static int config_input(AVFilterLink *inlink)
>      s->stop_holdoff_end    = 0;
>      s->stop_found_periods  = 0;
>  
> -    if (s->start_periods)
> -        s->mode = SILENCE_TRIM;
> -    else
> -        s->mode = SILENCE_COPY;
> +    set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY);

unrelated


>  
>      return 0;
>  }
>  
> +static int insert_tone(AVFilterLink *inlink,
> +                       AVFilterLink *outlink,
> +                       double tone_hz,
> +                       double duration)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    int sample_count = duration * inlink->sample_rate;
> +    double twopi = 2.0 * M_PI;
> +    int i = 0;
> +    AVFrame *out = NULL;
> +    double *obuf = NULL;
> +    double step = 0.0;
> +    double s = 0.0;
> +
> +    out = ff_get_audio_buffer(inlink, sample_count / inlink->channels);
> +    if (!out) {
> +        return AVERROR(ENOMEM);
> +    }
> +    obuf = (double *)out->data[0];
> +    step = tone_hz / (double)out->sample_rate;
> +    s = step;
> +
> +    av_log(ctx, AV_LOG_DEBUG,
> +           "insert beep tone=%fhz duration=%f seconds\n",
> +           tone_hz, duration);
> +
> +
> +    for (i=0; i<sample_count; ++i) {
> +        *obuf++ = sin(twopi * s);
> +        s += step;
> +    }
> +    return ff_filter_frame(outlink, out);
> +}
> +
> +
>  static void flush(AVFrame *out, AVFilterLink *outlink,
>                    int *nb_samples_written, int *ret)
>  {

> @@ -229,6 +315,28 @@ static void flush(AVFrame *out, AVFilterLink *outlink,
>      }
>  }
>  
> +
> +static int process_tone(AVFilterLink *inlink)
> +{
> +    int ret = 0;
> +    double pts_seconds = 0.0;
> +    AVFilterContext *ctx = inlink->dst;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    SilenceRemoveContext *s = ctx->priv;
> +    pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE;

There is no need to use floating point here or in many other cases.
Using floating point makes regression tests harder, as results can
differ between platforms; it is also not accurate: double has too few
mantissa bits to accurately represent int64_t.


[...]

> +
> +    return ret;
> +}
> +
>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  {
>      AVFilterContext *ctx = inlink->dst;

> @@ -243,7 +351,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  
>      switch (s->mode) {
>      case SILENCE_TRIM:
> -silence_trim:
> +    silence_trim:

unrelated

[...]
Moritz Barsnick Oct. 14, 2016, 1:16 p.m. UTC | #2
>  enum SilenceMode {
> -    SILENCE_TRIM,
> +    SILENCE_TRIM = 0,
>      SILENCE_TRIM_FLUSH,
>      SILENCE_COPY,
>      SILENCE_COPY_FLUSH,
> -    SILENCE_STOP
> +    SILENCE_STOP,
> +    SILENCE_END_MARKER
> +};
> +
> +static const char* SILENCE_MODE_NAMES[] = {
> +    NULL_IF_CONFIG_SMALL("TRIM"),
> +    NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("COPY"),
> +    NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("STOP")
>  };

This (and the related functions) is probably unrelated to the feature
you're adding, and is rather a convenience, so I guess it should be in a
separate patch.

Personally, I would name the frequency option simply
"tone_frequency", rather than naming it after its unit (Hz) as "tone_hz",
but that's cosmetic.

Moritz

P.S.: Nice feature for debug.
diff mbox

Patch

diff --git a/Changelog b/Changelog
index 0da009c..86e031c 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@  Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- Added optional tone insertion in af_silenceremove
 - libopenmpt demuxer
 - tee protocol
 - Changed metadata print option to accept general urls
diff --git a/doc/filters.texi b/doc/filters.texi
index 4b2f7bf..e09a303 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3340,7 +3340,8 @@  ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null -
 
 @section silenceremove
 
-Remove silence from the beginning, middle or end of the audio.
+Remove silence from the beginning, middle or end of the audio while
+optionally inserting a tone where silence was removed.
 
 The filter accepts the following options:
 
@@ -3401,6 +3402,14 @@  Default value is @code{rms}.
 @item window
 Set ratio used to calculate size of window for detecting silence.
 Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
+
+@item tone_duration
+Set the duration of the tone inserted in the stream when silence is removed.  A value of @code{0} disables tone insertion.
+Default value is @code{0.0}.
+
+@item tone_hz
+Set the frequency of the tone inserted in the stream when silence is removed.
+Default value is @code{1000.0}.
 @end table
 
 @subsection Examples
diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
index f156d18..07cf428 100644
--- a/libavfilter/af_silenceremove.c
+++ b/libavfilter/af_silenceremove.c
@@ -3,6 +3,7 @@ 
  * Copyright (c) 2001 Chris Bagwell
  * Copyright (c) 2003 Donnie Smith
  * Copyright (c) 2014 Paul B Mahol
+ * Copyright (c) 2016 Shoretel <growe@shoretel.com>
  *
  * This file is part of FFmpeg.
  *
@@ -31,11 +32,20 @@ 
 #include "internal.h"
 
 enum SilenceMode {
-    SILENCE_TRIM,
+    SILENCE_TRIM = 0,
     SILENCE_TRIM_FLUSH,
     SILENCE_COPY,
     SILENCE_COPY_FLUSH,
-    SILENCE_STOP
+    SILENCE_STOP,
+    SILENCE_END_MARKER
+};
+
+static const char* SILENCE_MODE_NAMES[] = {
+    NULL_IF_CONFIG_SMALL("TRIM"),
+    NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
+    NULL_IF_CONFIG_SMALL("COPY"),
+    NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
+    NULL_IF_CONFIG_SMALL("STOP")
 };
 
 typedef struct SilenceRemoveContext {
@@ -75,6 +85,10 @@  typedef struct SilenceRemoveContext {
     int detection;
     void (*update)(struct SilenceRemoveContext *s, double sample);
     double(*compute)(struct SilenceRemoveContext *s, double sample);
+
+    double last_pts_seconds;
+    double tone_duration;
+    double tone_hz;
 } SilenceRemoveContext;
 
 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
@@ -91,11 +105,51 @@  static const AVOption silenceremove_options[] = {
     {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
     {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
     { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
-    { NULL }
+    {
+        .name = "tone_duration",
+        .help = "length of tone inserted when silence is detected (0 to disable)",
+        .offset = OFFSET(tone_duration),
+        .type = AV_OPT_TYPE_DOUBLE,
+        .default_val = {.dbl=0.0},
+        .min = 0.0,
+        .max = DBL_MAX,
+        .flags = FLAGS,
+        .unit = "tone",
+    },
+    {
+        .name = "tone_hz",
+        .help = "frequency of tone inserted when silence is removed, 1 kHz default",
+        .offset = OFFSET(tone_hz),
+        .type = AV_OPT_TYPE_DOUBLE,
+        .default_val = {.dbl=1000.0},
+        .min = 0.0,
+        .max = DBL_MAX,
+        .flags = FLAGS,
+        .unit = "tone",
+    },
+    {NULL}
 };
 
 AVFILTER_DEFINE_CLASS(silenceremove);
 
+static const char* mode_to_string(enum SilenceMode mode)
+{
+    if (mode >= SILENCE_END_MARKER) {
+        return "";
+    }
+    /* This can be null if the config is small.  */
+    return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:"";
+}
+
+
+static void set_mode(AVFilterContext *ctx, enum SilenceMode new)
+{
+    SilenceRemoveContext *s = ctx->priv;
+    av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n",
+           mode_to_string(s->mode), mode_to_string(new));
+    s->mode = new;
+}
+
 static double compute_peak(SilenceRemoveContext *s, double sample)
 {
     double new_sum;
@@ -209,14 +263,46 @@  static int config_input(AVFilterLink *inlink)
     s->stop_holdoff_end    = 0;
     s->stop_found_periods  = 0;
 
-    if (s->start_periods)
-        s->mode = SILENCE_TRIM;
-    else
-        s->mode = SILENCE_COPY;
+    set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY);
 
     return 0;
 }
 
+static int insert_tone(AVFilterLink *inlink,
+                       AVFilterLink *outlink,
+                       double tone_hz,
+                       double duration)
+{
+    AVFilterContext *ctx = inlink->dst;
+    int sample_count = duration * inlink->sample_rate;
+    double twopi = 2.0 * M_PI;
+    int i = 0;
+    AVFrame *out = NULL;
+    double *obuf = NULL;
+    double step = 0.0;
+    double s = 0.0;
+
+    out = ff_get_audio_buffer(inlink, sample_count / inlink->channels);
+    if (!out) {
+        return AVERROR(ENOMEM);
+    }
+    obuf = (double *)out->data[0];
+    step = tone_hz / (double)out->sample_rate;
+    s = step;
+
+    av_log(ctx, AV_LOG_DEBUG,
+           "insert beep tone=%fhz duration=%f seconds\n",
+           tone_hz, duration);
+
+
+    for (i=0; i<sample_count; ++i) {
+        *obuf++ = sin(twopi * s);
+        s += step;
+    }
+    return ff_filter_frame(outlink, out);
+}
+
+
 static void flush(AVFrame *out, AVFilterLink *outlink,
                   int *nb_samples_written, int *ret)
 {
@@ -229,6 +315,28 @@  static void flush(AVFrame *out, AVFilterLink *outlink,
     }
 }
 
+
+static int process_tone(AVFilterLink *inlink)
+{
+    int ret = 0;
+    double pts_seconds = 0.0;
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    SilenceRemoveContext *s = ctx->priv;
+    pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE;
+
+    /* Check to be certain that we don't flood the stream with
+     * annoying tones. */
+    if ((s->last_pts_seconds == 0.0)
+        || (pts_seconds - s->last_pts_seconds) > (s->tone_duration * 2.0)) {
+
+        ret = insert_tone(inlink, outlink, s->tone_hz, s->tone_duration);
+        s->last_pts_seconds = pts_seconds;
+    }
+
+    return ret;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -243,7 +351,7 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     switch (s->mode) {
     case SILENCE_TRIM:
-silence_trim:
+    silence_trim:
         nbs = in->nb_samples - nb_samples_read / inlink->channels;
         if (!nbs)
             break;
@@ -263,7 +371,7 @@  silence_trim:
 
                 if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
                     if (++s->start_found_periods >= s->start_periods) {
-                        s->mode = SILENCE_TRIM_FLUSH;
+                        set_mode(ctx, SILENCE_TRIM_FLUSH);
                         goto silence_trim_flush;
                     }
 
@@ -283,7 +391,7 @@  silence_trim:
         break;
 
     case SILENCE_TRIM_FLUSH:
-silence_trim_flush:
+    silence_trim_flush:
         nbs  = s->start_holdoff_end - s->start_holdoff_offset;
         nbs -= nbs % inlink->channels;
         if (!nbs)
@@ -304,13 +412,13 @@  silence_trim_flush:
         if (s->start_holdoff_offset == s->start_holdoff_end) {
             s->start_holdoff_offset = 0;
             s->start_holdoff_end = 0;
-            s->mode = SILENCE_COPY;
+            set_mode(ctx, SILENCE_COPY);
             goto silence_copy;
         }
         break;
 
     case SILENCE_COPY:
-silence_copy:
+    silence_copy:
         nbs = in->nb_samples - nb_samples_read / inlink->channels;
         if (!nbs)
             break;
@@ -329,7 +437,7 @@  silence_copy:
                     threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
 
                 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
-                    s->mode = SILENCE_COPY_FLUSH;
+                    set_mode(ctx, SILENCE_COPY_FLUSH);
                     flush(out, outlink, &nb_samples_written, &ret);
                     goto silence_copy_flush;
                 } else if (threshold) {
@@ -357,7 +465,7 @@  silence_copy:
                             s->stop_holdoff_end = 0;
 
                             if (!s->restart) {
-                                s->mode = SILENCE_STOP;
+                                set_mode(ctx, SILENCE_STOP);
                                 flush(out, outlink, &nb_samples_written, &ret);
                                 goto silence_stop;
                             } else {
@@ -366,12 +474,19 @@  silence_copy:
                                 s->start_holdoff_offset = 0;
                                 s->start_holdoff_end = 0;
                                 clear_window(s);
-                                s->mode = SILENCE_TRIM;
-                                flush(out, outlink, &nb_samples_written, &ret);
-                                goto silence_trim;
+                                set_mode(ctx, SILENCE_TRIM);
+
+                                if (s->tone_duration > 0.0) {
+                                    ret = process_tone(inlink);
+                                }
+                                if (!ret) {
+                                    flush(out, outlink,
+                                          &nb_samples_written, &ret);
+                                    goto silence_trim;
+                                }
                             }
                         }
-                        s->mode = SILENCE_COPY_FLUSH;
+                        set_mode(ctx, SILENCE_COPY_FLUSH);
                         flush(out, outlink, &nb_samples_written, &ret);
                         goto silence_copy_flush;
                     }
@@ -385,7 +500,7 @@  silence_copy:
         break;
 
     case SILENCE_COPY_FLUSH:
-silence_copy_flush:
+    silence_copy_flush:
         nbs  = s->stop_holdoff_end - s->stop_holdoff_offset;
         nbs -= nbs % inlink->channels;
         if (!nbs)
@@ -406,12 +521,12 @@  silence_copy_flush:
         if (s->stop_holdoff_offset == s->stop_holdoff_end) {
             s->stop_holdoff_offset = 0;
             s->stop_holdoff_end = 0;
-            s->mode = SILENCE_COPY;
+            set_mode(ctx, SILENCE_COPY);
             goto silence_copy;
         }
         break;
     case SILENCE_STOP:
-silence_stop:
+    silence_stop:
         break;
     }
 
@@ -427,6 +542,8 @@  static int request_frame(AVFilterLink *outlink)
     int ret;
 
     ret = ff_request_frame(ctx->inputs[0]);
+    /* If there is no more data but the holdoff buffer still has data
+     * then copy the holdoff buffer out */
     if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
                                s->mode == SILENCE_COPY)) {
         int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
@@ -441,7 +558,7 @@  static int request_frame(AVFilterLink *outlink)
                    nbs * sizeof(double));
             ret = ff_filter_frame(ctx->inputs[0], frame);
         }
-        s->mode = SILENCE_STOP;
+        set_mode(ctx, SILENCE_STOP);
     }
     return ret;
 }
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 93d249b..4626ca4 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@ 
 
 #define LIBAVFILTER_VERSION_MAJOR   6
 #define LIBAVFILTER_VERSION_MINOR  63
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \