@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
releases are sorted from youngest to oldest.
version <next>:
+- Added optional tone insertion in af_silenceremove
- libopenmpt demuxer
- tee protocol
- Changed metadata print option to accept general urls
@@ -3340,7 +3340,8 @@ ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null -
@section silenceremove
-Remove silence from the beginning, middle or end of the audio.
+Remove silence from the beginning, middle or end of the audio while
+optionally inserting a tone where silence was removed.
The filter accepts the following options:
@@ -3401,6 +3402,14 @@ Default value is @code{rms}.
@item window
Set ratio used to calculate size of window for detecting silence.
Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
+
+@item tone_duration
+Set the duration of the tone inserted in the stream when silence is removed. A value of @code{0} disables tone insertion.
+Default value is @code{0.0}.
+
+@item tone_hz
+Set the frequency of the tone inserted in the stream when silence is removed.
+Default value is @code{1000.0}.
@end table
@subsection Examples
@@ -3,6 +3,7 @@
* Copyright (c) 2001 Chris Bagwell
* Copyright (c) 2003 Donnie Smith
* Copyright (c) 2014 Paul B Mahol
+ * Copyright (c) 2016 Shoretel <growe@shoretel.com>
*
* This file is part of FFmpeg.
*
@@ -31,11 +32,20 @@
#include "internal.h"
enum SilenceMode {
- SILENCE_TRIM,
+ SILENCE_TRIM = 0,
SILENCE_TRIM_FLUSH,
SILENCE_COPY,
SILENCE_COPY_FLUSH,
- SILENCE_STOP
+ SILENCE_STOP,
+ SILENCE_END_MARKER
+};
+
+static const char* SILENCE_MODE_NAMES[] = {
+ NULL_IF_CONFIG_SMALL("TRIM"),
+ NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
+ NULL_IF_CONFIG_SMALL("COPY"),
+ NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
+ NULL_IF_CONFIG_SMALL("STOP")
};
typedef struct SilenceRemoveContext {
@@ -75,6 +85,10 @@ typedef struct SilenceRemoveContext {
int detection;
void (*update)(struct SilenceRemoveContext *s, double sample);
double(*compute)(struct SilenceRemoveContext *s, double sample);
+
+ double last_pts_seconds;
+ double tone_duration;
+ double tone_hz;
} SilenceRemoveContext;
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
@@ -91,11 +105,51 @@ static const AVOption silenceremove_options[] = {
{ "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" },
{ "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" },
{ "window", NULL, OFFSET(window_ratio), AV_OPT_TYPE_DOUBLE, {.dbl=0.02}, 0, 10, FLAGS },
- { NULL }
+ {
+ .name = "tone_duration",
+ .help = "length of tone inserted when silence is detected (0 to disable)",
+ .offset = OFFSET(tone_duration),
+ .type = AV_OPT_TYPE_DOUBLE,
+ .default_val = {.dbl=0.0},
+ .min = 0.0,
+ .max = DBL_MAX,
+ .flags = FLAGS,
+ .unit = "tone",
+ },
+ {
+ .name = "tone_hz",
+ .help = "frequency of tone inserted when silence is removed, 1 kHz default",
+ .offset = OFFSET(tone_hz),
+ .type = AV_OPT_TYPE_DOUBLE,
+ .default_val = {.dbl=1000.0},
+ .min = 0.0,
+ .max = DBL_MAX,
+ .flags = FLAGS,
+ .unit = "tone",
+ },
+ {NULL}
};
AVFILTER_DEFINE_CLASS(silenceremove);
+static const char* mode_to_string(enum SilenceMode mode)
+{
+ if (mode >= SILENCE_END_MARKER) {
+ return "";
+ }
+ /* This can be null if the config is small. */
+ return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:"";
+}
+
+
+static void set_mode(AVFilterContext *ctx, enum SilenceMode new)
+{
+ SilenceRemoveContext *s = ctx->priv;
+ av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n",
+ mode_to_string(s->mode), mode_to_string(new));
+ s->mode = new;
+}
+
static double compute_peak(SilenceRemoveContext *s, double sample)
{
double new_sum;
@@ -209,14 +263,46 @@ static int config_input(AVFilterLink *inlink)
s->stop_holdoff_end = 0;
s->stop_found_periods = 0;
- if (s->start_periods)
- s->mode = SILENCE_TRIM;
- else
- s->mode = SILENCE_COPY;
+ set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY);
return 0;
}
+static int insert_tone(AVFilterLink *inlink,
+ AVFilterLink *outlink,
+ double tone_hz,
+ double duration)
+{
+ AVFilterContext *ctx = inlink->dst;
+ int sample_count = duration * inlink->sample_rate;
+ double twopi = 2.0 * M_PI;
+ int i = 0;
+ AVFrame *out = NULL;
+ double *obuf = NULL;
+ double step = 0.0;
+ double s = 0.0;
+
+ out = ff_get_audio_buffer(inlink, sample_count / inlink->channels);
+ if (!out) {
+ return AVERROR(ENOMEM);
+ }
+ obuf = (double *)out->data[0];
+ step = tone_hz / (double)out->sample_rate;
+ s = step;
+
+ av_log(ctx, AV_LOG_DEBUG,
+ "insert beep tone=%fhz duration=%f seconds\n",
+ tone_hz, duration);
+
+
+ for (i=0; i<sample_count; ++i) {
+ *obuf++ = sin(twopi * s);
+ s += step;
+ }
+ return ff_filter_frame(outlink, out);
+}
+
+
static void flush(AVFrame *out, AVFilterLink *outlink,
int *nb_samples_written, int *ret)
{
@@ -229,6 +315,28 @@ static void flush(AVFrame *out, AVFilterLink *outlink,
}
}
+
+static int process_tone(AVFilterLink *inlink)
+{
+ int ret = 0;
+ double pts_seconds = 0.0;
+ AVFilterContext *ctx = inlink->dst;
+ AVFilterLink *outlink = ctx->outputs[0];
+ SilenceRemoveContext *s = ctx->priv;
+ pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE;
+
+ /* Check to be certain that we don't flood the stream with
+ * annoying tones. */
+ if ((s->last_pts_seconds == 0.0)
+ || (pts_seconds - s->last_pts_seconds) > (s->tone_duration * 2.0)) {
+
+ ret = insert_tone(inlink, outlink, s->tone_hz, s->tone_duration);
+ s->last_pts_seconds = pts_seconds;
+ }
+
+ return ret;
+}
+
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
AVFilterContext *ctx = inlink->dst;
@@ -243,7 +351,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
switch (s->mode) {
case SILENCE_TRIM:
-silence_trim:
+ silence_trim:
nbs = in->nb_samples - nb_samples_read / inlink->channels;
if (!nbs)
break;
@@ -263,7 +371,7 @@ silence_trim:
if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
if (++s->start_found_periods >= s->start_periods) {
- s->mode = SILENCE_TRIM_FLUSH;
+ set_mode(ctx, SILENCE_TRIM_FLUSH);
goto silence_trim_flush;
}
@@ -283,7 +391,7 @@ silence_trim:
break;
case SILENCE_TRIM_FLUSH:
-silence_trim_flush:
+ silence_trim_flush:
nbs = s->start_holdoff_end - s->start_holdoff_offset;
nbs -= nbs % inlink->channels;
if (!nbs)
@@ -304,13 +412,13 @@ silence_trim_flush:
if (s->start_holdoff_offset == s->start_holdoff_end) {
s->start_holdoff_offset = 0;
s->start_holdoff_end = 0;
- s->mode = SILENCE_COPY;
+ set_mode(ctx, SILENCE_COPY);
goto silence_copy;
}
break;
case SILENCE_COPY:
-silence_copy:
+ silence_copy:
nbs = in->nb_samples - nb_samples_read / inlink->channels;
if (!nbs)
break;
@@ -329,7 +437,7 @@ silence_copy:
threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
if (threshold && s->stop_holdoff_end && !s->leave_silence) {
- s->mode = SILENCE_COPY_FLUSH;
+ set_mode(ctx, SILENCE_COPY_FLUSH);
flush(out, outlink, &nb_samples_written, &ret);
goto silence_copy_flush;
} else if (threshold) {
@@ -357,7 +465,7 @@ silence_copy:
s->stop_holdoff_end = 0;
if (!s->restart) {
- s->mode = SILENCE_STOP;
+ set_mode(ctx, SILENCE_STOP);
flush(out, outlink, &nb_samples_written, &ret);
goto silence_stop;
} else {
@@ -366,12 +474,19 @@ silence_copy:
s->start_holdoff_offset = 0;
s->start_holdoff_end = 0;
clear_window(s);
- s->mode = SILENCE_TRIM;
- flush(out, outlink, &nb_samples_written, &ret);
- goto silence_trim;
+ set_mode(ctx, SILENCE_TRIM);
+
+ if (s->tone_duration > 0.0) {
+ ret = process_tone(inlink);
+ }
+ if (!ret) {
+ flush(out, outlink,
+ &nb_samples_written, &ret);
+ goto silence_trim;
+ }
}
}
- s->mode = SILENCE_COPY_FLUSH;
+ set_mode(ctx, SILENCE_COPY_FLUSH);
flush(out, outlink, &nb_samples_written, &ret);
goto silence_copy_flush;
}
@@ -385,7 +500,7 @@ silence_copy:
break;
case SILENCE_COPY_FLUSH:
-silence_copy_flush:
+ silence_copy_flush:
nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
nbs -= nbs % inlink->channels;
if (!nbs)
@@ -406,12 +521,12 @@ silence_copy_flush:
if (s->stop_holdoff_offset == s->stop_holdoff_end) {
s->stop_holdoff_offset = 0;
s->stop_holdoff_end = 0;
- s->mode = SILENCE_COPY;
+ set_mode(ctx, SILENCE_COPY);
goto silence_copy;
}
break;
case SILENCE_STOP:
-silence_stop:
+ silence_stop:
break;
}
@@ -427,6 +542,8 @@ static int request_frame(AVFilterLink *outlink)
int ret;
ret = ff_request_frame(ctx->inputs[0]);
+ /* If there is no more data but the holdoff buffer still has data
+ * then copy the holdoff buffer out */
if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
s->mode == SILENCE_COPY)) {
int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
@@ -441,7 +558,7 @@ static int request_frame(AVFilterLink *outlink)
nbs * sizeof(double));
ret = ff_filter_frame(ctx->inputs[0], frame);
}
- s->mode = SILENCE_STOP;
+ set_mode(ctx, SILENCE_STOP);
}
return ret;
}
@@ -31,7 +31,7 @@
#define LIBAVFILTER_VERSION_MAJOR 6
#define LIBAVFILTER_VERSION_MINOR 63
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \