diff mbox series

[FFmpeg-devel] avfilter/select: add metadata detection function

Message ID 20210618125022.5304-1-timo@rothenpieler.org
State New
Headers show
Series [FFmpeg-devel] avfilter/select: add metadata detection function | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Timo Rothenpieler June 18, 2021, 12:50 p.m. UTC
---
 doc/filters.texi       | 18 ++++++++++
 libavfilter/f_select.c | 79 ++++++++++++++++++++++++++++++++++++++++--
 libavfilter/version.h  |  2 +-
 3 files changed, 96 insertions(+), 3 deletions(-)

Comments

Timo Rothenpieler June 22, 2021, 11:55 a.m. UTC | #1
On 18.06.2021 14:50, Timo Rothenpieler wrote:
> ---
>   doc/filters.texi       | 18 ++++++++++
>   libavfilter/f_select.c | 79 ++++++++++++++++++++++++++++++++++++++++--
>   libavfilter/version.h  |  2 +-
>   3 files changed, 96 insertions(+), 3 deletions(-)

Will push this soon
Tobias Rapp June 23, 2021, 3:28 p.m. UTC | #2
On 18.06.2021 14:50, Timo Rothenpieler wrote:
 > [...]
 >
 > +@item detected(kind)
 > +Evaluates the metadata added to frames by various detection filters.
 > Returns -1 if the respective filter has detected what it was looking for,
 > +0 otherwise.
 > +
 > +Possible values for the @var{kind} parameter:
 > +@table @option
 > +@item SILENCE (audio only)
 > +Looks for metadata added by @ref{silencedetect}.
 > +@item FREEZE (video only)
 > +Looks for metadata added by @ref{freezedetect}.
 > +@item BLACK (video only)
 > +Looks for metadata added by @ref{blackdetect}.
 > +@end table
 > +
 >   @end table

If I understand that description correctly, a filter line like
"aselect=detected(SILENCE)" will keep the silence and remove the rest?
So for most use cases a wrapping not() shall be used? Adding an example
in the docs might clarify this.

I think it is a bit unfortunate that the new syntax differs from the
existing "concatdec_select", which is similar in concept. In my very
personal opinion, using variables like "silence_detected" would have been
more consistent, but take this as a suggestion, not an objection.

Regards,
Tobias
diff mbox series

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index da8f7d7726..05fec04b55 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -5404,6 +5404,7 @@  Set sidechain gain. Default is 1. Range is from 0.015625 to 64.
 
 This filter supports the all above options as @ref{commands}.
 
+@anchor{silencedetect}
 @section silencedetect
 
 Detect silence in an audio stream.
@@ -7263,6 +7264,7 @@  Filter out noisy pixels from @code{bitplane} set above.
 Default is disabled.
 @end table
 
+@anchor{blackdetect}
 @section blackdetect
 
 Detect video intervals that are (almost) completely black. Can be
@@ -12518,6 +12520,7 @@  Select frame after every @code{step} frames.
 Allowed values are positive integers higher than 0. Default value is @code{1}.
 @end table
 
+@anchor{freezedetect}
 @section freezedetect
 
 Detect frozen video.
@@ -25564,6 +25567,21 @@  missing.
 That basically means that an input frame is selected if its pts is within the
 interval set by the concat demuxer.
 
+@item detected(kind)
+Evaluates the metadata added to frames by various detection filters.
+Returns -1 if the respective filter has detected what it was looking for,
+0 otherwise.
+
+Possible values for the @var{kind} parameter:
+@table @option
+@item SILENCE (audio only)
+Looks for metadata added by @ref{silencedetect}.
+@item FREEZE (video only)
+Looks for metadata added by @ref{freezedetect}.
+@item BLACK (video only)
+Looks for metadata added by @ref{blackdetect}.
+@end table
+
 @end table
 
 The default value of the select expression is "1".
diff --git a/libavfilter/f_select.c b/libavfilter/f_select.c
index f0468078e8..b1eba67876 100644
--- a/libavfilter/f_select.c
+++ b/libavfilter/f_select.c
@@ -86,6 +86,10 @@  static const char *const var_names[] = {
 
     "concatdec_select",  ///< frame is within the interval set by the concat demuxer
 
+    "SILENCE",           ///< silencedetect detected silence for this frame
+    "FREEZE",            ///< freezedetect detected frozen frames
+    "BLACK",             ///< blackdetect detected black frames
+
     NULL
 };
 
@@ -138,14 +142,42 @@  enum var_name {
 
     VAR_CONCATDEC_SELECT,
 
+    VAR_SILENCE,
+    VAR_FREEZE,
+    VAR_BLACK,
+
     VAR_VARS_NB
 };
 
+enum meta_name {
+    META_SILENCE,
+    META_FREEZE,
+    META_BLACK,
+
+    META_NAMES_NB
+};
+
+static const char *const func1_names[] = {
+    "detected", ///< function to eval metadata from various detection filters (silencedetect, ...)
+
+    NULL
+};
+
+static double detect_metadata(void *p, double kind);
+
+static double (*func1_funcs[])(void *, double) = {
+    detect_metadata,
+
+    NULL
+};
+
 typedef struct SelectContext {
     const AVClass *class;
     char *expr_str;
     AVExpr *expr;
     double var_values[VAR_VARS_NB];
+    int meta_detected[META_NAMES_NB];
+    AVFrame *cur_frame;             ///< current frame, for use in expression parser functions ONLY
     int bitdepth;
     int nb_planes;
     ptrdiff_t width[4];
@@ -177,7 +209,7 @@  static av_cold int init(AVFilterContext *ctx)
     int i, ret;
 
     if ((ret = av_expr_parse(&select->expr, select->expr_str,
-                             var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) {
+                             var_names, func1_names, func1_funcs, NULL, NULL, 0, ctx)) < 0) {
         av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n",
                select->expr_str);
         return ret;
@@ -267,6 +299,11 @@  static int config_input(AVFilterLink *inlink)
         if (!select->sad)
             return AVERROR(EINVAL);
     }
+
+    select->var_values[VAR_SILENCE] = META_SILENCE;
+    select->var_values[VAR_FREEZE]  = META_FREEZE;
+    select->var_values[VAR_BLACK]   = META_BLACK;
+
     return 0;
 }
 
@@ -325,6 +362,40 @@  static double get_concatdec_select(AVFrame *frame, int64_t pts)
     return NAN;
 }
 
+static double detect_metadata(void *p, double kind)
+{
+    AVFilterContext *ctx = p;
+    SelectContext *select = ctx->priv;
+    AVDictionary *metadata = select->cur_frame->metadata;
+    int kind_i = (int)(kind + 0.5);
+    const char *start, *end;
+
+    switch(kind_i) {
+    case META_SILENCE:
+        start = "lavfi.silence_start";
+        end = "lavfi.silence_end";
+        break;
+    case META_FREEZE:
+        start = "lavfi.freezedetect.freeze_start";
+        end = "lavfi.freezedetect.freeze_end";
+        break;
+    case META_BLACK:
+        start = "lavfi.black_start";
+        end = "lavfi.black_end";
+        break;
+    default:
+        av_log(ctx, AV_LOG_WARNING, "Invalid metadata detection kind!\n");
+        return NAN;
+    }
+
+    if (av_dict_get(metadata, start, NULL, 0))
+        select->meta_detected[kind_i] = -1;
+    if (av_dict_get(metadata, end, NULL, 0))
+        select->meta_detected[kind_i] = 0;
+
+    return select->meta_detected[kind_i];
+}
+
 static void select_frame(AVFilterContext *ctx, AVFrame *frame)
 {
     SelectContext *select = ctx->priv;
@@ -363,7 +434,9 @@  static void select_frame(AVFilterContext *ctx, AVFrame *frame)
         break;
     }
 
-    select->select = res = av_expr_eval(select->expr, select->var_values, NULL);
+    select->cur_frame = frame;
+
+    select->select = res = av_expr_eval(select->expr, select->var_values, ctx);
     av_log(inlink->dst, AV_LOG_DEBUG,
            "n:%f pts:%f t:%f key:%d",
            select->var_values[VAR_N],
@@ -371,6 +444,8 @@  static void select_frame(AVFilterContext *ctx, AVFrame *frame)
            select->var_values[VAR_T],
            frame->key_frame);
 
+    select->cur_frame = NULL;
+
     switch (inlink->type) {
     case AVMEDIA_TYPE_VIDEO:
         av_log(inlink->dst, AV_LOG_DEBUG, " interlace_type:%c pict_type:%c scene:%f",
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 5052681653..fbb81ef31c 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@ 
 
 #define LIBAVFILTER_VERSION_MAJOR   8
 #define LIBAVFILTER_VERSION_MINOR   0
-#define LIBAVFILTER_VERSION_MICRO 102
+#define LIBAVFILTER_VERSION_MICRO 103
 
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \