diff mbox series

[FFmpeg-devel,1/2] avfilter/vf_decimate: Improve decimation factor precision

Message ID CAF96VJ53eEygs5FSN360tVtQrDG7z88wkWzZpiy18OGQ-_F1qQ@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] avfilter/vf_decimate: Improve decimation factor precision | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Vladimir Petrov Oct. 3, 2023, 1:13 p.m. UTC
Currently, the decimate video filter can drop only a
single frame per cycle, which limits the range of
available framerate decimation factors. The new
'dropcount' option allows more than one frame to be
dropped per cycle, so more fractional factors can be chosen.
Also add a 'spread' option to set the percentage of a
dropped frame's metrics that is spread to adjacent frames.
diff mbox series

Patch

From 127bed0a4fd2ca05f43fa117ba4bf859430fc5ff Mon Sep 17 00:00:00 2001
From: Vladimir Petrov <vppetrovmms@gmail.com>
Date: Sat, 23 Sep 2023 00:37:32 +0300
Subject: [PATCH 1/2] avfilter/vf_decimate: Improve decimation factor precision

Currently, the decimate video filter can drop only a
single frame per cycle, which limits the range of
available framerate decimation factors. The new
'dropcount' option allows more than one frame to be
dropped per cycle, so more fractional factors can be chosen.
Also add a 'spread' option to set the percentage of a
dropped frame's metrics that is spread to adjacent frames.

Signed-off-by: Vladimir Petrov <vppetrovmms@gmail.com>
---
 doc/filters.texi          |  16 ++-
 libavfilter/vf_decimate.c | 269 +++++++++++++++++++++++++++++++-------
 2 files changed, 237 insertions(+), 48 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index a729a08dce..eef76d71b0 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -11395,10 +11395,22 @@  The filter accepts the following options:
 
 @table @option
 @item cycle
-Set the number of frames from which one will be dropped. Setting this to
-@var{N} means one frame in every batch of @var{N} frames will be dropped.
+Set the number of frames per cycle. Setting this to @var{N} and @var{dropcount}
+to @var{M} means @var{M} frames in every batch of @var{N} frames will be dropped.
 Default is @code{5}.
 
+@item dropcount
+Set the number of frames to be dropped from each cycle.
+Must be smaller than @var{cycle}.
+Default is @code{1}.
+
+@item spread
+Set the percentage of a dropped frame's metrics that is spread to adjacent frames. A bigger
+value means that non-duplicate frames are distributed more evenly in the output. This is
+especially useful for static scenes, where it avoids skipping long runs of frames, i.e. it
+avoids jumpy motion.
+Default is @code{75}.
+
 @item dupthresh
 Set the threshold for duplicate detection. If the difference metric for a frame
 is less than or equal to this value, then it is declared as duplicate. Default
diff --git a/libavfilter/vf_decimate.c b/libavfilter/vf_decimate.c
index dbeca427f1..67896eaa0d 100644
--- a/libavfilter/vf_decimate.c
+++ b/libavfilter/vf_decimate.c
@@ -19,6 +19,7 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/common.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/timestamp.h"
@@ -33,11 +34,12 @@  struct qitem {
     AVFrame *frame;
     int64_t maxbdiff;
     int64_t totdiff;
+    int drop;
 };
 
 typedef struct DecimateContext {
     const AVClass *class;
-    struct qitem *queue;    ///< window of cycle frames and the associated data diff
+    struct qitem *queue;    ///< window of cycle+1 frames and the associated data diff
     int fid;                ///< current frame id in the queue
     int filled;             ///< 1 if the queue is filled, 0 otherwise
     AVFrame *last;          ///< last frame from the previous queue
@@ -58,6 +60,8 @@  typedef struct DecimateContext {
 
     /* options */
     int cycle;
+    int dropcount;
+    int spread;
     double dupthresh_flt;
     double scthresh_flt;
     int64_t dupthresh;
@@ -72,7 +76,9 @@  typedef struct DecimateContext {
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
 static const AVOption decimate_options[] = {
-    { "cycle",     "set the number of frame from which one will be dropped", OFFSET(cycle), AV_OPT_TYPE_INT, {.i64 = 5}, 2, 25, FLAGS },
+    { "cycle",     "set the number of frames per cycle", OFFSET(cycle), AV_OPT_TYPE_INT, {.i64 = 5}, 2, 25, FLAGS },
+    { "dropcount", "set the number of frames to be dropped from each cycle", OFFSET(dropcount), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 24, FLAGS },
+    { "spread",    "set percentage of dropped frame metrics spreading to adjanced frames", OFFSET(spread), AV_OPT_TYPE_INT, {.i64 = 75}, 0, 200, FLAGS },
     { "dupthresh", "set duplicate threshold",    OFFSET(dupthresh_flt), AV_OPT_TYPE_DOUBLE, {.dbl =  1.1}, 0, 100, FLAGS },
     { "scthresh",  "set scene change threshold", OFFSET(scthresh_flt),  AV_OPT_TYPE_DOUBLE, {.dbl = 15.0}, 0, 100, FLAGS },
     { "blockx",    "set the size of the x-axis blocks used during metric calculations", OFFSET(blockx), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
@@ -150,14 +156,154 @@  static void calc_diffs(const DecimateContext *dm, struct qitem *q,
     q->maxbdiff = maxdiff;
 }
 
+/* Number of queued frames not yet marked for dropping whose maxbdiff is below dupthresh. */
+static int dup_cnt(const DecimateContext* const dm)
+{
+    int pos, found = 0;
+
+    for (pos = 0; pos < dm->cycle && dm->queue[pos].frame; pos++) {
+        if (dm->queue[pos].drop || dm->queue[pos].maxbdiff >= dm->dupthresh)
+            continue;
+        found++;
+    }
+    return found;
+}
+
+/* Length of the leading run of queue slots that hold a frame;
+ * the scan stops at the first empty slot and never goes past cycle. */
+static int q_cnt(const DecimateContext* const dm)
+{
+    int filled = 0;
+
+    while (filled < dm->cycle && dm->queue[filled].frame)
+        filled++;
+
+    return filled;
+}
+
+/* Number of queued frames already marked for dropping, i.e. whose
+ * drop field is non-zero; the scan stops at the first empty slot. */
+static int drop_cnt(const DecimateContext* const dm)
+{
+    int pos;
+    int marked = 0;
+
+    for (pos = 0; pos < dm->cycle && dm->queue[pos].frame; pos++)
+        marked += dm->queue[pos].drop ? 1 : 0;
+
+    return marked;
+}
+
+/* Index of the nearest slot before cur_frame_idx that is not marked for
+ * dropping, or -1 when there is no such slot (for cur_frame_idx >= 0). */
+static int prev_frame_idx(const DecimateContext* const dm, const int cur_frame_idx)
+{
+    int pos = cur_frame_idx - 1;
+
+    while (pos >= 0 && dm->queue[pos].drop)
+        pos--;
+
+    return pos;
+}
+
+/* Index of the nearest slot after cur_frame_idx that holds a frame and is
+ * not marked for dropping; -1 once the scan reaches cycle or an empty slot. */
+static int next_frame_idx(const DecimateContext* const dm, const int cur_frame_idx)
+{
+    int pos = cur_frame_idx + 1;
+
+    while (pos < dm->cycle && dm->queue[pos].frame && dm->queue[pos].drop)
+        pos++;
+
+    if (pos == dm->cycle || !dm->queue[pos].frame)
+        return -1;
+
+    return pos;
+}
+
+/* Among queued frames not marked for dropping, index of the one with the
+ * smallest maxbdiff (earliest wins on ties); -1 if there is no candidate. */
+static int best_dup_idx(const DecimateContext* const dm)
+{
+    int pos, best = next_frame_idx(dm, -1);
+
+    if (best < 0)
+        return best;
+    for (pos = best + 1; pos < dm->cycle && dm->queue[pos].frame; pos++) {
+        if (!dm->queue[pos].drop && dm->queue[pos].maxbdiff < dm->queue[best].maxbdiff)
+            best = pos;
+    }
+    return best;
+}
+
+/* Among queued frames not marked for dropping, index of the one with the
+ * largest totdiff (earliest wins on ties); -1 if there is no candidate. */
+static int best_sc_idx(const DecimateContext* const dm)
+{
+    int pos, best = next_frame_idx(dm, -1);
+
+    if (best < 0)
+        return best;
+    for (pos = best + 1; pos < dm->cycle && dm->queue[pos].frame; pos++) {
+        if (!dm->queue[pos].drop && dm->queue[pos].totdiff > dm->queue[best].totdiff)
+            best = pos;
+    }
+    return best;
+}
+
+/* Recompute the metrics of slot idx (slot cycle when idx < 0) against the
+ * frame in slot idx_p (dm->last when idx_p < 0). Then add spread percent of
+ * that slot's old maxbdiff to it, and spread percent of slot idx_dr's maxbdiff
+ * to slot idx_p; both additions are zero if either value is below dupthresh. */
+static void update_frame_metrics(DecimateContext *dm, const int idx_p, const int idx, int idx_dr)
+{
+    struct qitem *nxt = &dm->queue[idx >= 0 ? idx : dm->cycle];
+    AVFrame *ref = idx_p >= 0 ? dm->queue[idx_p].frame : dm->last;
+    AVFrame *cur = nxt->frame;
+    /* snapshot the old metrics before they get overwritten below */
+    int64_t old_nxt = nxt->maxbdiff;
+    int64_t old_drop = idx_dr >= 0 ? dm->queue[idx_dr].maxbdiff : 0;
+
+    if (!ref) {
+        nxt->totdiff = nxt->maxbdiff = INT64_MAX;
+    } else if (dm->mixed && ((ref->flags & AV_FRAME_FLAG_INTERLACED) || (cur && (cur->flags & AV_FRAME_FLAG_INTERLACED)))) {
+        nxt->maxbdiff = INT64_MAX - 1;
+        nxt->totdiff  = 0;
+    } else if (cur) {
+        calc_diffs(dm, nxt, ref, cur);
+    }
+
+    if (old_nxt < dm->dupthresh || old_drop < dm->dupthresh)
+        old_nxt = old_drop = 0;
+    nxt->maxbdiff = av_sat_add64(nxt->maxbdiff, av_clip64(old_nxt, 0, INT64_MAX/200) * dm->spread / 100);
+    if (idx_p >= 0)
+        dm->queue[idx_p].maxbdiff = av_sat_add64(dm->queue[idx_p].maxbdiff, av_clip64(old_drop, 0, INT64_MAX/200) * dm->spread / 100);
+}
+
+static void mark_drop_frame(DecimateContext *dm, const int idx, const int drop_id)
+{
+    (dm->queue + idx)->drop = drop_id; /* non-zero drop_id marks the slot for dropping and records why */
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
-    int scpos = -1, duppos = -1;
-    int drop = INT_MIN, i, lowest = 0, ret;
+    int i, ret;
+    int idx_prv = -1, idx_nxt = -1, idx_dup = -1, idx_sc = -1, idx_drop = -1;
+    int drop = 0, drop_case = 0, flush_drops = 0;
     AVFilterContext *ctx  = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     DecimateContext *dm   = ctx->priv;
-    AVFrame *prv;
+
+    /* move already pre-read frame and clear the rest */
+    if (0 == dm->fid && dm->queue[dm->cycle].frame) {
+        if (dm->ppsrc) {
+            memcpy(&dm->clean_src[0], &dm->clean_src[dm->cycle], sizeof(dm->clean_src[0]));
+            memset(&dm->clean_src[1], 0, sizeof(dm->clean_src[1]) * dm->cycle);
+        }
+        memcpy(&dm->queue[0], &dm->queue[dm->cycle], sizeof(dm->queue[0]));
+        memset(&dm->queue[1], 0, sizeof(dm->queue[1]) * dm->cycle);
+        dm->fid++;
+    }
 
     /* update frames queue(s) */
     if (FF_INLINK_IDX(inlink) == INPUT_MAIN) {
@@ -171,59 +317,85 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         return 0;
     dm->got_frame[INPUT_MAIN] = dm->got_frame[INPUT_CLEANSRC] = 0;
 
-    if (dm->ppsrc)
-        in = dm->queue[dm->fid].frame;
-
-    if (in) {
+    if (dm->queue[dm->fid].frame) {
         /* update frame metrics */
-        prv = dm->fid ? dm->queue[dm->fid - 1].frame : dm->last;
-        if (!prv) {
-            dm->queue[dm->fid].maxbdiff = INT64_MAX;
-            dm->queue[dm->fid].totdiff  = INT64_MAX;
-        } else {
-            calc_diffs(dm, &dm->queue[dm->fid], prv, in);
-        }
-        if (++dm->fid != dm->cycle)
+        update_frame_metrics(dm, dm->fid - 1, dm->fid, -1);
+        if (++dm->fid <= dm->cycle)
             return 0;
-        av_frame_free(&dm->last);
-        dm->last = av_frame_clone(in);
         dm->fid = 0;
 
-        /* we have a complete cycle, select the frame to drop */
-        lowest = 0;
-        for (i = 0; i < dm->cycle; i++) {
-            if (dm->queue[i].totdiff > dm->scthresh)
-                scpos = i;
-            if (dm->queue[i].maxbdiff < dm->queue[lowest].maxbdiff)
-                lowest = i;
+        /* we have a complete cycle, select frames to drop */
+        if (!(dm->mixed && dup_cnt(dm) < dm->dropcount)) {
+            drop = 1;
+            while (drop_cnt(dm) < dm->dropcount) {
+                idx_dup = best_dup_idx(dm);
+                idx_sc = best_sc_idx(dm);
+                if (0 <= idx_dup && dm->queue[idx_dup].maxbdiff < dm->dupthresh) {
+                    drop_case = 1;
+                    idx_drop = idx_dup;
+                } else if (0 <= idx_sc && dm->queue[idx_sc].totdiff > dm->scthresh) {
+                    drop_case = 2;
+                    idx_drop = idx_sc;
+                } else {
+                    drop_case = 3;
+                    idx_drop = idx_dup;
+                }
+
+                if (0 > idx_drop) {
+                    break;
+                } else {
+                    idx_prv = prev_frame_idx(dm, idx_drop);
+                    idx_nxt = next_frame_idx(dm, idx_drop);
+                    update_frame_metrics(dm, idx_prv, idx_nxt, idx_drop);
+                    mark_drop_frame(dm, idx_drop, drop_case);
+                }
+            }
         }
-        if (dm->queue[lowest].maxbdiff < dm->dupthresh)
-            duppos = lowest;
-
-        if (dm->mixed && duppos < 0) {
-            drop = -1; // no drop if mixed content + no frame in cycle below threshold
-        } else {
-            drop = scpos >= 0 && duppos < 0 ? scpos : lowest;
+        av_frame_free(&dm->last);
+        idx_prv = prev_frame_idx(dm, dm->cycle);
+        if (0 <= idx_prv && dm->queue[idx_prv].frame)
+            dm->last = av_frame_clone(dm->queue[idx_prv].frame);
+    } else {
+        /* prepare flushing */
+        flush_drops = dm->dropcount * q_cnt(dm) / dm->cycle;
+        if (!(dm->mixed && dup_cnt(dm) < flush_drops)) {
+            drop = 1;
+            while (drop_cnt(dm) < flush_drops) {
+                idx_drop = best_dup_idx(dm);
+                if (0 > idx_drop) {
+                    break;
+                } else {
+                    idx_prv = prev_frame_idx(dm, idx_drop);
+                    idx_nxt = next_frame_idx(dm, idx_drop);
+                    update_frame_metrics(dm, idx_prv, idx_nxt, idx_drop);
+                    mark_drop_frame(dm, idx_drop, 4);
+                }
+            }
         }
+        av_frame_free(&dm->last);
     }
 
+
     /* metrics debug */
     if (av_log_get_level() >= AV_LOG_DEBUG) {
-        av_log(ctx, AV_LOG_DEBUG, "1/%d frame drop:\n", dm->cycle);
+        av_log(ctx, AV_LOG_DEBUG, "%d/%d %s drop:\n", dm->dropcount, dm->cycle, drop ? "frame" : "no");
         for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
-            av_log(ctx, AV_LOG_DEBUG,"  #%d: totdiff=%08"PRIx64" maxbdiff=%08"PRIx64"%s%s%s%s\n",
+            av_log(ctx, AV_LOG_DEBUG,"  #%d: totdiff=%08"PRIx64" maxbdiff=%08"PRIx64"%s%s%s%s%s%s%s\n",
                    i + 1, dm->queue[i].totdiff, dm->queue[i].maxbdiff,
-                   i == scpos  ? " sc"     : "",
-                   i == duppos ? " dup"    : "",
-                   i == lowest ? " lowest" : "",
-                   i == drop   ? " [DROP]" : "");
+                   dm->queue[i].totdiff > dm->scthresh ? " sc" : "   ",
+                   dm->queue[i].maxbdiff < dm->dupthresh ? " dup" : "    ",
+                   1 == dm->queue[i].drop ? " [DROP-DUP]" : "",
+                   2 == dm->queue[i].drop ? " [DROP-SCN]" : "",
+                   3 == dm->queue[i].drop ? " [DROP-LOW]" : "",
+                   4 == dm->queue[i].drop ? " [DROP-FLU]" : "",
+                   5 <= dm->queue[i].drop ? " [DROP-UKN]" : "");
         }
     }
 
     /* push all frames except the drop */
     ret = 0;
     for (i = 0; i < dm->cycle && dm->queue[i].frame; i++) {
-        if (i == drop) {
+        if (drop && dm->queue[i].drop) {
             if (dm->ppsrc)
                 av_frame_free(&dm->clean_src[i]);
             av_frame_free(&dm->queue[i].frame);
@@ -243,7 +415,7 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
             frame->pts = dm->last_duration ? dm->last_pts + dm->last_duration :
                          (dm->start_pts == AV_NOPTS_VALUE ? 0 : dm->start_pts);
-            frame->duration = dm->mixed ? av_div_q(drop < 0 ? dm->nondec_tb : dm->dec_tb, outlink->time_base).num : 1;
+            frame->duration = dm->mixed ? av_div_q(drop ? dm->dec_tb : dm->nondec_tb, outlink->time_base).num : 1;
             dm->last_duration = frame->duration;
             dm->last_pts = frame->pts;
             ret = ff_filter_frame(outlink, frame);
@@ -340,6 +512,11 @@  static av_cold int decimate_init(AVFilterContext *ctx)
         return AVERROR(EINVAL);
     }
 
+    if (!(dm->cycle > dm->dropcount)) {
+        dm->dropcount = dm->cycle - 1;
+        av_log(ctx, AV_LOG_WARNING, "Reducing drop count to %d\n", dm->dropcount);
+    }
+
     dm->start_pts = AV_NOPTS_VALUE;
     dm->last_duration = 0;
 
@@ -354,12 +531,12 @@  static av_cold void decimate_uninit(AVFilterContext *ctx)
     av_frame_free(&dm->last);
     av_freep(&dm->bdiffs);
     if (dm->queue) {
-        for (i = 0; i < dm->cycle; i++)
+        for (i = 0; i <= dm->cycle; i++)
             av_frame_free(&dm->queue[i].frame);
     }
     av_freep(&dm->queue);
     if (dm->clean_src) {
-        for (i = 0; i < dm->cycle; i++)
+        for (i = 0; i <= dm->cycle; i++)
             av_frame_free(&dm->clean_src[i]);
     }
     av_freep(&dm->clean_src);
@@ -400,16 +577,16 @@  static int config_output(AVFilterLink *outlink)
     dm->nyblocks  = (h + dm->blocky/2 - 1) / (dm->blocky/2);
     dm->bdiffsize = dm->nxblocks * dm->nyblocks;
     dm->bdiffs    = av_malloc_array(dm->bdiffsize, sizeof(*dm->bdiffs));
-    dm->queue     = av_calloc(dm->cycle, sizeof(*dm->queue));
+    dm->queue     = av_calloc(dm->cycle + 1, sizeof(*dm->queue));
     dm->in_tb     = inlink->time_base;
     dm->nondec_tb = av_inv_q(fps);
-    dm->dec_tb    = av_mul_q(dm->nondec_tb, (AVRational){dm->cycle, dm->cycle - 1});
+    dm->dec_tb    = av_mul_q(dm->nondec_tb, (AVRational){dm->cycle, dm->cycle - dm->dropcount});
 
     if (!dm->bdiffs || !dm->queue)
         return AVERROR(ENOMEM);
 
     if (dm->ppsrc) {
-        dm->clean_src = av_calloc(dm->cycle, sizeof(*dm->clean_src));
+        dm->clean_src = av_calloc(dm->cycle + 1, sizeof(*dm->clean_src));
         if (!dm->clean_src)
             return AVERROR(ENOMEM);
     }
-- 
2.39.2