
[FFmpeg-devel] Update xfade filter to support a transition between two points in a single video stream.

Message ID DB7PR09MB2585E3225D7F3B5BA924F736B167A@DB7PR09MB2585.eurprd09.prod.outlook.com
State New
Series [FFmpeg-devel] Update xfade filter to support a transition between two points in a single video stream.

Checks

Context Check Description
andriy/commit_msg_x86 warning The first line of the commit message must start with a context terminated by a colon and a space, for example "lavu/opt: " or "doc: ".
yinshiyou/commit_msg_loongarch64 warning The first line of the commit message must start with a context terminated by a colon and a space, for example "lavu/opt: " or "doc: ".
andriy/make_fate_x86 success Make fate finished
andriy/make_x86 warning New warnings during build

Commit Message

Aidan O Connor Jan. 4, 2024, 2:48 a.m. UTC
Signed-off-by: Aidan O'Connor <aoc@outlook.com>
---
 libavfilter/vf_xfade.c | 266 +++++++++++++++++++++++++++++++++--------
 1 file changed, 213 insertions(+), 53 deletions(-)
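
For reference, hypothetical command lines exercising both modes (the single-input
option names are the ones introduced by this patch):

    # Existing behaviour: cross-fade between two input streams
    ffmpeg -i a.mp4 -i b.mp4 -filter_complex "xfade=transition=fade:duration=1:offset=4" out.mp4

    # New behaviour: fade between two points of a single input stream
    ffmpeg -i in.mp4 -vf "xfade=inputs=1:transition=fade:start=5:end=7" out.mp4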

--
2.40.1.windows.1

Patch

diff --git a/libavfilter/vf_xfade.c b/libavfilter/vf_xfade.c
index 890995a608..65611beeb5 100644
--- a/libavfilter/vf_xfade.c
+++ b/libavfilter/vf_xfade.c
@@ -93,11 +93,18 @@  enum XFadeTransitions {
 typedef struct XFadeContext {
     const AVClass *class;

+    // Number of inputs: 1 for a transition within a single stream, 2 for a cross-fade between two streams.
+    int nb_inputs;
+
     int     transition;
     int64_t duration;
     int64_t offset;
     char   *custom_str;

+    // Start & end time user options (single input only)
+    int64_t start;
+    int64_t end;
+
     int nb_planes;
     int depth;
     int is_rgb;
@@ -105,12 +112,18 @@  typedef struct XFadeContext {
     // PTS when the fade should start (in first inputs timebase)
     int64_t start_pts;

+    // PTS when the fade should end (single input only)
+    int64_t end_pts;
+
     // PTS offset between first and second input
     int64_t inputs_offset_pts;

     // Duration of the transition
     int64_t duration_pts;

+    // Frame duration (single input only)
+    int64_t frame_duration;
+
     // Current PTS of the first input
     int64_t pts;

@@ -118,6 +131,12 @@  typedef struct XFadeContext {
     // like before and after the actual transition.
     int passthrough;

+    // Copy of transition start frame (single input only)
+    AVFrame *start_frame;
+
+    // Number of discarded input frames to be regenerated as transition frames (single input only)
+    int nb_frames;
+
     int status[2];
     AVFrame *xf[2];
     int max_value;
@@ -169,6 +188,7 @@  static av_cold void uninit(AVFilterContext *ctx)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)

 static const AVOption xfade_options[] = {
+    { "inputs", "set number of inputs", OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 2, FLAGS },
     { "transition", "set cross fade transition", OFFSET(transition), AV_OPT_TYPE_INT, {.i64=FADE}, -1, NB_TRANSITIONS-1, FLAGS, "transition" },
     {   "custom",    "custom transition",     0, AV_OPT_TYPE_CONST, {.i64=CUSTOM},    0, 0, FLAGS, "transition" },
     {   "fade",      "fade transition",       0, AV_OPT_TYPE_CONST, {.i64=FADE},      0, 0, FLAGS, "transition" },
@@ -231,6 +251,8 @@  static const AVOption xfade_options[] = {
     {   "revealdown", "reveal down transition", 0, AV_OPT_TYPE_CONST, {.i64=REVEALDOWN}, 0, 0, FLAGS, "transition" },
     { "duration", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=1000000}, 0, 60000000, FLAGS },
     { "offset",   "set cross fade start relative to first input stream", OFFSET(offset), AV_OPT_TYPE_DURATION, {.i64=0}, INT64_MIN, INT64_MAX, FLAGS },
+    { "start", "set fade start time (single input only)", OFFSET(start), AV_OPT_TYPE_DURATION, {.i64=0}, INT64_MIN, INT64_MAX, FLAGS },
+    { "end", "set fade end time (single input only)", OFFSET(end), AV_OPT_TYPE_DURATION, {.i64=0}, INT64_MIN, INT64_MAX, FLAGS },
     { "expr",   "set expression for custom transition", OFFSET(custom_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
     { NULL }
 };
@@ -2039,44 +2061,53 @@  static double b3(void *priv, double x, double y) { return getpix(priv, x, y, 3,
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink0 = ctx->inputs[0];
-    AVFilterLink *inlink1 = ctx->inputs[1];
     XFadeContext *s = ctx->priv;
+    unsigned nb_inputs = s->nb_inputs;
+    AVFilterLink *inlink0 = ctx->inputs[0];
+    AVFilterLink *inlink1 = nb_inputs > 1 ? ctx->inputs[1] : NULL;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink0->format);

-    if (inlink0->w != inlink1->w || inlink0->h != inlink1->h) {
-        av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d) do not match the corresponding "
-               "second input link %s parameters (size %dx%d)\n",
-               ctx->input_pads[0].name, inlink0->w, inlink0->h,
-               ctx->input_pads[1].name, inlink1->w, inlink1->h);
-        return AVERROR(EINVAL);
-    }
+    if (nb_inputs == 1) {
+        if (!inlink0->frame_rate.num || !inlink0->frame_rate.den) {
+            av_log(ctx, AV_LOG_ERROR, "The input needs to have a constant frame rate; "
+                "current rate of %d/%d is invalid\n", inlink0->frame_rate.num, inlink0->frame_rate.den);
+            return AVERROR(EINVAL);
+        }
+    } else if (nb_inputs == 2) {
+        if (inlink0->w != inlink1->w || inlink0->h != inlink1->h) {
+            av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
+                "(size %dx%d) do not match the corresponding "
+                "second input link %s parameters (size %dx%d)\n",
+                ctx->input_pads[0].name, inlink0->w, inlink0->h,
+                ctx->input_pads[1].name, inlink1->w, inlink1->h);
+            return AVERROR(EINVAL);
+        }

-    if (inlink0->time_base.num != inlink1->time_base.num ||
-        inlink0->time_base.den != inlink1->time_base.den) {
-        av_log(ctx, AV_LOG_ERROR, "First input link %s timebase "
-               "(%d/%d) do not match the corresponding "
-               "second input link %s timebase (%d/%d)\n",
-               ctx->input_pads[0].name, inlink0->time_base.num, inlink0->time_base.den,
-               ctx->input_pads[1].name, inlink1->time_base.num, inlink1->time_base.den);
-        return AVERROR(EINVAL);
-    }
+        if (inlink0->time_base.num != inlink1->time_base.num ||
+            inlink0->time_base.den != inlink1->time_base.den) {
+            av_log(ctx, AV_LOG_ERROR, "First input link %s timebase "
+                "(%d/%d) do not match the corresponding "
+                "second input link %s timebase (%d/%d)\n",
+                ctx->input_pads[0].name, inlink0->time_base.num, inlink0->time_base.den,
+                ctx->input_pads[1].name, inlink1->time_base.num, inlink1->time_base.den);
+            return AVERROR(EINVAL);
+        }

-    if (!inlink0->frame_rate.num || !inlink0->frame_rate.den) {
-        av_log(ctx, AV_LOG_ERROR, "The inputs needs to be a constant frame rate; "
-               "current rate of %d/%d is invalid\n", inlink0->frame_rate.num, inlink0->frame_rate.den);
-        return AVERROR(EINVAL);
-    }
+        if (!inlink0->frame_rate.num || !inlink0->frame_rate.den) {
+            av_log(ctx, AV_LOG_ERROR, "The inputs needs to be a constant frame rate; "
+                "current rate of %d/%d is invalid\n", inlink0->frame_rate.num, inlink0->frame_rate.den);
+            return AVERROR(EINVAL);
+        }

-    if (inlink0->frame_rate.num != inlink1->frame_rate.num ||
-        inlink0->frame_rate.den != inlink1->frame_rate.den) {
-        av_log(ctx, AV_LOG_ERROR, "First input link %s frame rate "
-               "(%d/%d) do not match the corresponding "
-               "second input link %s frame rate (%d/%d)\n",
-               ctx->input_pads[0].name, inlink0->frame_rate.num, inlink0->frame_rate.den,
-               ctx->input_pads[1].name, inlink1->frame_rate.num, inlink1->frame_rate.den);
-        return AVERROR(EINVAL);
+        if (inlink0->frame_rate.num != inlink1->frame_rate.num ||
+            inlink0->frame_rate.den != inlink1->frame_rate.den) {
+            av_log(ctx, AV_LOG_ERROR, "First input link %s frame rate "
+                "(%d/%d) do not match the corresponding "
+                "second input link %s frame rate (%d/%d)\n",
+                ctx->input_pads[0].name, inlink0->frame_rate.num, inlink0->frame_rate.den,
+                ctx->input_pads[1].name, inlink1->frame_rate.num, inlink1->frame_rate.den);
+            return AVERROR(EINVAL);
+        }
     }

     outlink->w = inlink0->w;
@@ -2199,11 +2230,9 @@  static int xfade_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     return 0;
 }

-static int xfade_frame(AVFilterContext *ctx, AVFrame *a, AVFrame *b)
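+/**
+ * Blend frames a and b into a new output frame with the given pts.
+ * progress runs from 1.0 (only frame a visible) down to 0.0 (only frame b visible).
+ */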
+static int xfade_frame(AVFilterContext *ctx, AVFrame *a, AVFrame *b, int64_t pts, float progress)
 {
-    XFadeContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    float progress = av_clipf(1.f - ((float)(s->pts - s->start_pts) / s->duration_pts), 0.f, 1.f);
     ThreadData td;
     AVFrame *out;

@@ -2216,7 +2245,7 @@  static int xfade_frame(AVFilterContext *ctx, AVFrame *a, AVFrame *b)
     ff_filter_execute(ctx, xfade_slice, &td, NULL,
                       FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));

-    out->pts = s->pts;
+    out->pts = pts;

     return ff_filter_frame(outlink, out);
 }
@@ -2297,6 +2326,7 @@  static int xfade_activate(AVFilterContext *avctx)
         // We are transitioning, so we need a frame from second input
         if (ff_inlink_check_available_frame(in_b)) {
             int ret;
+            float progress;
             ff_inlink_consume_frame(avctx->inputs[0], &s->xf[0]);
             ff_inlink_consume_frame(avctx->inputs[1], &s->xf[1]);

@@ -2311,7 +2341,8 @@  static int xfade_activate(AVFilterContext *avctx)
                 ff_inlink_set_status(in_a, AVERROR_EOF);
                 s->passthrough = 1;
             }
-            ret = xfade_frame(avctx, s->xf[0], s->xf[1]);
+            progress = av_clipf(1.f - ((float)(s->pts - s->start_pts) / s->duration_pts), 0.f, 1.f);
+            ret = xfade_frame(avctx, s->xf[0], s->xf[1], s->pts, progress);
             av_frame_free(&s->xf[0]);
             av_frame_free(&s->xf[1]);
             return ret;
@@ -2349,6 +2380,112 @@  static int xfade_activate(AVFilterContext *avctx)
     return FFERROR_NOT_READY;
 }

+/**
+ * Perform a transition between two points in the input video stream.
+ * Transition from the last frame at or before the specified start time to the first frame at or after the specified end time.
+ * All input frames between these points are discarded and replaced with generated transition frames.
+ */
+static int tfade_activate(AVFilterContext *avctx)
+{
+    XFadeContext *s = avctx->priv;
+    AVFilterLink *in = avctx->inputs[0];
+    AVFilterLink *outlink = avctx->outputs[0];
+    int64_t status_pts;
+    AVFrame *frame;
+    AVFrame *end_frame;
+    int ret;
+
+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
+
+    // The stream has not ended yet, so check whether there is a new frame to consume.
+    if (ff_inlink_check_available_frame(in)) {
+        AVFrame *peeked_frame = ff_inlink_peek_frame(in, 0);
+        s->pts = peeked_frame->pts;
+
+        // Initialize PTS values on the first call.
+        if (s->start_pts == AV_NOPTS_VALUE) {
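+            // start/end are in microseconds (AV_TIME_BASE_Q); rescale them into the
+            // input's time base, relative to the first frame's pts.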
+            s->start_pts = s->pts + av_rescale_q(s->start, AV_TIME_BASE_Q, in->time_base);
+            s->end_pts = s->pts + av_rescale_q(s->end, AV_TIME_BASE_Q, in->time_base);
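+            // E.g. at 25 fps with a 1/12800 time base, this gives 12800/25 = 512 ticks per frame.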
+            s->frame_duration = av_rescale_q(1, av_inv_q(in->frame_rate), in->time_base);
+            av_log(avctx, AV_LOG_INFO, "tfade_activate(): start_pts=%"PRId64", end_pts=%"PRId64", frame_duration=%"PRId64"\n", s->start_pts, s->end_pts, s->frame_duration);
+        }
+
+        if (s->pts <= s->start_pts) {
+            // Clone the last frame at or before the specified start time; it becomes
+            // the "from" frame of the transition. Frame pts values advance in steps of
+            // frame_duration, so exactly one frame satisfies this condition.
+            if (s->pts + s->frame_duration > s->start_pts) {
+                av_log(avctx, AV_LOG_INFO, "tfade_activate(): start frame PTS=%"PRId64"\n", s->pts);
+                av_frame_free(&s->start_frame);
+                s->start_frame = av_frame_clone(peeked_frame);
+            }
+
+            s->passthrough = 1;
+            ff_inlink_consume_frame(in, &frame);
+            return ff_filter_frame(outlink, frame);
+        } else if (s->pts > s->start_pts && s->pts < s->end_pts) {
+            // During the transition just discard the input frame, counting the
+            // discarded frames so they can be replaced later.
+            s->passthrough = 0;
+            s->nb_frames++;
+
+            ff_inlink_consume_frame(in, &frame);
+            av_frame_free(&frame);
+            ff_inlink_request_frame(in);
+            return 0;
+        } else if (s->pts >= s->end_pts) {
+            // Select the first frame at or after the specified end time
+            if (s->pts < s->end_pts + s->frame_duration) {
+                ff_inlink_consume_frame(in, &end_frame);
+                s->nb_frames++;
+
+                av_log(avctx, AV_LOG_INFO, "tfade_activate(): end frame PTS=%"PRId64", number of frames=%d\n", s->pts, s->nb_frames);
+
+                // Replace discarded input frames with transition frames
+                for (int i = 0; i < s->nb_frames; i++) {
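+                    // Regenerated frames keep the input cadence: pts advances in frame_duration
+                    // steps while progress falls linearly from 1.0 (start frame) toward 0.0 (end frame).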
+                    int64_t pts = s->start_pts + (s->frame_duration * i);
+                    float progress = av_clipf(1.f - ((float)i / s->nb_frames), 0.f, 1.f);
+                    ret = xfade_frame(avctx, s->start_frame, end_frame, pts, progress);
+                    if (ret < 0)
+                        break;
+                }
+
+                av_frame_free(&s->start_frame);
+                av_frame_free(&end_frame);
+
+                return ret;
+            } else {
+                // After the end of the transition just forward the input frame
+                s->passthrough = 1;
+                ff_inlink_consume_frame(in, &frame);
+                return ff_filter_frame(outlink, frame);
+            }
+        }
+    }
+
+    // We did not get a frame from input, check its status.
+    if (ff_inlink_acknowledge_status(in, &s->status[0], &status_pts)) {
+        // Input is EOF so report EOF output.
+        ff_outlink_set_status(outlink, s->status[0], s->pts);
+        return 0;
+    }
+
+    // We have no frames yet from input and no EOF, so request some.
+    if (ff_outlink_frame_wanted(outlink)) {
+        ff_inlink_request_frame(in);
+        return 0;
+    }
+
+    return FFERROR_NOT_READY;
+}
+
+/**
+ * Dispatch to the single-input tfade or the dual-input xfade activation path.
+ */
+static int activate(AVFilterContext *avctx)
+{
+    XFadeContext *s = avctx->priv;
+    if (s->nb_inputs == 1)
+        return tfade_activate(avctx);
+    else if (s->nb_inputs == 2)
+        return xfade_activate(avctx);
+    else
+        return AVERROR_BUG;
+}
+
 static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
 {
     XFadeContext *s = inlink->dst->priv;
@@ -2358,18 +2495,40 @@  static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
         ff_default_get_video_buffer(inlink, w, h);
 }

-static const AVFilterPad xfade_inputs[] = {
-    {
-        .name          = "main",
-        .type          = AVMEDIA_TYPE_VIDEO,
-        .get_buffer.video = get_video_buffer,
-    },
-    {
-        .name          = "xfade",
-        .type          = AVMEDIA_TYPE_VIDEO,
-        .get_buffer.video = get_video_buffer,
-    },
-};
+/**
+ * Set up the input pads depending on whether the filter has one or two inputs.
+ */
+static av_cold int init(AVFilterContext *avctx)
+{
+    XFadeContext *s = avctx->priv;
+    int ret;
+
+    // The "main" input pad is needed in both modes.
+    AVFilterPad main_pad = {
+        .name = "main",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .get_buffer.video = get_video_buffer,
+    };
+    if ((ret = ff_append_inpad(avctx, &main_pad)) < 0)
+        return ret;
+
+    // The second input pad is only needed for the dual-input cross-fade;
+    // dispatching between tfade_activate() and xfade_activate() happens in activate().
+    if (s->nb_inputs == 2) {
+        AVFilterPad xfade_pad = {
+            .name = "xfade",
+            .type = AVMEDIA_TYPE_VIDEO,
+            .get_buffer.video = get_video_buffer,
+        };
+        if ((ret = ff_append_inpad(avctx, &xfade_pad)) < 0)
+            return ret;
+    }
+
+    return 0;
+}

 static const AVFilterPad xfade_outputs[] = {
     {
@@ -2381,13 +2540,14 @@  static const AVFilterPad xfade_outputs[] = {

 const AVFilter ff_vf_xfade = {
     .name          = "xfade",
-    .description   = NULL_IF_CONFIG_SMALL("Cross fade one video with another video."),
+    .description   = NULL_IF_CONFIG_SMALL("Cross fade one video with another video, or between two points in a single video."),
     .priv_size     = sizeof(XFadeContext),
     .priv_class    = &xfade_class,
-    .activate      = xfade_activate,
+    .init          = init,
+    .activate      = activate,
     .uninit        = uninit,
-    FILTER_INPUTS(xfade_inputs),
+    .inputs        = NULL,
     FILTER_OUTPUTS(xfade_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
-    .flags         = AVFILTER_FLAG_SLICE_THREADS,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS | AVFILTER_FLAG_DYNAMIC_INPUTS,
 };