diff mbox series

[FFmpeg-devel,3/5] avfilter/vf_scale: add optional "ref" input

Message ID 20240424110257.38715-4-ffmpeg@haasn.xyz
State Accepted
Commit bb8044581366fe286e16b14515d873979133dbda
Headers show
Series replace scale2ref by scale=rw:rh | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Niklas Haas April 24, 2024, 10:51 a.m. UTC
From: Niklas Haas <git@haasn.dev>

This is automatically enabled if the width/height expressions reference
any ref_* variable. This will ultimately serve as a more principled
replacement for the fundamentally broken scale2ref.

See-Also: https://trac.ffmpeg.org/ticket/10795
---
 Changelog              |   1 +
 doc/filters.texi       |  26 ++++++++
 libavfilter/vf_scale.c | 132 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 156 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index 8db14f02b4..e821e5ac74 100644
--- a/Changelog
+++ b/Changelog
@@ -7,6 +7,7 @@  version <next>:
 - ffmpeg CLI filtergraph chaining
 - LC3/LC3plus demuxer and muxer
 - pad_vaapi, drawbox_vaapi filters
+- vf_scale supports secondary ref input and framesync options
 
 
 version 7.0:
diff --git a/doc/filters.texi b/doc/filters.texi
index f20b72ab96..cf884568b0 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -21562,8 +21562,34 @@  The position (byte offset) of the frame in the input stream, or NaN if
 this information is unavailable and/or meaningless (for example in case of synthetic video).
 Only available with @code{eval=frame}.
 Deprecated, do not use.
+
+@item ref_w, rw
+@item ref_h, rh
+@item ref_a
+@item ref_dar, rdar
+@item ref_n
+@item ref_t
+@item ref_pos
+Equivalent to the above, but for a second reference input. If any of these
+variables are present, this filter accepts two inputs.
 @end table
 
+@subsection Examples
+
+@itemize
+@item
+Scale a subtitle stream (sub) to match the main video (main) in size before overlaying
+@example
+'[main]split[a][b]; [sub][a]scale=rw:rh[c]; [b][c]overlay'
+@end example
+
+@item
+Scale a logo to 1/10th the height of a video, while preserving its display aspect ratio.
+@example
+[logo-in][video-in]scale=w=oh*dar:h=rh/10[logo-out]
+@end example
+@end itemize
+
 @section scale2ref
 
 Scale (resize) the input video, based on a reference video.
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index a986dc97ae..f174651333 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -59,6 +59,17 @@  static const char *const var_names[] = {
 #if FF_API_FRAME_PKT
     "pos",
 #endif
+    "ref_w", "rw",
+    "ref_h", "rh",
+    "ref_a",
+    "ref_sar",
+    "ref_dar", "rdar",
+    "ref_hsub",
+    "ref_vsub",
+    "ref_n",
+    "ref_t",
+    "ref_pos",
+    /* Legacy variables for scale2ref */
     "main_w",
     "main_h",
     "main_a",
@@ -89,6 +100,16 @@  enum var_name {
 #if FF_API_FRAME_PKT
     VAR_POS,
 #endif
+    VAR_REF_W, VAR_RW,
+    VAR_REF_H, VAR_RH,
+    VAR_REF_A,
+    VAR_REF_SAR,
+    VAR_REF_DAR, VAR_RDAR,
+    VAR_REF_HSUB,
+    VAR_REF_VSUB,
+    VAR_REF_N,
+    VAR_REF_T,
+    VAR_REF_POS,
     VAR_S2R_MAIN_W,
     VAR_S2R_MAIN_H,
     VAR_S2R_MAIN_A,
@@ -131,6 +152,7 @@  typedef struct ScaleContext {
     int input_is_pal;           ///< set to 1 if the input format is paletted
     int output_is_pal;          ///< set to 1 if the output format is paletted
     int interlaced;
+    int uses_ref;
 
     char *w_expr;               ///< width  expression string
     char *h_expr;               ///< height expression string
@@ -190,6 +212,38 @@  static int check_exprs(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_WARNING, "Circular references detected for width '%s' and height '%s' - possibly invalid.\n", scale->w_expr, scale->h_expr);
     }
 
+    if (vars_w[VAR_REF_W]    || vars_h[VAR_REF_W]    ||
+        vars_w[VAR_RW]       || vars_h[VAR_RW]       ||
+        vars_w[VAR_REF_H]    || vars_h[VAR_REF_H]    ||
+        vars_w[VAR_RH]       || vars_h[VAR_RH]       ||
+        vars_w[VAR_REF_A]    || vars_h[VAR_REF_A]    ||
+        vars_w[VAR_REF_SAR]  || vars_h[VAR_REF_SAR]  ||
+        vars_w[VAR_REF_DAR]  || vars_h[VAR_REF_DAR]  ||
+        vars_w[VAR_RDAR]     || vars_h[VAR_RDAR]     ||
+        vars_w[VAR_REF_HSUB] || vars_h[VAR_REF_HSUB] ||
+        vars_w[VAR_REF_VSUB] || vars_h[VAR_REF_VSUB] ||
+        vars_w[VAR_REF_N]    || vars_h[VAR_REF_N]    ||
+        vars_w[VAR_REF_T]    || vars_h[VAR_REF_T]    ||
+        vars_w[VAR_REF_POS]  || vars_h[VAR_REF_POS]) {
+        scale->uses_ref = 1;
+    }
+
+    if (ctx->filter != &ff_vf_scale2ref &&
+        (vars_w[VAR_S2R_MAIN_W]    || vars_h[VAR_S2R_MAIN_W]    ||
+         vars_w[VAR_S2R_MAIN_H]    || vars_h[VAR_S2R_MAIN_H]    ||
+         vars_w[VAR_S2R_MAIN_A]    || vars_h[VAR_S2R_MAIN_A]    ||
+         vars_w[VAR_S2R_MAIN_SAR]  || vars_h[VAR_S2R_MAIN_SAR]  ||
+         vars_w[VAR_S2R_MAIN_DAR]  || vars_h[VAR_S2R_MAIN_DAR]  ||
+         vars_w[VAR_S2R_MDAR]      || vars_h[VAR_S2R_MDAR]      ||
+         vars_w[VAR_S2R_MAIN_HSUB] || vars_h[VAR_S2R_MAIN_HSUB] ||
+         vars_w[VAR_S2R_MAIN_VSUB] || vars_h[VAR_S2R_MAIN_VSUB] ||
+         vars_w[VAR_S2R_MAIN_N]    || vars_h[VAR_S2R_MAIN_N]    ||
+         vars_w[VAR_S2R_MAIN_T]    || vars_h[VAR_S2R_MAIN_T]    ||
+         vars_w[VAR_S2R_MAIN_POS]  || vars_h[VAR_S2R_MAIN_POS]) ) {
+        av_log(ctx, AV_LOG_ERROR, "Expressions with scale2ref variables are not valid in scale filter.\n");
+        return AVERROR(EINVAL);
+    }
+
     if (ctx->filter != &ff_vf_scale2ref &&
         (vars_w[VAR_S2R_MAIN_W]    || vars_h[VAR_S2R_MAIN_W]    ||
          vars_w[VAR_S2R_MAIN_H]    || vars_h[VAR_S2R_MAIN_H]    ||
@@ -385,6 +439,9 @@  static av_cold int init(AVFilterContext *ctx)
     if (!threads)
         av_opt_set_int(scale->sws_opts, "threads", ff_filter_get_nb_threads(ctx), 0);
 
+    if (ctx->filter != &ff_vf_scale2ref)
+        ctx->nb_inputs = scale->uses_ref ? 2 : 1;
+
     return 0;
 }
 
@@ -506,6 +563,20 @@  static int scale_eval_dimensions(AVFilterContext *ctx)
         scale->var_values[VAR_S2R_MAIN_VSUB] = 1 << main_desc->log2_chroma_h;
     }
 
+    if (scale->uses_ref) {
+        const AVFilterLink *reflink = ctx->inputs[1];
+        const AVPixFmtDescriptor *ref_desc = av_pix_fmt_desc_get(reflink->format);
+        scale->var_values[VAR_REF_W] = scale->var_values[VAR_RW] = reflink->w;
+        scale->var_values[VAR_REF_H] = scale->var_values[VAR_RH] = reflink->h;
+        scale->var_values[VAR_REF_A] = (double) reflink->w / reflink->h;
+        scale->var_values[VAR_REF_SAR] = reflink->sample_aspect_ratio.num ?
+            (double) reflink->sample_aspect_ratio.num / reflink->sample_aspect_ratio.den : 1;
+        scale->var_values[VAR_REF_DAR] = scale->var_values[VAR_RDAR] =
+            scale->var_values[VAR_REF_A] * scale->var_values[VAR_REF_SAR];
+        scale->var_values[VAR_REF_HSUB] = 1 << ref_desc->log2_chroma_w;
+        scale->var_values[VAR_REF_VSUB] = 1 << ref_desc->log2_chroma_h;
+    }
+
     res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL);
     eval_w = scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res;
 
@@ -693,6 +764,13 @@  static int config_props(AVFilterLink *outlink)
         scale->fs.in[0].sync      = 1;
         scale->fs.in[0].before    = EXT_STOP;
         scale->fs.in[0].after     = EXT_STOP;
+        if (scale->uses_ref) {
+            av_assert0(ctx->nb_inputs == 2);
+            scale->fs.in[1].time_base = ctx->inputs[1]->time_base;
+            scale->fs.in[1].sync      = 0;
+            scale->fs.in[1].before    = EXT_NULL;
+            scale->fs.in[1].after     = EXT_INFINITY;
+        }
 
         ret = ff_framesync_configure(&scale->fs);
         if (ret < 0)
@@ -919,13 +997,55 @@  scale:
 static int do_scale(FFFrameSync *fs)
 {
     AVFilterContext *ctx = fs->parent;
+    ScaleContext *scale = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    AVFrame *in, *out;
-    int ret;
+    AVFrame *out, *in = NULL, *ref = NULL;
+    int ret = 0, frame_changed;
 
     ret = ff_framesync_get_frame(fs, 0, &in, 1);
     if (ret < 0)
-        return ret;
+        goto err;
+
+    if (scale->uses_ref) {
+        ret = ff_framesync_get_frame(fs, 1, &ref, 0);
+        if (ret < 0)
+            goto err;
+    }
+
+    if (ref) {
+        AVFilterLink *reflink = ctx->inputs[1];
+        frame_changed = ref->width  != reflink->w ||
+                        ref->height != reflink->h ||
+                        ref->format != reflink->format ||
+                        ref->sample_aspect_ratio.den != reflink->sample_aspect_ratio.den ||
+                        ref->sample_aspect_ratio.num != reflink->sample_aspect_ratio.num ||
+                        ref->colorspace != reflink->colorspace ||
+                        ref->color_range != reflink->color_range;
+
+        if (frame_changed) {
+            reflink->format = ref->format;
+            reflink->w = ref->width;
+            reflink->h = ref->height;
+            reflink->sample_aspect_ratio.num = ref->sample_aspect_ratio.num;
+            reflink->sample_aspect_ratio.den = ref->sample_aspect_ratio.den;
+            reflink->colorspace = ref->colorspace;
+            reflink->color_range = ref->color_range;
+
+            ret = config_props(outlink);
+            if (ret < 0)
+                goto err;
+        }
+
+        if (scale->eval_mode == EVAL_MODE_FRAME) {
+            scale->var_values[VAR_REF_N] = reflink->frame_count_out;
+            scale->var_values[VAR_REF_T] = TS2T(ref->pts, reflink->time_base);
+#if FF_API_FRAME_PKT
+FF_DISABLE_DEPRECATION_WARNINGS
+            scale->var_values[VAR_REF_POS] = ref->pkt_pos == -1 ? NAN : ref->pkt_pos;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        }
+    }
 
     ret = scale_frame(ctx->inputs[0], in, &out);
     if (out) {
@@ -933,6 +1053,9 @@  static int do_scale(FFFrameSync *fs)
         return ff_filter_frame(outlink, out);
     }
 
+err:
+    if (ret < 0)
+        av_frame_free(&in);
     return ret;
 }
 
@@ -1108,6 +1231,9 @@  static const AVFilterPad avfilter_vf_scale_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
+    }, {
+        .name         = "ref",
+        .type         = AVMEDIA_TYPE_VIDEO,
     },
 };