[FFmpeg-devel,v5,09/12] swscale: introduce new, dynamic scaling API

Message ID 20241111075733.14603-10-ffmpeg@haasn.xyz
State New
Headers
Series swscale: introduce new, dynamic scaling API |

Checks

Context Check Description
yinshiyou/make_loongarch64 fail Make failed
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Niklas Haas Nov. 11, 2024, 7:56 a.m. UTC
From: Niklas Haas <git@haasn.dev>

As part of a larger, ongoing effort to modernize and partially rewrite
libswscale, it was decided and generally agreed upon to introduce a new
public API for libswscale. This API is designed to be less stateful, more
explicitly defined, and considerably easier to use than the existing one.

Most of the API work has already been accomplished in the previous commits;
this commit merely introduces the ability to use sws_scale_frame()
dynamically, without prior sws_init_context() calls. Instead, the new API
takes frame properties from the frames themselves, and the implementation is
based on the new SwsGraph API, which we simply reinitialize as needed.

This high-level wrapper also recreates the logic that used to live inside
vf_scale for scaling interlaced frames, enabling it to be reused more easily
by end users.

Finally, this function is designed to simply copy refs directly when nothing
needs to be done, substantially improving throughput of the noop fast path.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libswscale/swscale.c          | 196 ++++++++++++++++++++++++++++++++--
 libswscale/swscale.h          |  89 +++++++++++----
 libswscale/swscale_internal.h |   7 +-
 libswscale/utils.c            |   4 +
 libswscale/x86/output.asm     |   2 +-
 5 files changed, 269 insertions(+), 29 deletions(-)
  

Patch

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 45172dcea4..d3dac44d04 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1219,21 +1219,205 @@  int sws_receive_slice(SwsContext *sws, unsigned int slice_start,
                           dst, c->frame_dst->linesize, slice_start, slice_height);
 }
 
+static void get_frame_pointers(const AVFrame *frame, uint8_t *data[4],
+                               int linesize[4], int field)
+{
+    for (int i = 0; i < 4; i++) { /* start from the frame's own planes */
+        data[i]     = frame->data[i];
+        linesize[i] = frame->linesize[i];
+    }
+
+    if (!(frame->flags & AV_FRAME_FLAG_INTERLACED)) {
+        av_assert1(!field); /* progressive frames are only addressed as field 0 */
+        return;
+    }
+
+    if (field == FIELD_BOTTOM) {
+        /* Odd rows, offset by one line; desc is assumed non-NULL (not checked) */
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+        for (int i = 0; i < 4; i++) {
+            data[i] += linesize[i];
+            if (desc->flags & AV_PIX_FMT_FLAG_PAL)
+                break; /* paletted formats: only plane 0 is offset */
+        }
+    }
+
+    /* Take only every second line by doubling each stride */
+    for (int i = 0; i < 4; i++)
+        linesize[i] <<= 1;
+}
+
+/* Subset of av_frame_ref() that only references (video) data buffers */
+static int frame_ref(AVFrame *dst, const AVFrame *src)
+{
+    /* ref the buffers; slots that are NULL in src are skipped */
+    for (int i = 0; i < FF_ARRAY_ELEMS(src->buf); i++) {
+        if (!src->buf[i])
+            continue;
+        dst->buf[i] = av_buffer_ref(src->buf[i]);
+        if (!dst->buf[i])
+            return AVERROR(ENOMEM); /* NOTE(review): earlier refs stay in dst->buf */
+    }
+
+    memcpy(dst->data,     src->data,     sizeof(src->data)); /* same planes as src */
+    memcpy(dst->linesize, src->linesize, sizeof(src->linesize));
+    return 0;
+}
+
 int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src)
 {
     int ret;
+    SwsInternal *c = sws_internal(sws);
+    if (!src || !dst)
+        return AVERROR(EINVAL);
+
+    if (c->frame_src) {
+        /* Context has been initialized with explicit values, fall back to
+         * legacy API */
+        ret = sws_frame_start(sws, dst, src);
+        if (ret < 0)
+            return ret;
+
+        ret = sws_send_slice(sws, 0, src->height);
+        if (ret >= 0)
+            ret = sws_receive_slice(sws, 0, dst->height);
 
-    ret = sws_frame_start(sws, dst, src);
+        sws_frame_end(sws); /* called whether or not the slice calls succeeded */
+
+        return ret;
+    }
+
+    ret = sws_frame_setup(sws, dst, src); /* dynamic mode: (re)init the graphs */
     if (ret < 0)
         return ret;
 
-    ret = sws_send_slice(sws, 0, src->height);
-    if (ret >= 0)
-        ret = sws_receive_slice(sws, 0, dst->height);
+    if (!src->data[0])
+        return 0; /* no source data: setup only, nothing to scale */
 
-    sws_frame_end(sws);
+    if (c->graph[FIELD_TOP]->noop &&
+        (!c->graph[FIELD_BOTTOM] || c->graph[FIELD_BOTTOM]->noop) &&
+        src->buf[0] && !dst->buf[0] && !dst->data[0])
+    {
+        /* Lightweight refcopy: noop graphs, refcounted src, unallocated dst */
+        ret = frame_ref(dst, src);
+        if (ret < 0)
+            return ret;
+    } else {
+        if (!dst->data[0]) { /* caller left dst unallocated: allocate it here */
+            ret = av_frame_get_buffer(dst, 0);
+            if (ret < 0)
+                return ret;
+        }
 
-    return ret;
+        for (int field = 0; field < 2; field++) {
+            SwsGraph *graph = c->graph[field];
+            uint8_t *dst_data[4], *src_data[4];
+            int dst_linesize[4], src_linesize[4];
+            get_frame_pointers(dst, dst_data, dst_linesize, field);
+            get_frame_pointers(src, src_data, src_linesize, field);
+            sws_graph_run(graph, dst_data, dst_linesize,
+                          (const uint8_t **) src_data, src_linesize);
+            if (!graph->dst.interlaced)
+                break; /* progressive output: no second field to process */
+        }
+    }
+
+    return 0;
+}
+
+static int validate_params(SwsContext *ctx)
+{
+#define VALIDATE(field, min, max) do { /* range-check one ctx field */ \
+    if (ctx->field < min || ctx->field > max) { \
+        av_log(ctx, AV_LOG_ERROR, "'%s' (%d) out of range [%d, %d]\n", \
+               #field, (int) ctx->field, min, max); \
+        return AVERROR(EINVAL); /* returns from validate_params() itself */ \
+    } \
+} while (0)
+    VALIDATE(threads,       0, 8192);
+    VALIDATE(dither,        0, SWS_DITHER_NB - 1);
+    VALIDATE(alpha_blend,   0, SWS_ALPHA_BLEND_NB - 1);
+    return 0; /* every checked field is within its allowed range */
+}
+
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
+{
+    SwsInternal *s = sws_internal(ctx);
+    const char *err_msg; /* bare text: the fail path appends details and '\n' */
+    int ret;
+
+    if (!src || !dst)
+        return AVERROR(EINVAL);
+    if ((ret = validate_params(ctx)) < 0)
+        return ret;
+
+    for (int field = 0; field < 2; field++) { /* one scaling graph per field */
+        SwsFormat src_fmt = ff_fmt_from_frame(src, field);
+        SwsFormat dst_fmt = ff_fmt_from_frame(dst, field);
+
+        if ((src->flags ^ dst->flags) & AV_FRAME_FLAG_INTERLACED) {
+            err_msg = "Cannot convert interlaced to progressive frames or vice versa";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        /* TODO: remove once implemented */
+        if ((dst_fmt.prim != src_fmt.prim || dst_fmt.trc != src_fmt.trc) &&
+            !s->color_conversion_warned)
+        {
+            av_log(ctx, AV_LOG_WARNING, "Conversions between different primaries / "
+                   "transfer functions are not currently implemented, expect "
+                   "wrong results.\n");
+            s->color_conversion_warned = 1; /* warn only once per context */
+        }
+
+        if (!ff_test_fmt(&src_fmt, 0)) {
+            err_msg = "Unsupported input";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        if (!ff_test_fmt(&dst_fmt, 1)) {
+            err_msg = "Unsupported output";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        ret = sws_graph_reinit(ctx, &dst_fmt, &src_fmt, field, &s->graph[field]);
+        if (ret < 0) {
+            err_msg = "Failed initializing scaling graph";
+            goto fail;
+        }
+
+        if (s->graph[field]->incomplete && ctx->flags & SWS_STRICT) {
+            err_msg = "Incomplete scaling graph";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        if (!src_fmt.interlaced) {
+            sws_graph_free(&s->graph[FIELD_BOTTOM]); /* drop any stale bottom graph */
+            break;
+        }
+
+        continue; /* success for this field: skip the error path below */
+
+    fail:
+        av_log(ctx, AV_LOG_ERROR, "%s (%s): fmt:%s csp:%s prim:%s trc:%s ->"
+                                          " fmt:%s csp:%s prim:%s trc:%s\n",
+               err_msg, av_err2str(ret),
+               av_get_pix_fmt_name(src_fmt.format), av_color_space_name(src_fmt.csp),
+               av_color_primaries_name(src_fmt.prim), av_color_transfer_name(src_fmt.trc),
+               av_get_pix_fmt_name(dst_fmt.format), av_color_space_name(dst_fmt.csp),
+               av_color_primaries_name(dst_fmt.prim), av_color_transfer_name(dst_fmt.trc));
+
+        for (int i = 0; i < FF_ARRAY_ELEMS(s->graph); i++)
+            sws_graph_free(&s->graph[i]); /* leave no partially set up graphs */
+
+        return ret;
+    }
+
+    return 0;
+}
 
 /**
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 3996411dc8..fa3a0f01ab 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -107,6 +107,12 @@  typedef enum SwsFlags {
     SWS_LANCZOS       = 1 <<  9, ///< 3-tap sinc/sinc
     SWS_SPLINE        = 1 << 10, ///< cubic Keys spline
 
+    /**
+     * Return an error on underspecified conversions. Without this flag,
+     * unspecified fields are defaulted to sensible values.
+     */
+    SWS_STRICT        = 1 << 11,
+
     /**
      * Emit verbose log of scaling parameters.
      */
@@ -204,7 +210,10 @@  typedef struct SwsContext {
     int gamma_flag;
 
     /**
-     * Frame property overrides.
+     * Deprecated frame property overrides, for the legacy API only.
+     *
+     * Ignored by sws_scale_frame() when used in dynamic mode, in which
+     * case all properties are instead taken from the frame directly.
      */
     int src_w, src_h;  ///< Width and height of the source frame
     int dst_w, dst_h;  ///< Width and height of the destination frame
@@ -216,6 +225,8 @@  typedef struct SwsContext {
     int src_h_chr_pos; ///< Source horizontal chroma position
     int dst_v_chr_pos; ///< Destination vertical chroma position
     int dst_h_chr_pos; ///< Destination horizontal chroma position
+
+    /* Remember to add new fields to graph.c:opts_equal() */
 } SwsContext;
 
 /**
@@ -284,12 +295,57 @@  int sws_test_transfer(enum AVColorTransferCharacteristic trc, int output);
  */
 int sws_test_frame(const AVFrame *frame, int output);
 
+/**
+ * Like `sws_scale_frame`, but without actually scaling. It will instead
+ * merely initialize internal state that *would* be required to perform the
+ * operation, as well as returning the correct error code for unsupported
+ * frame combinations.
+ *
+ * @param ctx   The scaling context.
+ * @param dst   The destination frame to consider.
+ * @param src   The source frame to consider.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src);
+
+/********************
+ * Main scaling API *
+ ********************/
+
 /**
  * Check if a given conversion is a noop. Returns a positive integer if
  * no operation needs to be performed, 0 otherwise.
  */
 int sws_is_noop(const AVFrame *dst, const AVFrame *src);
 
+/**
+ * Scale source data from `src` and write the output to `dst`.
+ *
+ * This function can be used directly on an allocated context, without setting
+ * up any frame properties or calling `sws_init_context()`. Such usage is fully
+ * dynamic and does not require reallocation if the frame properties change.
+ *
+ * Alternatively, this function can be called on a context that has been
+ * explicitly initialized. However, this is provided only for backwards
+ * compatibility. In this usage mode, all frame properties must be correctly
+ * set at init time, and may no longer change after initialization.
+ *
+ * @param c     The scaling context.
+ * @param dst   The destination frame. The data buffers may either be already
+ *              allocated by the caller or left clear, in which case they will
+ *              be allocated by the scaler. The latter may have performance
+ *              advantages - e.g. in certain cases some (or all) output planes
+ *              may be references to input planes, rather than copies.
+ * @param src   The source frame. If the data buffers are set to NULL, then
+ *              this function behaves identically to `sws_frame_setup`.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/*************************
+ * Legacy (stateful) API *
+ *************************/
+
 #define SWS_SRC_V_CHR_DROP_MASK     0x30000
 #define SWS_SRC_V_CHR_DROP_SHIFT    16
 
@@ -352,6 +408,11 @@  int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt);
 /**
  * Initialize the swscaler context sws_context.
  *
+ * This function is considered deprecated, and provided only for backwards
+ * compatibility with sws_scale() and sws_frame_start(). The preferred way to
+ * use libswscale is to set all frame properties correctly and call
+ * sws_scale_frame() directly, without explicitly initializing the context.
+ *
  * @return zero or positive value on success, a negative value on
  * error
  */
@@ -393,7 +454,8 @@  SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
 /**
  * Scale the image slice in srcSlice and put the resulting scaled
  * slice in the image in dst. A slice is a sequence of consecutive
- * rows in an image.
+ * rows in an image. Requires a context that has been previously
+ * initialized with sws_init_context().
  *
  * Slices have to be provided in sequential order, either in
  * top-bottom or bottom-top order. If slices are provided in
@@ -420,27 +482,11 @@  int sws_scale(SwsContext *c, const uint8_t *const srcSlice[],
               const int srcStride[], int srcSliceY, int srcSliceH,
               uint8_t *const dst[], const int dstStride[]);
 
-/**
- * Scale source data from src and write the output to dst.
- *
- * This is merely a convenience wrapper around
- * - sws_frame_start()
- * - sws_send_slice(0, src->height)
- * - sws_receive_slice(0, dst->height)
- * - sws_frame_end()
- *
- * @param c   The scaling context
- * @param dst The destination frame. See documentation for sws_frame_start() for
- *            more details.
- * @param src The source frame.
- *
- * @return 0 on success, a negative AVERROR code on failure
- */
-int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
-
 /**
  * Initialize the scaling process for a given pair of source/destination frames.
  * Must be called before any calls to sws_send_slice() and sws_receive_slice().
+ * Requires a context that has previously been initialized with
+ * sws_init_context().
  *
  * This function will retain references to src and dst, so they must both use
  * refcounted buffers (if allocated by the caller, in case of dst).
@@ -511,7 +557,8 @@  int sws_receive_slice(SwsContext *c, unsigned int slice_start,
                       unsigned int slice_height);
 
 /**
- * Get the alignment required for slices
+ * Get the alignment required for slices. Requires a context that has been
+ * previously initialized with sws_init_context().
  *
  * @param c   The scaling context
  * @return alignment required for output slices requested with sws_receive_slice().
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 7c9517975b..80487e5275 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -26,6 +26,7 @@ 
 
 #include "config.h"
 #include "swscale.h"
+#include "graph.h"
 
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
@@ -323,6 +324,9 @@  struct SwsInternal {
     int                *slice_err;
     int              nb_slice_ctx;
 
+    /* Scaling graph, reinitialized dynamically as needed. */
+    SwsGraph *graph[2]; /* top, bottom fields */
+
     // values passed to current sws_receive_slice() call
     int dst_slice_start;
     int dst_slice_height;
@@ -663,6 +667,7 @@  struct SwsInternal {
     unsigned int dst_slice_align;
     atomic_int   stride_unaligned_warned;
     atomic_int   data_unaligned_warned;
+    int          color_conversion_warned;
 
     Half2FloatTables *h2f_tables;
 };
@@ -674,7 +679,7 @@  static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt
 #if ARCH_X86_64
 /* x86 yuv2gbrp uses the SwsInternal for yuv coefficients
    if struct offsets change the asm needs to be updated too */
-static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40316,
+static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332,
               "yuv2rgb_y_offset must be updated in x86 asm");
 #endif
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 1b6f54fc30..628a3f1091 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -61,6 +61,7 @@ 
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "utils.h"
+#include "graph.h"
 
 typedef struct FormatEntry {
     uint8_t is_supported_in         :1;
@@ -2450,6 +2451,9 @@  void sws_freeContext(SwsContext *sws)
     if (!c)
         return;
 
+    for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
+        sws_graph_free(&c->graph[i]);
+
     for (i = 0; i < c->nb_slice_ctx; i++)
         sws_freeContext(c->slice_ctx[i]);
     av_freep(&c->slice_ctx);
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index dec1d27f9a..7a1e5d9bc1 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -582,7 +582,7 @@  yuv2nv12cX_fn yuv2nv21
 
 %if ARCH_X86_64
 struc SwsInternal
-    .padding:           resb 40316 ; offsetof(SwsInternal, yuv2rgb_y_offset)
+    .padding:           resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset)
     .yuv2rgb_y_offset:  resd 1
     .yuv2rgb_y_coeff:   resd 1
     .yuv2rgb_v2r_coeff: resd 1