diff mbox series

[FFmpeg-devel,v2,16/19] swscale: introduce new, dynamic scaling API

Message ID 20241014134354.180848-17-ffmpeg@haasn.xyz
State New
Headers show
Series swscale: major refactoring and new API | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Niklas Haas Oct. 14, 2024, 1:37 p.m. UTC
From: Niklas Haas <git@haasn.dev>

As part of a larger, ongoing effort to modernize and partially rewrite
libswscale, it was decided and generally agreed upon to introduce a new
public API for libswscale. This API is designed to be less stateful, more
explicitly defined, and considerably easier to use than the existing one.

Most of the API work has already been accomplished in the previous commits;
this commit merely introduces the ability to use sws_scale_frame()
dynamically, without prior sws_init_context() calls. Instead, the new API
takes frame properties from the frames themselves, and the implementation is
based on the new SwsGraph API, which we simply reinitialize as needed.

This high-level wrapper also recreates the logic that used to live inside
vf_scale for scaling interlaced frames, enabling it to be reused more easily
by end users.

Finally, this function is designed to simply copy refs directly when nothing
needs to be done, substantially improving throughput of the noop fast path.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libswscale/swscale.c          | 195 ++++++++++++++++++++++++++++++++--
 libswscale/swscale.h          |  91 ++++++++++++----
 libswscale/swscale_internal.h |   7 +-
 libswscale/utils.c            |   4 +
 libswscale/x86/output.asm     |   2 +-
 5 files changed, 269 insertions(+), 30 deletions(-)
diff mbox series

Patch

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d5be07193d..f5f58610af 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1212,21 +1212,204 @@  int sws_receive_slice(SwsContext *sws, unsigned int slice_start,
                           dst, c->frame_dst->linesize, slice_start, slice_height);
 }
 
+static SwsField get_field(const AVFrame *frame, int field)
+{
+    SwsField f = {
+#define COPY4(x) { x[0], x[1], x[2], x[3] }
+        .data     = COPY4(frame->data),
+        .linesize = COPY4(frame->linesize),
+    };
+
+    if (!(frame->flags & AV_FRAME_FLAG_INTERLACED)) {
+        av_assert1(!field);
+        return f;
+    }
+
+    if (field == FIELD_BOTTOM) {
+        /* Odd rows, offset by one line */
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+        for (int i = 0; i < FF_ARRAY_ELEMS(f.data); i++) {
+            f.data[i] += f.linesize[i];
+            if (desc->flags & AV_PIX_FMT_FLAG_PAL)
+                break;
+        }
+    }
+
+    /* Take only every second line */
+    for (int i = 0; i < FF_ARRAY_ELEMS(f.linesize); i++)
+        f.linesize[i] <<= 1;
+
+    return f;
+}
+
+/* Subset of av_frame_ref() that only references (video) data buffers */
+static int frame_ref(AVFrame *dst, const AVFrame *src)
+{
+    /* ref the buffers */
+    for (int i = 0; i < FF_ARRAY_ELEMS(src->buf); i++) {
+        if (!src->buf[i])
+            continue;
+        dst->buf[i] = av_buffer_ref(src->buf[i]);
+        if (!dst->buf[i])
+            return AVERROR(ENOMEM);
+    }
+
+    memcpy(dst->data,     src->data,     sizeof(src->data));
+    memcpy(dst->linesize, src->linesize, sizeof(src->linesize));
+    return 0;
+}
+
 int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src)
 {
     int ret;
+    SwsInternal *c = sws_internal(sws);
+    if (!src || !dst)
+        return AVERROR(EINVAL);
 
-    ret = sws_frame_start(sws, dst, src);
+    if (c->frame_src) {
+        /* Context has been initialized with explicit values, fall back to
+         * legacy API */
+        ret = sws_frame_start(sws, dst, src);
+        if (ret < 0)
+            return ret;
+
+        ret = sws_send_slice(sws, 0, src->height);
+        if (ret >= 0)
+            ret = sws_receive_slice(sws, 0, dst->height);
+
+        sws_frame_end(sws);
+
+        return ret;
+    }
+
+    ret = sws_frame_setup(sws, dst, src);
     if (ret < 0)
         return ret;
 
-    ret = sws_send_slice(sws, 0, src->height);
-    if (ret >= 0)
-        ret = sws_receive_slice(sws, 0, dst->height);
+    if (!src->data[0])
+        return 0;
 
-    sws_frame_end(sws);
+    if (c->graph[FIELD_TOP]->noop &&
+        (!c->graph[FIELD_BOTTOM] || c->graph[FIELD_BOTTOM]->noop) &&
+        src->buf[0] && !dst->buf[0] && !dst->data[0])
+    {
+        /* Lightweight refcopy */
+        ret = frame_ref(dst, src);
+        if (ret < 0)
+            return ret;
+    } else {
+        if (!dst->data[0]) {
+            ret = av_frame_get_buffer(dst, 0);
+            if (ret < 0)
+                return ret;
+        }
 
-    return ret;
+        for (int field = 0; field < 2; field++) {
+            SwsGraph *graph = c->graph[field];
+            SwsField dst_field = get_field(dst, field);
+            SwsField src_field = get_field(src, field);
+            sws_graph_run(graph, &dst_field, &src_field);
+            if (!graph->dst.interlaced)
+                break;
+        }
+    }
+
+    return 0;
+}
+
+static int validate_params(SwsContext *ctx)
+{
+#define VALIDATE(field, min, max) \
+    if (ctx->field < min || ctx->field > max) { \
+        av_log(ctx, AV_LOG_ERROR, "'%s' (%d) out of range [%d, %d]\n", \
+               #field, (int) ctx->field, min, max); \
+        return AVERROR(EINVAL); \
+    }
+
+    VALIDATE(threads,       0, 8192);
+    VALIDATE(dither,        0, SWS_DITHER_NB - 1)
+    VALIDATE(alpha_blend,   0, SWS_ALPHA_BLEND_NB - 1)
+    return 0;
+}
+
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
+{
+    SwsInternal *s = ctx->internal;
+    const char *err_msg;
+    int ret;
+
+    if (!src || !dst)
+        return AVERROR(EINVAL);
+    if ((ret = validate_params(ctx)) < 0)
+        return ret;
+
+    for (int field = 0; field < 2; field++) {
+        SwsFormat src_fmt = ff_fmt_from_frame(src, field);
+        SwsFormat dst_fmt = ff_fmt_from_frame(dst, field);
+
+        if ((src->flags ^ dst->flags) & AV_FRAME_FLAG_INTERLACED) {
+            err_msg = "Cannot convert interlaced to progressive frames or vice versa.\n";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        /* TODO: remove once implemented */
+        if ((dst_fmt.prim != src_fmt.prim || dst_fmt.trc != src_fmt.trc) &&
+            !s->color_conversion_warned)
+        {
+            av_log(ctx, AV_LOG_WARNING, "Conversions between different primaries / "
+                   "transfer functions are not currently implemented, expect "
+                   "wrong results.\n");
+            s->color_conversion_warned = 1;
+        }
+
+        if (!ff_test_fmt(&src_fmt, 0)) {
+            err_msg = "Unsupported input";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        if (!ff_test_fmt(&dst_fmt, 1)) {
+            err_msg = "Unsupported output";
+            ret = AVERROR(ENOTSUP);
+            goto fail;
+        }
+
+        ret = sws_graph_reinit(ctx, &dst_fmt, &src_fmt, field, &s->graph[field]);
+        if (ret < 0) {
+            err_msg = "Failed initializing scaling graph";
+            goto fail;
+        }
+
+        if (s->graph[field]->incomplete && ctx->flags & SWS_STRICT) {
+            err_msg = "Incomplete scaling graph";
+            ret = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        if (!src_fmt.interlaced) {
+            sws_graph_free(&s->graph[FIELD_BOTTOM]);
+            break;
+        }
+
+        continue;
+
+    fail:
+        av_log(ctx, AV_LOG_ERROR, "%s (%s): fmt:%s csp:%s prim:%s trc:%s ->"
+                                          " fmt:%s csp:%s prim:%s trc:%s\n",
+               err_msg, av_err2str(ret),
+               av_get_pix_fmt_name(src_fmt.format), av_color_space_name(src_fmt.csp),
+               av_color_primaries_name(src_fmt.prim), av_color_transfer_name(src_fmt.trc),
+               av_get_pix_fmt_name(dst_fmt.format), av_color_space_name(dst_fmt.csp),
+               av_color_primaries_name(dst_fmt.prim), av_color_transfer_name(dst_fmt.trc));
+
+        for (int i = 0; i < FF_ARRAY_ELEMS(s->graph); i++)
+            sws_graph_free(&s->graph[i]);
+
+        return ret;
+    }
+
+    return 0;
 }
 
 /**
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index c59f6cf927..b53b3dd32c 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -107,6 +107,12 @@  typedef enum SwsFlags {
     SWS_LANCZOS       = 1 <<  9, ///< 3-tap sinc/sinc
     SWS_SPLINE        = 1 << 10, ///< cubic Keys spline
 
+    /**
+     * Return an error on underspecified conversions. Without this flag,
+     * unspecified fields are defaulted to sensible values.
+     */
+    SWS_STRICT        = 1 << 11,
+
     /**
      * Emit verbose log of scaling parameters.
      */
@@ -209,7 +215,10 @@  typedef struct SwsContext {
     int gamma_flag;
 
     /**
-     * Frame property overrides.
+     * Deprecated frame property overrides, for the legacy API only.
+     *
+     * Ignored by sws_scale_frame() when used in dynamic mode, in which
+     * case all properties are instead taken from the frame directly.
      */
     int src_w, src_h;  ///< Width and height of the source frame
     int dst_w, dst_h;  ///< Width and height of the destination frame
@@ -221,6 +230,8 @@  typedef struct SwsContext {
     int src_h_chr_pos; ///< Source horizontal chroma position
     int dst_v_chr_pos; ///< Destination vertical chroma position
     int dst_h_chr_pos; ///< Destination horizontal chroma position
+
+    /* Remember to add new fields to graph.c:opts_equal() */
 } SwsContext;
 
 /**
@@ -289,12 +300,57 @@  int sws_test_transfer(enum AVColorTransferCharacteristic trc, int output);
  */
 int sws_test_frame(const AVFrame *frame, int output);
 
+/**
+ * Like `sws_scale_frame`, but without actually scaling. It will instead
+ * merely initialize internal state that *would* be required to perform the
+ * operation, as well as returning the correct error code for unsupported
+ * frame combinations.
+ *
+ * @param ctx   The scaling context.
+ * @param dst   The destination frame to consider.
+ * @param src   The source frame to consider.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src);
+
+/********************
+ * Main scaling API *
+ ********************/
+
 /**
  * Check if a given conversion is a noop. Returns a positive integer if
  * no operation needs to be performed, 0 otherwise.
  */
 int sws_is_noop(const AVFrame *dst, const AVFrame *src);
 
+/**
+ * Scale source data from `src` and write the output to `dst`.
+ *
+ * This function can be used directly on an allocated context, without setting
+ * up any frame properties or calling `sws_init_context()`. Such usage is fully
+ * dynamic and does not require reallocation if the frame properties change.
+ *
+ * Alternatively, this function can be called on a context that has been
+ * explicitly initialized. However, this is provided only for backwards
+ * compatibility. In this usage mode, all frame properties must be correctly
+ * set at init time, and may no longer change after initialization.
+ *
+ * @param c     The scaling context.
+ * @param dst   The destination frame. The data buffers may either be already
+ *              allocated by the caller or left clear, in which case they will
+ *              be allocated by the scaler. The latter may have performance
+ *              advantages - e.g. in certain cases some (or all) output planes
+ *              may be references to input planes, rather than copies.
+ * @param src   The source frame. If the data buffers are set to NULL, then
+ *              this function behaves identically to `sws_frame_setup`.
+ * @return 0 on success, a negative AVERROR code on failure.
+ */
+int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
+
+/*************************
+ * Legacy (stateful) API *
+ *************************/
+
 #define SWS_SRC_V_CHR_DROP_MASK     0x30000
 #define SWS_SRC_V_CHR_DROP_SHIFT    16
 
@@ -358,6 +414,11 @@  int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt);
  * Initialize the swscaler context sws_context. This function fixes the
  * values of any options set in the SwsContext; further adjustments will
  * not affect the scaling process.
+ *
+ * This function is considered deprecated, and provided only for backwards
+ * compatibility with sws_scale() and sws_frame_start(). The preferred way to
+ * use libswscale is to set all frame properties correctly and call
+ * sws_scale_frame() directly, without explicitly initializing the context.
  *
  * @return zero or positive value on success, a negative value on
  * error
@@ -400,7 +461,8 @@  SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
 /**
  * Scale the image slice in srcSlice and put the resulting scaled
  * slice in the image in dst. A slice is a sequence of consecutive
- * rows in an image.
+ * rows in an image. Requires a context that has previously
+ * been initialized with sws_init_context().
  *
  * Slices have to be provided in sequential order, either in
  * top-bottom or bottom-top order. If slices are provided in
@@ -427,27 +489,11 @@  int sws_scale(SwsContext *c, const uint8_t *const srcSlice[],
               const int srcStride[], int srcSliceY, int srcSliceH,
               uint8_t *const dst[], const int dstStride[]);
 
-/**
- * Scale source data from src and write the output to dst.
- *
- * This is merely a convenience wrapper around
- * - sws_frame_start()
- * - sws_send_slice(0, src->height)
- * - sws_receive_slice(0, dst->height)
- * - sws_frame_end()
- *
- * @param c   The scaling context
- * @param dst The destination frame. See documentation for sws_frame_start() for
- *            more details.
- * @param src The source frame.
- *
- * @return 0 on success, a negative AVERROR code on failure
- */
-int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src);
-
 /**
  * Initialize the scaling process for a given pair of source/destination frames.
  * Must be called before any calls to sws_send_slice() and sws_receive_slice().
+ * Requires a context that has previously been initialized with
+ * sws_init_context().
  *
  * This function will retain references to src and dst, so they must both use
  * refcounted buffers (if allocated by the caller, in case of dst).
@@ -518,7 +564,8 @@  int sws_receive_slice(SwsContext *c, unsigned int slice_start,
                       unsigned int slice_height);
 
 /**
- * Get the alignment required for slices
+ * Get the alignment required for slices. Requires a context that has
+ * previously been initialized with sws_init_context().
  *
  * @param c   The scaling context
  * @return alignment required for output slices requested with sws_receive_slice().
@@ -528,7 +575,7 @@  int sws_receive_slice(SwsContext *c, unsigned int slice_start,
 unsigned int sws_receive_slice_alignment(const SwsContext *c);
 
 /**
- * @param c the scaling context
+ * @param c the scaling context, must have been initialized with sws_init_context()
  * @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg)
  * @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg)
  * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x]
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5a8d33a988..584a77af9d 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -26,6 +26,7 @@ 
 
 #include "config.h"
 #include "swscale.h"
+#include "graph.h"
 
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
@@ -323,6 +324,9 @@  struct SwsInternal {
     int                *slice_err;
     int              nb_slice_ctx;
 
+    /* Scaling graph, reinitialized dynamically as needed. */
+    SwsGraph *graph[2]; /* top, bottom fields */
+
     // values passed to current sws_receive_slice() call
     int dst_slice_start;
     int dst_slice_height;
@@ -663,6 +667,7 @@  struct SwsInternal {
     unsigned int dst_slice_align;
     atomic_int   stride_unaligned_warned;
     atomic_int   data_unaligned_warned;
+    int          color_conversion_warned;
 
     Half2FloatTables *h2f_tables;
 };
@@ -674,7 +679,7 @@  static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt
 #if ARCH_X86
 /* x86 yuv2gbrp uses the SwsInternal for yuv coefficients
    if struct offsets change the asm needs to be updated too */
-static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332,
+static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40348,
               "yuv2rgb_y_offset must be updated in x86 asm");
 #endif
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index cf553e9d56..d18cdc1101 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -61,6 +61,7 @@ 
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "utils.h"
+#include "graph.h"
 
 typedef struct FormatEntry {
     uint8_t is_supported_in         :1;
@@ -2461,6 +2462,9 @@  void sws_freeContext(SwsContext *sws)
     if (!c)
         return;
 
+    for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
+        sws_graph_free(&c->graph[i]);
+
     for (i = 0; i < c->nb_slice_ctx; i++)
         sws_freeContext(c->slice_ctx[i]);
     av_freep(&c->slice_ctx);
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 7a1e5d9bc1..f2e884780a 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -582,7 +582,7 @@  yuv2nv12cX_fn yuv2nv21
 
 %if ARCH_X86_64
 struc SwsInternal
-    .padding:           resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset)
+    .padding:           resb 40348 ; offsetof(SwsInternal, yuv2rgb_y_offset)
     .yuv2rgb_y_offset:  resd 1
     .yuv2rgb_y_coeff:   resd 1
     .yuv2rgb_v2r_coeff: resd 1