[FFmpeg-devel,v2,16/19] swscale: introduce new, dynamic scaling API

Message ID	20241014134354.180848-17-ffmpeg@haasn.xyz
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Niklas Haas <ffmpeg@haasn.xyz> To: ffmpeg-devel@ffmpeg.org Date: Mon, 14 Oct 2024 15:37:41 +0200 Message-ID: <20241014134354.180848-17-ffmpeg@haasn.xyz> In-Reply-To: <20241014134354.180848-1-ffmpeg@haasn.xyz> References: <20241014134354.180848-1-ffmpeg@haasn.xyz> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v2 16/19] swscale: introduce new, dynamic scaling API Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Niklas Haas <git@haasn.dev> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	swscale: major refactoring and new API \| expand [FFmpeg-devel,v2,00/19] swscale: major refactoring and new API [FFmpeg-devel,v2,01/19] swscale: publicly typedef struct SwsContext [FFmpeg-devel,v2,02/19] swscale: rename SwsContext to SwsInternal [FFmpeg-devel,v2,03/19] swscale: slightly reorder header [FFmpeg-devel,v2,04/19] swscale: add sws_free_context() [FFmpeg-devel,v2,05/19] swscale/utils: add SwsFormat abstraction and helpers [FFmpeg-devel,v2,06/19] swscale: add new frame testing API [FFmpeg-devel,v2,07/19] swscale: add sws_is_noop() [FFmpeg-devel,v2,08/19] swscale/options: cosmetic changes [FFmpeg-devel,v2,09/19] swscale/internal: use static_assert for enforcing offsets [FFmpeg-devel,v2,10/19] swscale/x86: use dedicated int for self-modifying MMX dstW [FFmpeg-devel,v2,11/19] swscale/internal: group user-facing options together [FFmpeg-devel,v2,12/19] swscale: expose SwsContext publicly [FFmpeg-devel,v2,13/19] swscale: organize and better document flags [FFmpeg-devel,v2,14/19] swscale/internal: expose sws_init_single_context() internally [FFmpeg-devel,v2,15/19] swscale/graph: add new high-level scaler dispatch mechanism [FFmpeg-devel,v2,16/19] swscale: introduce new, dynamic scaling API [FFmpeg-devel,v2,17/19] tests/swscale: rewrite on top of new API [FFmpeg-devel,v2,18/19] tests/swscale: add a benchmarking mode [FFmpeg-devel,v2,19/19] avfilter/vf_scale: switch to new swscale API

Context	Check	Description
yinshiyou/configure_loongarch64	warning	Failed to apply patch

diff --git a/libswscale/swscale.c b/libswscale/swscale.c index d5be07193d..f5f58610af 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1212,21 +1212,204 @@ int sws_receive_slice(SwsContext *sws, unsigned int slice_start, dst, c->frame_dst->linesize, slice_start, slice_height); } +static SwsField get_field(const AVFrame *frame, int field) +{ + SwsField f = { +#define COPY4(x) { x[0], x[1], x[2], x[3] } + .data = COPY4(frame->data), + .linesize = COPY4(frame->linesize), + }; + + if (!(frame->flags & AV_FRAME_FLAG_INTERLACED)) { + av_assert1(!field); + return f; + } + + if (field == FIELD_BOTTOM) { + /* Odd rows, offset by one line */ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + for (int i = 0; i < FF_ARRAY_ELEMS(f.data); i++) { + f.data[i] += f.linesize[i]; + if (desc->flags & AV_PIX_FMT_FLAG_PAL) + break; + } + } + + /* Take only every second line */ + for (int i = 0; i < FF_ARRAY_ELEMS(f.linesize); i++) + f.linesize[i] <<= 1; + + return f; +} + +/* Subset of av_frame_ref() that only references (video) data buffers */ +static int frame_ref(AVFrame *dst, const AVFrame *src) +{ + /* ref the buffers */ + for (int i = 0; i < FF_ARRAY_ELEMS(src->buf); i++) { + if (!src->buf[i]) + continue; + dst->buf[i] = av_buffer_ref(src->buf[i]); + if (!dst->buf[i]) + return AVERROR(ENOMEM); + } + + memcpy(dst->data, src->data, sizeof(src->data)); + memcpy(dst->linesize, src->linesize, sizeof(src->linesize)); + return 0; +} + int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src) { int ret; + SwsInternal *c = sws_internal(sws); + if (!src || !dst) + return AVERROR(EINVAL); - ret = sws_frame_start(sws, dst, src); + if (c->frame_src) { + /* Context has been initialized with explicit values, fall back to + * legacy API */ + ret = sws_frame_start(sws, dst, src); + if (ret < 0) + return ret; + + ret = sws_send_slice(sws, 0, src->height); + if (ret >= 0) + ret = sws_receive_slice(sws, 0, dst->height); + + sws_frame_end(sws); + + 
return ret; + } + + ret = sws_frame_setup(sws, dst, src); if (ret < 0) return ret; - ret = sws_send_slice(sws, 0, src->height); - if (ret >= 0) - ret = sws_receive_slice(sws, 0, dst->height); + if (!src->data[0]) + return 0; - sws_frame_end(sws); + if (c->graph[FIELD_TOP]->noop && + (!c->graph[FIELD_BOTTOM] || c->graph[FIELD_BOTTOM]->noop) && + src->buf[0] && !dst->buf[0] && !dst->data[0]) + { + /* Lightweight refcopy */ + ret = frame_ref(dst, src); + if (ret < 0) + return ret; + } else { + if (!dst->data[0]) { + ret = av_frame_get_buffer(dst, 0); + if (ret < 0) + return ret; + } - return ret; + for (int field = 0; field < 2; field++) { + SwsGraph *graph = c->graph[field]; + SwsField dst_field = get_field(dst, field); + SwsField src_field = get_field(src, field); + sws_graph_run(graph, &dst_field, &src_field); + if (!graph->dst.interlaced) + break; + } + } + + return 0; +} + +static int validate_params(SwsContext *ctx) +{ +#define VALIDATE(field, min, max) \ + if (ctx->field < min || ctx->field > max) { \ + av_log(ctx, AV_LOG_ERROR, "'%s' (%d) out of range [%d, %d]\n", \ + #field, (int) ctx->field, min, max); \ + return AVERROR(EINVAL); \ + } + + VALIDATE(threads, 0, 8192); + VALIDATE(dither, 0, SWS_DITHER_NB - 1) + VALIDATE(alpha_blend, 0, SWS_ALPHA_BLEND_NB - 1) + return 0; +} + +int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src) +{ + SwsInternal *s = ctx->internal; + const char *err_msg; + int ret; + + if (!src || !dst) + return AVERROR(EINVAL); + if ((ret = validate_params(ctx)) < 0) + return ret; + + for (int field = 0; field < 2; field++) { + SwsFormat src_fmt = ff_fmt_from_frame(src, field); + SwsFormat dst_fmt = ff_fmt_from_frame(dst, field); + + if ((src->flags ^ dst->flags) & AV_FRAME_FLAG_INTERLACED) { + err_msg = "Cannot convert interlaced to progressive frames or vice versa.\n"; + ret = AVERROR(EINVAL); + goto fail; + } + + /* TODO: remove once implemented */ + if ((dst_fmt.prim != src_fmt.prim || dst_fmt.trc != src_fmt.trc) 
&& + !s->color_conversion_warned) + { + av_log(ctx, AV_LOG_WARNING, "Conversions between different primaries / " + "transfer functions are not currently implemented, expect " + "wrong results.\n"); + s->color_conversion_warned = 1; + } + + if (!ff_test_fmt(&src_fmt, 0)) { + err_msg = "Unsupported input"; + ret = AVERROR(ENOTSUP); + goto fail; + } + + if (!ff_test_fmt(&dst_fmt, 1)) { + err_msg = "Unsupported output"; + ret = AVERROR(ENOTSUP); + goto fail; + } + + ret = sws_graph_reinit(ctx, &dst_fmt, &src_fmt, field, &s->graph[field]); + if (ret < 0) { + err_msg = "Failed initializing scaling graph"; + goto fail; + } + + if (s->graph[field]->incomplete && ctx->flags & SWS_STRICT) { + err_msg = "Incomplete scaling graph"; + ret = AVERROR(EINVAL); + goto fail; + } + + if (!src_fmt.interlaced) { + sws_graph_free(&s->graph[FIELD_BOTTOM]); + break; + } + + continue; + + fail: + av_log(ctx, AV_LOG_ERROR, "%s (%s): fmt:%s csp:%s prim:%s trc:%s ->" + " fmt:%s csp:%s prim:%s trc:%s\n", + err_msg, av_err2str(ret), + av_get_pix_fmt_name(src_fmt.format), av_color_space_name(src_fmt.csp), + av_color_primaries_name(src_fmt.prim), av_color_transfer_name(src_fmt.trc), + av_get_pix_fmt_name(dst_fmt.format), av_color_space_name(dst_fmt.csp), + av_color_primaries_name(dst_fmt.prim), av_color_transfer_name(dst_fmt.trc)); + + for (int i = 0; i < FF_ARRAY_ELEMS(s->graph); i++) + sws_graph_free(&s->graph[i]); + + return ret; + } + + return 0; } /** diff --git a/libswscale/swscale.h b/libswscale/swscale.h index c59f6cf927..b53b3dd32c 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -107,6 +107,12 @@ typedef enum SwsFlags { SWS_LANCZOS = 1 << 9, ///< 3-tap sinc/sinc SWS_SPLINE = 1 << 10, ///< cubic Keys spline + /** + * Return an error on underspecified conversions. Without this flag, + * unspecified fields are defaulted to sensible values. + */ + SWS_STRICT = 1 << 11, + /** * Emit verbose log of scaling parameters. 
*/ @@ -209,7 +215,10 @@ typedef struct SwsContext { int gamma_flag; /** - * Frame property overrides. + * Deprecated frame property overrides, for the legacy API only. + * + * Ignored by sws_scale_frame() when used in dynamic mode, in which + * case all properties are instead taken from the frame directly. */ int src_w, src_h; ///< Width and height of the source frame int dst_w, dst_h; ///< Width and height of the destination frame @@ -221,6 +230,8 @@ typedef struct SwsContext { int src_h_chr_pos; ///< Source horizontal chroma position int dst_v_chr_pos; ///< Destination vertical chroma position int dst_h_chr_pos; ///< Destination horizontal chroma position + + /* Remember to add new fields to graph.c:opts_equal() */ } SwsContext; /** @@ -289,12 +300,57 @@ int sws_test_transfer(enum AVColorTransferCharacteristic trc, int output); */ int sws_test_frame(const AVFrame *frame, int output); +/** + * Like `sws_scale_frame`, but without actually scaling. It will instead + * merely initialize internal state that *would* be required to perform the + * operation, as well as returning the correct error code for unsupported + * frame combinations. + * + * @param ctx The scaling context. + * @param dst The destination frame to consider. + * @param src The source frame to consider. + * @return 0 on success, a negative AVERROR code on failure. + */ +int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src); + +/******************** + * Main scaling API * + ********************/ + /** * Check if a given conversion is a noop. Returns a positive integer if * no operation needs to be performed, 0 otherwise. */ int sws_is_noop(const AVFrame *dst, const AVFrame *src); +/** + * Scale source data from `src` and write the output to `dst`. + * + * This function can be used directly on an allocated context, without setting + * up any frame properties or calling `sws_init_context()`. 
Such usage is fully + * dynamic and does not require reallocation if the frame properties change. + * + * Alternatively, this function can be called on a context that has been + * explicitly initialized. However, this is provided only for backwards + * compatibility. In this usage mode, all frame properties must be correctly + * set at init time, and may no longer change after initialization. + * + * @param c The scaling context. + * @param dst The destination frame. The data buffers may either be already + * allocated by the caller or left clear, in which case they will + * be allocated by the scaler. The latter may have performance + * advantages - e.g. in certain cases some (or all) output planes + * may be references to input planes, rather than copies. + * @param src The source frame. If the data buffers are set to NULL, then + * this function behaves identically to `sws_frame_setup`. + * @return 0 on success, a negative AVERROR code on failure. + */ +int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src); + +/************************* + * Legacy (stateful) API * + *************************/ + #define SWS_SRC_V_CHR_DROP_MASK 0x30000 #define SWS_SRC_V_CHR_DROP_SHIFT 16 @@ -358,6 +414,11 @@ int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt); * Initialize the swscaler context sws_context. This function fixes the * values of any options set in the SwsContext; further adjustments will * not affect the scaling process. + * + * This function is considered deprecated, and provided only for backwards + * compatibility with sws_scale() and sws_frame_start(). The preferred way to + * use libswscale is to set all frame properties correctly and call + * sws_scale_frame() directly, without explicitly initializing the context.
* * @return zero or positive value on success, a negative value on * error @@ -400,7 +461,8 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, /** * Scale the image slice in srcSlice and put the resulting scaled * slice in the image in dst. A slice is a sequence of consecutive - * rows in an image. + * rows in an image. Requires a context that has previously + * been initialized with sws_init_context(). * * Slices have to be provided in sequential order, either in * top-bottom or bottom-top order. If slices are provided in @@ -427,27 +489,11 @@ int sws_scale(SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[]); -/** - * Scale source data from src and write the output to dst. - * - * This is merely a convenience wrapper around - * - sws_frame_start() - * - sws_send_slice(0, src->height) - * - sws_receive_slice(0, dst->height) - * - sws_frame_end() - * - * @param c The scaling context - * @param dst The destination frame. See documentation for sws_frame_start() for - * more details. - * @param src The source frame. - * - * @return 0 on success, a negative AVERROR code on failure - */ -int sws_scale_frame(SwsContext *c, AVFrame *dst, const AVFrame *src); - /** * Initialize the scaling process for a given pair of source/destination frames. * Must be called before any calls to sws_send_slice() and sws_receive_slice(). + * Requires a context that has previously been initialized with + * sws_init_context(). * * This function will retain references to src and dst, so they must both use * refcounted buffers (if allocated by the caller, in case of dst). @@ -518,7 +564,8 @@ int sws_receive_slice(SwsContext *c, unsigned int slice_start, unsigned int slice_height); /** - * Get the alignment required for slices + * Get the alignment required for slices. Requires a context that has + * previously been initialized with sws_init_context().
* * @param c The scaling context * @return alignment required for output slices requested with sws_receive_slice(). @@ -528,7 +575,7 @@ int sws_receive_slice(SwsContext *c, unsigned int slice_start, unsigned int sws_receive_slice_alignment(const SwsContext *c); /** - * @param c the scaling context + * @param c the scaling context, must have been initialized with sws_init_context() * @param dstRange flag indicating the while-black range of the output (1=jpeg / 0=mpeg) * @param srcRange flag indicating the while-black range of the input (1=jpeg / 0=mpeg) * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x] diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 5a8d33a988..584a77af9d 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -26,6 +26,7 @@ #include "config.h" #include "swscale.h" +#include "graph.h" #include "libavutil/avassert.h" #include "libavutil/common.h" @@ -323,6 +324,9 @@ struct SwsInternal { int *slice_err; int nb_slice_ctx; + /* Scaling graph, reinitialized dynamically as needed. 
*/ + SwsGraph *graph[2]; /* top, bottom fields */ + // values passed to current sws_receive_slice() call int dst_slice_start; int dst_slice_height; @@ -663,6 +667,7 @@ struct SwsInternal { unsigned int dst_slice_align; atomic_int stride_unaligned_warned; atomic_int data_unaligned_warned; + int color_conversion_warned; Half2FloatTables *h2f_tables; }; @@ -674,7 +679,7 @@ static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt #if ARCH_X86 /* x86 yuv2gbrp uses the SwsInternal for yuv coefficients if struct offsets change the asm needs to be updated too */ -static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332, +static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40348, "yuv2rgb_y_offset must be updated in x86 asm"); #endif diff --git a/libswscale/utils.c b/libswscale/utils.c index cf553e9d56..d18cdc1101 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -61,6 +61,7 @@ #include "swscale.h" #include "swscale_internal.h" #include "utils.h" +#include "graph.h" typedef struct FormatEntry { uint8_t is_supported_in :1; @@ -2461,6 +2462,9 @@ void sws_freeContext(SwsContext *sws) if (!c) return; + for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++) + sws_graph_free(&c->graph[i]); + for (i = 0; i < c->nb_slice_ctx; i++) sws_freeContext(c->slice_ctx[i]); av_freep(&c->slice_ctx); diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 7a1e5d9bc1..f2e884780a 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -582,7 +582,7 @@ yuv2nv12cX_fn yuv2nv21 %if ARCH_X86_64 struc SwsInternal - .padding: resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset) + .padding: resb 40348 ; offsetof(SwsInternal, yuv2rgb_y_offset) .yuv2rgb_y_offset: resd 1 .yuv2rgb_y_coeff: resd 1 .yuv2rgb_v2r_coeff: resd 1

[FFmpeg-devel,v2,16/19] swscale: introduce new, dynamic scaling API

Checks

Commit Message

Patch