diff mbox series

[FFmpeg-devel] libavfilter: zscale performance optimization >4x

Message ID 20220210100804.1830-1-Victoria.Zhislina@intel.com
State New
Headers show
Series [FFmpeg-devel] libavfilter: zscale performance optimization >4x | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/make_aarch64_jetson success Make finished
andriy/make_fate_aarch64_jetson success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Victoria Zhislina Feb. 10, 2022, 10:08 a.m. UTC
Implement ffmpeg threading support via frame slicing, and call
zimg_filter_graph_build (which used to take 30-60% of each frame's processing
time) only when necessary, i.e. when some parameters have changed.
Compared to the original version, a performance increase of
>4x in video downscale and color conversion is seen
on a 64-core Intel Xeon, and 3x on an i7-6700K (4 cores with HT).

Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
---
 libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
 1 file changed, 475 insertions(+), 311 deletions(-)

Comments

Paul B Mahol Feb. 18, 2022, 11:43 a.m. UTC | #1
On Thu, Feb 10, 2022 at 01:08:04PM +0300, Victoria Zhislina wrote:
> By ffmpeg threading support implementation via frame slicing and doing
> zimg_filter_graph_build that used to take 30-60% of each frame processing
> only if necessary (some parameters changed)
> the performance increase vs original version
> in video downscale and color conversion  >4x is seen
> on 64 cores Intel Xeon, 3x on i7-6700K (4 cores with HT)
> 
> Signed-off-by: Victoria Zhislina <Victoria.Zhislina@intel.com>
> ---
>  libavfilter/vf_zscale.c | 786 ++++++++++++++++++++++++----------------
>  1 file changed, 475 insertions(+), 311 deletions(-)
> 
> diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
> index 1288c5efc1..ce4c0b2c76 100644
> --- a/libavfilter/vf_zscale.c
> +++ b/libavfilter/vf_zscale.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2015 Paul B Mahol
> - *
> + * * 2022 Victoria Zhislina, Intel - performance optimization

Just name, please, without extra stuff, see line above.

> + 
>   * This file is part of FFmpeg.
>   *
>   * FFmpeg is free software; you can redistribute it and/or
> @@ -44,6 +45,8 @@
>  #include "libavutil/imgutils.h"
>  
>  #define ZIMG_ALIGNMENT 32
> +#define MIN_TILESIZE 64
> +#define MAX_THREADS 64
>  
>  static const char *const var_names[] = {
>      "in_w",   "iw",
> @@ -113,13 +116,17 @@ typedef struct ZScaleContext {
>  
>      int force_original_aspect_ratio;
>  
> -    void *tmp;
> -    size_t tmp_size;
> +    void *tmp[MAX_THREADS]; //separate for each thread;
> +	int nb_threads;

Sorry, but tab characters are generally forbidden in FFmpeg source code.

> +    int slice_h;
>  
>      zimg_image_format src_format, dst_format;
>      zimg_image_format alpha_src_format, alpha_dst_format;
> +    zimg_image_format src_format_tmp, dst_format_tmp;
> +    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
>      zimg_graph_builder_params alpha_params, params;
> -    zimg_filter_graph *alpha_graph, *graph;
> +    zimg_graph_builder_params alpha_params_tmp, params_tmp;
> +    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
>  
>      enum AVColorSpace in_colorspace, out_colorspace;
>      enum AVColorTransferCharacteristic in_trc, out_trc;
> @@ -128,10 +135,181 @@ typedef struct ZScaleContext {
>      enum AVChromaLocation in_chromal, out_chromal;
>  } ZScaleContext;
>  
> +
> +typedef struct ThreadData {
> +    const AVPixFmtDescriptor *desc, *odesc;
> +    AVFrame *in, *out;
> +} ThreadData;
> +
> +static int convert_chroma_location(enum AVChromaLocation chroma_location)
> +{
> +    switch (chroma_location) {
> +    case AVCHROMA_LOC_UNSPECIFIED:
> +    case AVCHROMA_LOC_LEFT:
> +        return ZIMG_CHROMA_LEFT;
> +    case AVCHROMA_LOC_CENTER:
> +        return ZIMG_CHROMA_CENTER;
> +    case AVCHROMA_LOC_TOPLEFT:
> +        return ZIMG_CHROMA_TOP_LEFT;
> +    case AVCHROMA_LOC_TOP:
> +        return ZIMG_CHROMA_TOP;
> +    case AVCHROMA_LOC_BOTTOMLEFT:
> +        return ZIMG_CHROMA_BOTTOM_LEFT;
> +    case AVCHROMA_LOC_BOTTOM:
> +        return ZIMG_CHROMA_BOTTOM;
> +    }
> +    return ZIMG_CHROMA_LEFT;
> +}
> +
> +static int convert_matrix(enum AVColorSpace colorspace)
> +{
> +    switch (colorspace) {
> +    case AVCOL_SPC_RGB:
> +        return ZIMG_MATRIX_RGB;
> +    case AVCOL_SPC_BT709:
> +        return ZIMG_MATRIX_709;
> +    case AVCOL_SPC_UNSPECIFIED:
> +        return ZIMG_MATRIX_UNSPECIFIED;
> +    case AVCOL_SPC_FCC:
> +        return ZIMG_MATRIX_FCC;
> +    case AVCOL_SPC_BT470BG:
> +        return ZIMG_MATRIX_470BG;
> +    case AVCOL_SPC_SMPTE170M:
> +        return ZIMG_MATRIX_170M;
> +    case AVCOL_SPC_SMPTE240M:
> +        return ZIMG_MATRIX_240M;
> +    case AVCOL_SPC_YCGCO:
> +        return ZIMG_MATRIX_YCGCO;
> +    case AVCOL_SPC_BT2020_NCL:
> +        return ZIMG_MATRIX_2020_NCL;
> +    case AVCOL_SPC_BT2020_CL:
> +        return ZIMG_MATRIX_2020_CL;
> +    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> +    case AVCOL_SPC_CHROMA_DERIVED_CL:
> +        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> +    case AVCOL_SPC_ICTCP:
> +        return ZIMG_MATRIX_ICTCP;
> +    }
> +    return ZIMG_MATRIX_UNSPECIFIED;
> +}
> +
> +static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> +{
> +    switch (color_trc) {
> +    case AVCOL_TRC_UNSPECIFIED:
> +        return ZIMG_TRANSFER_UNSPECIFIED;
> +    case AVCOL_TRC_BT709:
> +        return ZIMG_TRANSFER_709;
> +    case AVCOL_TRC_GAMMA22:
> +        return ZIMG_TRANSFER_470_M;
> +    case AVCOL_TRC_GAMMA28:
> +        return ZIMG_TRANSFER_470_BG;
> +    case AVCOL_TRC_SMPTE170M:
> +        return ZIMG_TRANSFER_601;
> +    case AVCOL_TRC_SMPTE240M:
> +        return ZIMG_TRANSFER_240M;
> +    case AVCOL_TRC_LINEAR:
> +        return ZIMG_TRANSFER_LINEAR;
> +    case AVCOL_TRC_LOG:
> +        return ZIMG_TRANSFER_LOG_100;
> +    case AVCOL_TRC_LOG_SQRT:
> +        return ZIMG_TRANSFER_LOG_316;
> +    case AVCOL_TRC_IEC61966_2_4:
> +        return ZIMG_TRANSFER_IEC_61966_2_4;
> +    case AVCOL_TRC_BT2020_10:
> +        return ZIMG_TRANSFER_2020_10;
> +    case AVCOL_TRC_BT2020_12:
> +        return ZIMG_TRANSFER_2020_12;
> +    case AVCOL_TRC_SMPTE2084:
> +        return ZIMG_TRANSFER_ST2084;
> +    case AVCOL_TRC_ARIB_STD_B67:
> +        return ZIMG_TRANSFER_ARIB_B67;
> +    case AVCOL_TRC_IEC61966_2_1:
> +        return ZIMG_TRANSFER_IEC_61966_2_1;
> +    }
> +    return ZIMG_TRANSFER_UNSPECIFIED;
> +}
> +
> +static int convert_primaries(enum AVColorPrimaries color_primaries)
> +{
> +    switch (color_primaries) {
> +    case AVCOL_PRI_UNSPECIFIED:
> +        return ZIMG_PRIMARIES_UNSPECIFIED;
> +    case AVCOL_PRI_BT709:
> +        return ZIMG_PRIMARIES_709;
> +    case AVCOL_PRI_BT470M:
> +        return ZIMG_PRIMARIES_470_M;
> +    case AVCOL_PRI_BT470BG:
> +        return ZIMG_PRIMARIES_470_BG;
> +    case AVCOL_PRI_SMPTE170M:
> +        return ZIMG_PRIMARIES_170M;
> +    case AVCOL_PRI_SMPTE240M:
> +        return ZIMG_PRIMARIES_240M;
> +    case AVCOL_PRI_FILM:
> +        return ZIMG_PRIMARIES_FILM;
> +    case AVCOL_PRI_BT2020:
> +        return ZIMG_PRIMARIES_2020;
> +    case AVCOL_PRI_SMPTE428:
> +        return ZIMG_PRIMARIES_ST428;
> +    case AVCOL_PRI_SMPTE431:
> +        return ZIMG_PRIMARIES_ST431_2;
> +    case AVCOL_PRI_SMPTE432:
> +        return ZIMG_PRIMARIES_ST432_1;
> +    case AVCOL_PRI_JEDEC_P22:
> +        return ZIMG_PRIMARIES_EBU3213_E;
> +    }
> +    return ZIMG_PRIMARIES_UNSPECIFIED;
> +}
> +
> +static int convert_range(enum AVColorRange color_range)
> +{
> +    switch (color_range) {
> +    case AVCOL_RANGE_UNSPECIFIED:
> +    case AVCOL_RANGE_MPEG:
> +        return ZIMG_RANGE_LIMITED;
> +    case AVCOL_RANGE_JPEG:
> +        return ZIMG_RANGE_FULL;
> +    }
> +    return ZIMG_RANGE_LIMITED;
> +}
> +
> +static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> +{
> +    switch (color_range) {
> +    case ZIMG_RANGE_LIMITED:
> +        return AVCOL_RANGE_MPEG;
> +    case ZIMG_RANGE_FULL:
> +        return AVCOL_RANGE_JPEG;
> +    }
> +    return AVCOL_RANGE_UNSPECIFIED;
> +}
> +
>  static av_cold int init(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
>      int ret;
> +    int i;
> +
> +    for (i = 0; i < MAX_THREADS; i++) {
> +        s->tmp[i] = NULL;
> +        s->graph[i] = NULL;
> +        s->alpha_graph[i] = NULL;
> +    }
> +    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
> +    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
> +
> +    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
> +    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
>  
>      if (s->size_str && (s->w_expr || s->h_expr)) {
>          av_log(ctx, AV_LOG_ERROR,
> @@ -194,6 +372,153 @@ static int query_formats(AVFilterContext *ctx)
>      return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
>  }
>  
> +/* returns 0 if image formats are the same and 1 otherwise */
> +static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
> +{
> +    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
> +#if ZIMG_API_VERSION >= 0x204
> +        (img_fmt0->alpha != img_fmt1->alpha) ||
> +#endif
> +        (img_fmt0->color_family != img_fmt1->color_family) ||
> +        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
> +        (img_fmt0->depth != img_fmt1->depth) ||
> +        (img_fmt0->field_parity != img_fmt1->field_parity) ||
> +        (img_fmt0->height != img_fmt1->height) ||
> +        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
> +        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
> +        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
> +        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
> +        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
> +        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
> +        (img_fmt0->width != img_fmt1->width));
> +}
> +
> +/* returns 0 if graph builder parameters are the same and 1 otherwise */
> +static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
> +{
> +    /* Only the parameters that can change within a single ffmpeg zscale invocation are checked;
> +    NaN values, which are the defaults for some params, are handled explicitly below. */
> +    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
> +        (parm0->dither_type != parm1->dither_type) ||
> +        (parm0->resample_filter != parm1->resample_filter) ||
> +        (parm0->resample_filter_uv != parm1->resample_filter_uv);
> +
> +    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
> +        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
> +    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
> +        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
> +    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
> +        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
> +    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
> +        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
> +    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
> +        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
> +
> +    return ret;
> +}
> +
> +static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> +    int colorspace, int primaries, int transfer, int range, int location)
> +{
> +    format->width = frame->width;
> +    format->height = frame->height;
> +    format->subsample_w = desc->log2_chroma_w;
> +    format->subsample_h = desc->log2_chroma_h;
> +    format->depth = desc->comp[0].depth;
> +    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> +    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> +    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> +    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> +    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
> +    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> +    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> +}
> +
> +static int print_zimg_error(AVFilterContext *ctx)
> +{
> +    char err_msg[1024];
> +    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> +
> +    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> +
> +    return AVERROR_EXTERNAL;
> +}
> +
> +static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
> +    ZScaleContext *s, int job_nr)
> +{
> +    int ret;
> +    size_t size;
> +    zimg_image_format src_format;
> +    zimg_image_format dst_format;
> +    zimg_image_format alpha_src_format;
> +    zimg_image_format alpha_dst_format;
> +
> +    src_format = s->src_format;
> +    dst_format = s->dst_format;
> +    /* The input slice is specified through the active_region field,
> +    unlike the output slice.
> +    According to zimg requirements, input and output slices should have even dimensions. */
> +    src_format.active_region.width = in->width;
> +    src_format.active_region.height = s->slice_h;
> +    src_format.active_region.left = 0;
> +    src_format.active_region.top = job_nr * src_format.active_region.height;
> +    //dst now is the single tile only!!
> +    dst_format.width = out->width;
> +    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +    //the last slice could differ from the previous ones due to the slices division "tail"
> +    if (job_nr == (s->nb_threads - 1)) {
> +        src_format.active_region.height = src_format.height - src_format.active_region.top;
> +        dst_format.height = out->height - job_nr * dst_format.height;
> +    }
> +
> +    if (s->graph[job_nr]) {
> +        zimg_filter_graph_free(s->graph[job_nr]);
> +    }
> +    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
> +    if (!s->graph[job_nr])
> +        return print_zimg_error(NULL);
> +
> +    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
> +    if (ret)
> +        return print_zimg_error(NULL);
> +
> +    if (s->tmp[job_nr])
> +        av_freep(&s->tmp[job_nr]);
> +    s->tmp[job_nr] = av_malloc(size);
> +    if (!s->tmp[job_nr])
> +        return AVERROR(ENOMEM);
> +
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        alpha_src_format = s->alpha_src_format;
> +        alpha_dst_format = s->alpha_dst_format;
> +        /* The input slice is specified through the active_region field, unlike the output slice.
> +        According to zimg requirements, input and output slices should have even dimensions. */
> +        alpha_src_format.active_region.width = in->width;
> +        alpha_src_format.active_region.height = s->slice_h;
> +        alpha_src_format.active_region.left = 0;
> +        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
> +        //dst now is the single tile only!!
> +        alpha_dst_format.width = out->width;
> +        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
> +
> +        //the last slice could differ from the previous ones due to the slices division "tail"
> +        if (job_nr == (s->nb_threads - 1)) {
> +            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
> +            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
> +        }
> +
> +        if (s->alpha_graph[job_nr]) {
> +            zimg_filter_graph_free(s->alpha_graph[job_nr]);
> +        }
> +        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
> +        if (!s->alpha_graph[job_nr])
> +            return print_zimg_error(NULL);
> +     }
> +    return 0;
> +}
> +
>  static int config_props(AVFilterLink *outlink)
>  {
>      AVFilterContext *ctx = outlink->src;
> @@ -317,212 +642,15 @@ fail:
>      return ret;
>  }
>  
> -static int print_zimg_error(AVFilterContext *ctx)
> -{
> -    char err_msg[1024];
> -    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
> -
> -    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
> -
> -    return AVERROR_EXTERNAL;
> -}
> -
> -static int convert_chroma_location(enum AVChromaLocation chroma_location)
> -{
> -    switch (chroma_location) {
> -    case AVCHROMA_LOC_UNSPECIFIED:
> -    case AVCHROMA_LOC_LEFT:
> -        return ZIMG_CHROMA_LEFT;
> -    case AVCHROMA_LOC_CENTER:
> -        return ZIMG_CHROMA_CENTER;
> -    case AVCHROMA_LOC_TOPLEFT:
> -        return ZIMG_CHROMA_TOP_LEFT;
> -    case AVCHROMA_LOC_TOP:
> -        return ZIMG_CHROMA_TOP;
> -    case AVCHROMA_LOC_BOTTOMLEFT:
> -        return ZIMG_CHROMA_BOTTOM_LEFT;
> -    case AVCHROMA_LOC_BOTTOM:
> -        return ZIMG_CHROMA_BOTTOM;
> -    }
> -    return ZIMG_CHROMA_LEFT;
> -}
> -
> -static int convert_matrix(enum AVColorSpace colorspace)
> -{
> -    switch (colorspace) {
> -    case AVCOL_SPC_RGB:
> -        return ZIMG_MATRIX_RGB;
> -    case AVCOL_SPC_BT709:
> -        return ZIMG_MATRIX_709;
> -    case AVCOL_SPC_UNSPECIFIED:
> -        return ZIMG_MATRIX_UNSPECIFIED;
> -    case AVCOL_SPC_FCC:
> -        return ZIMG_MATRIX_FCC;
> -    case AVCOL_SPC_BT470BG:
> -        return ZIMG_MATRIX_470BG;
> -    case AVCOL_SPC_SMPTE170M:
> -        return ZIMG_MATRIX_170M;
> -    case AVCOL_SPC_SMPTE240M:
> -        return ZIMG_MATRIX_240M;
> -    case AVCOL_SPC_YCGCO:
> -        return ZIMG_MATRIX_YCGCO;
> -    case AVCOL_SPC_BT2020_NCL:
> -        return ZIMG_MATRIX_2020_NCL;
> -    case AVCOL_SPC_BT2020_CL:
> -        return ZIMG_MATRIX_2020_CL;
> -    case AVCOL_SPC_CHROMA_DERIVED_NCL:
> -        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
> -    case AVCOL_SPC_CHROMA_DERIVED_CL:
> -        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
> -    case AVCOL_SPC_ICTCP:
> -        return ZIMG_MATRIX_ICTCP;
> -    }
> -    return ZIMG_MATRIX_UNSPECIFIED;
> -}
> -
> -static int convert_trc(enum AVColorTransferCharacteristic color_trc)
> -{
> -    switch (color_trc) {
> -    case AVCOL_TRC_UNSPECIFIED:
> -        return ZIMG_TRANSFER_UNSPECIFIED;
> -    case AVCOL_TRC_BT709:
> -        return ZIMG_TRANSFER_709;
> -    case AVCOL_TRC_GAMMA22:
> -        return ZIMG_TRANSFER_470_M;
> -    case AVCOL_TRC_GAMMA28:
> -        return ZIMG_TRANSFER_470_BG;
> -    case AVCOL_TRC_SMPTE170M:
> -        return ZIMG_TRANSFER_601;
> -    case AVCOL_TRC_SMPTE240M:
> -        return ZIMG_TRANSFER_240M;
> -    case AVCOL_TRC_LINEAR:
> -        return ZIMG_TRANSFER_LINEAR;
> -    case AVCOL_TRC_LOG:
> -        return ZIMG_TRANSFER_LOG_100;
> -    case AVCOL_TRC_LOG_SQRT:
> -        return ZIMG_TRANSFER_LOG_316;
> -    case AVCOL_TRC_IEC61966_2_4:
> -        return ZIMG_TRANSFER_IEC_61966_2_4;
> -    case AVCOL_TRC_BT2020_10:
> -        return ZIMG_TRANSFER_2020_10;
> -    case AVCOL_TRC_BT2020_12:
> -        return ZIMG_TRANSFER_2020_12;
> -    case AVCOL_TRC_SMPTE2084:
> -        return ZIMG_TRANSFER_ST2084;
> -    case AVCOL_TRC_ARIB_STD_B67:
> -        return ZIMG_TRANSFER_ARIB_B67;
> -    case AVCOL_TRC_IEC61966_2_1:
> -        return ZIMG_TRANSFER_IEC_61966_2_1;
> -    }
> -    return ZIMG_TRANSFER_UNSPECIFIED;
> -}
> -
> -static int convert_primaries(enum AVColorPrimaries color_primaries)
> -{
> -    switch (color_primaries) {
> -    case AVCOL_PRI_UNSPECIFIED:
> -        return ZIMG_PRIMARIES_UNSPECIFIED;
> -    case AVCOL_PRI_BT709:
> -        return ZIMG_PRIMARIES_709;
> -    case AVCOL_PRI_BT470M:
> -        return ZIMG_PRIMARIES_470_M;
> -    case AVCOL_PRI_BT470BG:
> -        return ZIMG_PRIMARIES_470_BG;
> -    case AVCOL_PRI_SMPTE170M:
> -        return ZIMG_PRIMARIES_170M;
> -    case AVCOL_PRI_SMPTE240M:
> -        return ZIMG_PRIMARIES_240M;
> -    case AVCOL_PRI_FILM:
> -        return ZIMG_PRIMARIES_FILM;
> -    case AVCOL_PRI_BT2020:
> -        return ZIMG_PRIMARIES_2020;
> -    case AVCOL_PRI_SMPTE428:
> -        return ZIMG_PRIMARIES_ST428;
> -    case AVCOL_PRI_SMPTE431:
> -        return ZIMG_PRIMARIES_ST431_2;
> -    case AVCOL_PRI_SMPTE432:
> -        return ZIMG_PRIMARIES_ST432_1;
> -    case AVCOL_PRI_JEDEC_P22:
> -        return ZIMG_PRIMARIES_EBU3213_E;
> -    }
> -    return ZIMG_PRIMARIES_UNSPECIFIED;
> -}
> -
> -static int convert_range(enum AVColorRange color_range)
> -{
> -    switch (color_range) {
> -    case AVCOL_RANGE_UNSPECIFIED:
> -    case AVCOL_RANGE_MPEG:
> -        return ZIMG_RANGE_LIMITED;
> -    case AVCOL_RANGE_JPEG:
> -        return ZIMG_RANGE_FULL;
> -    }
> -    return ZIMG_RANGE_LIMITED;
> -}
> -
> -static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
> -{
> -    switch (color_range) {
> -    case ZIMG_RANGE_LIMITED:
> -        return AVCOL_RANGE_MPEG;
> -    case ZIMG_RANGE_FULL:
> -        return AVCOL_RANGE_JPEG;
> -    }
> -    return AVCOL_RANGE_UNSPECIFIED;
> -}
> -
> -static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
> -                        int colorspace, int primaries, int transfer, int range, int location)
> -{
> -    format->width = frame->width;
> -    format->height = frame->height;
> -    format->subsample_w = desc->log2_chroma_w;
> -    format->subsample_h = desc->log2_chroma_h;
> -    format->depth = desc->comp[0].depth;
> -    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
> -    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
> -    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
> -    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
> -    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
> -    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
> -    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
> -}
> -
> -static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
> -                       zimg_image_format *src_format, zimg_image_format *dst_format,
> -                       void **tmp, size_t *tmp_size)
> -{
> -    int ret;
> -    size_t size;
> -
> -    zimg_filter_graph_free(*graph);
> -    *graph = zimg_filter_graph_build(src_format, dst_format, params);
> -    if (!*graph)
> -        return print_zimg_error(NULL);
> -
> -    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
> -    if (ret)
> -        return print_zimg_error(NULL);
> -
> -    if (size > *tmp_size) {
> -        av_freep(tmp);
> -        *tmp = av_malloc(size);
> -        if (!*tmp)
> -            return AVERROR(ENOMEM);
> -
> -        *tmp_size = size;
> -    }
> -
> -    return 0;
> -}
>  
>  static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
>  {
>      AVFrame *aligned = NULL;
> -    int ret = 0, plane;
> +    int ret = 0, plane, planes;
>  
>      /* Realign any unaligned input frame. */
> -    for (plane = 0; plane < 3; plane++) {
> +    planes = av_pix_fmt_count_planes(desc->nb_components);
> +    for (plane = 0; plane < planes; plane++) {
>          int p = desc->comp[plane].plane;
>          if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
>              if (!(aligned = av_frame_alloc())) {
> @@ -554,6 +682,7 @@ fail:
>      return ret;
>  }
>  
> +

This newline is not needed.

>  static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>  {
>      if (s->colorspace != -1)
> @@ -572,20 +701,77 @@ static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
>          frame->chroma_location = (int)s->dst_format.chroma_location + 1;
>  }
>  
> +static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
> +{
> +    ThreadData *td = data;
> +    int ret = 0;
> +    int p;
> +    int out_sampl;
> +    int need_gb;
> +    ZScaleContext *s = ctx->priv;
> +    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> +    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
> +    int  dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe; 
> +
> +    /* create zimg filter graphs for each thread
> +     only if not created earlier or there is some change in frame parameters */
> +    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
> +        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
> +        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
> +    if(td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
> +        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
> +            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
> +            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
> +
> +    if (need_gb){
> +        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
> +        if (ret < 0)
> +            return print_zimg_error(ctx);
> +    }
> +    out_sampl = FFMAX3(td->out->linesize[0], td->out->linesize[1], td->out->linesize[2]);
> +    for (int i = 0; i < 3; i++) {
> +        p = td->desc->comp[i].plane;      
> +
> +        src_buf.plane[i].data = td->in->data[p];
> +        src_buf.plane[i].stride = td->in->linesize[p];
> +        src_buf.plane[i].mask = -1;
> +
> +        p = td->odesc->comp[i].plane;
> +        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * dst_tile_height * td->out->linesize[p] / out_sampl * job_nr;
> +        dst_buf.plane[i].stride = td->out->linesize[p];
> +        dst_buf.plane[i].mask = -1;
> +    }
> +    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +    if (ret) 
> +        return  print_zimg_error(ctx);
> +
> +    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        src_buf.plane[0].data = td->in->data[3];
> +        src_buf.plane[0].stride = td->in->linesize[3];
> +        src_buf.plane[0].mask = -1;
> +
> +        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height  * job_nr;
> +        dst_buf.plane[0].stride = td->out->linesize[3];
> +        dst_buf.plane[0].mask = -1;
> +
> +        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
> +        if (ret)
> +            return print_zimg_error(ctx);
> +    }
> +    return 0;
> +}
> +
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    ZScaleContext *s = link->dst->priv;
> -    AVFilterLink *outlink = link->dst->outputs[0];
> +    AVFilterContext *ctx = link->dst;
> +    ZScaleContext *s = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
> -    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
>      char buf[32];
> -    int ret = 0, plane;
> +    int ret = 0;
>      AVFrame *out = NULL;
> -
> -    if ((ret = realign_frame(desc, &in)) < 0)
> -        goto fail;
> +    ThreadData td;
>  
>      if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
>          ret =  AVERROR(ENOMEM);
> @@ -596,35 +782,60 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      out->width  = outlink->w;
>      out->height = outlink->h;
>  
> -    if(   in->width  != link->w
> -       || in->height != link->h
> -       || in->format != link->format
> -       || s->in_colorspace != in->colorspace
> -       || s->in_trc  != in->color_trc
> -       || s->in_primaries != in->color_primaries
> -       || s->in_range != in->color_range
> -       || s->out_colorspace != out->colorspace
> -       || s->out_trc  != out->color_trc
> -       || s->out_primaries != out->color_primaries
> -       || s->out_range != out->color_range
> -       || s->in_chromal != in->chroma_location
> -       || s->out_chromal != out->chroma_location) {
> +    // The filter only needs to run if the input and output differ in some way;
> +    // otherwise just copy the input frame to the output.
> +    if ((link->w != outlink->w) ||
> +        (link->h != outlink->h) ||
> +        (s->src_format.chroma_location != s->dst_format.chroma_location)||

Please use space between ')' and '||' here and anywhere else.

> +        (s->src_format.color_family !=s->dst_format.color_family)||
> +        (s->src_format.color_primaries !=s->dst_format.color_primaries)||
> +        (s->src_format.depth !=s->dst_format.depth)||
> +        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
> +        (s->src_format.field_parity !=s->dst_format.field_parity)||
> +        (s->src_format.pixel_range !=s->dst_format.pixel_range)||
> +        (s->src_format.pixel_type !=s->dst_format.pixel_type)||
> +        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
> +    ){
> +        if ((ret = realign_frame(desc, &in)) < 0)
> +            goto fail;
> +
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
>          av_opt_set(s, "w", buf, 0);
>          snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
>          av_opt_set(s, "h", buf, 0);
>  
> +

This new line is not needed.

>          link->dst->inputs[0]->format = in->format;
>          link->dst->inputs[0]->w      = in->width;
>          link->dst->inputs[0]->h      = in->height;
>  
> -        if ((ret = config_props(outlink)) < 0)
> -            goto fail;
> +        update_output_color_information(s, out);
> +    
> +        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
> +        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
> +        s->in_colorspace = in->colorspace;
> +        s->in_trc = in->color_trc;
> +        s->in_primaries = in->color_primaries;
> +        s->in_range = in->color_range;
> +        s->out_colorspace = out->colorspace;
> +        s->out_trc = out->color_trc;
> +        s->out_primaries = out->color_primaries;
> +        s->out_range = out->color_range;
> +    
> +        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> +                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> +                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> +                  INT_MAX);
>  
>          zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
>          zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
>          zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
>  
> +        format_init(&s->src_format, in, desc, s->colorspace_in,
> +            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> +        format_init(&s->dst_format, out, odesc, s->colorspace,
> +            s->primaries, s->trc, s->range, s->chromal);
> +
>          s->params.dither_type = s->dither;
>          s->params.cpu_type = ZIMG_CPU_AUTO;
>          s->params.resample_filter = s->filter;
> @@ -634,27 +845,6 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>          s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
>          s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
>  
> -        format_init(&s->src_format, in, desc, s->colorspace_in,
> -                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
> -        format_init(&s->dst_format, out, odesc, s->colorspace,
> -                    s->primaries, s->trc, s->range, s->chromal);
> -
> -        update_output_color_information(s, out);
> -
> -        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
> -                          &s->tmp, &s->tmp_size);
> -        if (ret < 0)
> -            goto fail;
> -
> -        s->in_colorspace  = in->colorspace;
> -        s->in_trc         = in->color_trc;
> -        s->in_primaries   = in->color_primaries;
> -        s->in_range       = in->color_range;
> -        s->out_colorspace = out->colorspace;
> -        s->out_trc        = out->color_trc;
> -        s->out_primaries  = out->color_primaries;
> -        s->out_range      = out->color_range;
> -
>          if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
>              zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
>              zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
> @@ -670,76 +860,48 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>              s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
>  
> -            s->alpha_dst_format.width = out->width;
> -            s->alpha_dst_format.height = out->height;
>              s->alpha_dst_format.depth = odesc->comp[0].depth;
>              s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
>              s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
> -
> -            zimg_filter_graph_free(s->alpha_graph);
> -            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
> -            if (!s->alpha_graph) {
> -                ret = print_zimg_error(link->dst);
> -                goto fail;
> -            }
>          }
> -    }
>  
> -    update_output_color_information(s, out);
> +        td.in = in;
> +        td.out = out;
> +        td.desc = desc;
> +        td.odesc = odesc;
>  
> -    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
> -              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
> -              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
> -              INT_MAX);
> -
> -    for (plane = 0; plane < 3; plane++) {
> -        int p = desc->comp[plane].plane;
> -        src_buf.plane[plane].data   = in->data[p];
> -        src_buf.plane[plane].stride = in->linesize[p];
> -        src_buf.plane[plane].mask   = -1;
> -
> -        p = odesc->comp[plane].plane;
> -        dst_buf.plane[plane].data   = out->data[p];
> -        dst_buf.plane[plane].stride = out->linesize[p];
> -        dst_buf.plane[plane].mask   = -1;
> -    }
> -
> -    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -    if (ret) {
> -        ret = print_zimg_error(link->dst);
> -        goto fail;
> -    }
> +        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
>  
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        src_buf.plane[0].data   = in->data[3];
> -        src_buf.plane[0].stride = in->linesize[3];
> -        src_buf.plane[0].mask   = -1;
> -
> -        dst_buf.plane[0].data   = out->data[3];
> -        dst_buf.plane[0].stride = out->linesize[3];
> -        dst_buf.plane[0].mask   = -1;
> -
> -        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
> -        if (ret) {
> -            ret = print_zimg_error(link->dst);
> -            goto fail;
> +        s->src_format_tmp = s->src_format;
> +        s->dst_format_tmp = s->dst_format;
> +        s->params_tmp = s->params;
> +        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +            s->alpha_src_format_tmp = s->alpha_src_format;
> +            s->alpha_dst_format_tmp = s->alpha_dst_format;
> +            s->alpha_params_tmp = s->alpha_params;
>          }
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        int x, y;
> -
> -        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> -            for (y = 0; y < out->height; y++) {
> -                for (x = 0; x < out->width; x++) {
> -                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> -                            av_float2int(1.0f));
> +
> +        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
> +            int x, y;
> +            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> +                for (y = 0; y < out->height; y++) {
> +                    for (x = 0; x < out->width; x++) {
> +                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
> +                                av_float2int(1.0f));
> +                    }
>                  }
> +            } else {
> +                for (y = 0; y < outlink->h; y++)
> +                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>              }
> -        } else {
> -            for (y = 0; y < outlink->h; y++)
> -                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
>          }
>      }
> -
> +    else {
> +        /*no need for any filtering */
> +        ret = av_frame_copy(out, in);
> +        if (ret < 0)
> +            return ret;
> +    }
>  fail:
>      av_frame_free(&in);
>      if (ret) {
> @@ -753,11 +915,12 @@ fail:
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      ZScaleContext *s = ctx->priv;
> -
> -    zimg_filter_graph_free(s->graph);
> -    zimg_filter_graph_free(s->alpha_graph);
> -    av_freep(&s->tmp);
> -    s->tmp_size = 0;
> +    int i;
> +    for (i = 0; i < s->nb_threads; i++) {
> +        if (s->tmp[i]) av_freep(&s->tmp[i]);

This NULL check is not needed; av_freep() already handles a NULL pointer.

> +        if (s->graph[i]) zimg_filter_graph_free(s->graph[i]);
> +        if (s->alpha_graph[i]) zimg_filter_graph_free(s->alpha_graph[i]);
> +    }
>  }
>  
>  static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
> @@ -941,4 +1104,5 @@ const AVFilter ff_vf_zscale = {
>      FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
>      FILTER_QUERY_FUNC(query_formats),
>      .process_command = process_command,
> +    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,

Timeline support does not work if w/h changes, so just remove this flag from here.

>  };
> -- 
> 2.31.1.windows.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index 1288c5efc1..ce4c0b2c76 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -1,6 +1,7 @@ 
 /*
  * Copyright (c) 2015 Paul B Mahol
- *
+ * * 2022 Victoria Zhislina, Intel - performance optimization
+ 
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -44,6 +45,8 @@ 
 #include "libavutil/imgutils.h"
 
 #define ZIMG_ALIGNMENT 32
+#define MIN_TILESIZE 64
+#define MAX_THREADS 64
 
 static const char *const var_names[] = {
     "in_w",   "iw",
@@ -113,13 +116,17 @@  typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
+    void *tmp[MAX_THREADS]; //separate for each thread;
+	int nb_threads;
+    int slice_h;
 
     zimg_image_format src_format, dst_format;
     zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_image_format src_format_tmp, dst_format_tmp;
+    zimg_image_format alpha_src_format_tmp, alpha_dst_format_tmp;
     zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    zimg_graph_builder_params alpha_params_tmp, params_tmp;
+    zimg_filter_graph *alpha_graph[MAX_THREADS], *graph[MAX_THREADS];
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -128,10 +135,181 @@  typedef struct ZScaleContext {
     enum AVChromaLocation in_chromal, out_chromal;
 } ZScaleContext;
 
+
+typedef struct ThreadData {
+    const AVPixFmtDescriptor *desc, *odesc;
+    AVFrame *in, *out;
+} ThreadData;
+
+static int convert_chroma_location(enum AVChromaLocation chroma_location)
+{
+    switch (chroma_location) {
+    case AVCHROMA_LOC_UNSPECIFIED:
+    case AVCHROMA_LOC_LEFT:
+        return ZIMG_CHROMA_LEFT;
+    case AVCHROMA_LOC_CENTER:
+        return ZIMG_CHROMA_CENTER;
+    case AVCHROMA_LOC_TOPLEFT:
+        return ZIMG_CHROMA_TOP_LEFT;
+    case AVCHROMA_LOC_TOP:
+        return ZIMG_CHROMA_TOP;
+    case AVCHROMA_LOC_BOTTOMLEFT:
+        return ZIMG_CHROMA_BOTTOM_LEFT;
+    case AVCHROMA_LOC_BOTTOM:
+        return ZIMG_CHROMA_BOTTOM;
+    }
+    return ZIMG_CHROMA_LEFT;
+}
+
+static int convert_matrix(enum AVColorSpace colorspace)
+{
+    switch (colorspace) {
+    case AVCOL_SPC_RGB:
+        return ZIMG_MATRIX_RGB;
+    case AVCOL_SPC_BT709:
+        return ZIMG_MATRIX_709;
+    case AVCOL_SPC_UNSPECIFIED:
+        return ZIMG_MATRIX_UNSPECIFIED;
+    case AVCOL_SPC_FCC:
+        return ZIMG_MATRIX_FCC;
+    case AVCOL_SPC_BT470BG:
+        return ZIMG_MATRIX_470BG;
+    case AVCOL_SPC_SMPTE170M:
+        return ZIMG_MATRIX_170M;
+    case AVCOL_SPC_SMPTE240M:
+        return ZIMG_MATRIX_240M;
+    case AVCOL_SPC_YCGCO:
+        return ZIMG_MATRIX_YCGCO;
+    case AVCOL_SPC_BT2020_NCL:
+        return ZIMG_MATRIX_2020_NCL;
+    case AVCOL_SPC_BT2020_CL:
+        return ZIMG_MATRIX_2020_CL;
+    case AVCOL_SPC_CHROMA_DERIVED_NCL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
+    case AVCOL_SPC_CHROMA_DERIVED_CL:
+        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
+    case AVCOL_SPC_ICTCP:
+        return ZIMG_MATRIX_ICTCP;
+    }
+    return ZIMG_MATRIX_UNSPECIFIED;
+}
+
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{
+    switch (color_trc) {
+    case AVCOL_TRC_UNSPECIFIED:
+        return ZIMG_TRANSFER_UNSPECIFIED;
+    case AVCOL_TRC_BT709:
+        return ZIMG_TRANSFER_709;
+    case AVCOL_TRC_GAMMA22:
+        return ZIMG_TRANSFER_470_M;
+    case AVCOL_TRC_GAMMA28:
+        return ZIMG_TRANSFER_470_BG;
+    case AVCOL_TRC_SMPTE170M:
+        return ZIMG_TRANSFER_601;
+    case AVCOL_TRC_SMPTE240M:
+        return ZIMG_TRANSFER_240M;
+    case AVCOL_TRC_LINEAR:
+        return ZIMG_TRANSFER_LINEAR;
+    case AVCOL_TRC_LOG:
+        return ZIMG_TRANSFER_LOG_100;
+    case AVCOL_TRC_LOG_SQRT:
+        return ZIMG_TRANSFER_LOG_316;
+    case AVCOL_TRC_IEC61966_2_4:
+        return ZIMG_TRANSFER_IEC_61966_2_4;
+    case AVCOL_TRC_BT2020_10:
+        return ZIMG_TRANSFER_2020_10;
+    case AVCOL_TRC_BT2020_12:
+        return ZIMG_TRANSFER_2020_12;
+    case AVCOL_TRC_SMPTE2084:
+        return ZIMG_TRANSFER_ST2084;
+    case AVCOL_TRC_ARIB_STD_B67:
+        return ZIMG_TRANSFER_ARIB_B67;
+    case AVCOL_TRC_IEC61966_2_1:
+        return ZIMG_TRANSFER_IEC_61966_2_1;
+    }
+    return ZIMG_TRANSFER_UNSPECIFIED;
+}
+
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{
+    switch (color_primaries) {
+    case AVCOL_PRI_UNSPECIFIED:
+        return ZIMG_PRIMARIES_UNSPECIFIED;
+    case AVCOL_PRI_BT709:
+        return ZIMG_PRIMARIES_709;
+    case AVCOL_PRI_BT470M:
+        return ZIMG_PRIMARIES_470_M;
+    case AVCOL_PRI_BT470BG:
+        return ZIMG_PRIMARIES_470_BG;
+    case AVCOL_PRI_SMPTE170M:
+        return ZIMG_PRIMARIES_170M;
+    case AVCOL_PRI_SMPTE240M:
+        return ZIMG_PRIMARIES_240M;
+    case AVCOL_PRI_FILM:
+        return ZIMG_PRIMARIES_FILM;
+    case AVCOL_PRI_BT2020:
+        return ZIMG_PRIMARIES_2020;
+    case AVCOL_PRI_SMPTE428:
+        return ZIMG_PRIMARIES_ST428;
+    case AVCOL_PRI_SMPTE431:
+        return ZIMG_PRIMARIES_ST431_2;
+    case AVCOL_PRI_SMPTE432:
+        return ZIMG_PRIMARIES_ST432_1;
+    case AVCOL_PRI_JEDEC_P22:
+        return ZIMG_PRIMARIES_EBU3213_E;
+    }
+    return ZIMG_PRIMARIES_UNSPECIFIED;
+}
+
+static int convert_range(enum AVColorRange color_range)
+{
+    switch (color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_MPEG:
+        return ZIMG_RANGE_LIMITED;
+    case AVCOL_RANGE_JPEG:
+        return ZIMG_RANGE_FULL;
+    }
+    return ZIMG_RANGE_LIMITED;
+}
+
+static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
+{
+    switch (color_range) {
+    case ZIMG_RANGE_LIMITED:
+        return AVCOL_RANGE_MPEG;
+    case ZIMG_RANGE_FULL:
+        return AVCOL_RANGE_JPEG;
+    }
+    return AVCOL_RANGE_UNSPECIFIED;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
     int ret;
+    int i;
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->tmp[i] = NULL;
+        s->graph[i] = NULL;
+        s->alpha_graph[i] = NULL;
+    }
+    zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_src_format_tmp, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->alpha_dst_format_tmp, ZIMG_API_VERSION);
+
+    zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->params_tmp, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->alpha_params_tmp, ZIMG_API_VERSION);
 
     if (s->size_str && (s->w_expr || s->h_expr)) {
         av_log(ctx, AV_LOG_ERROR,
@@ -194,6 +372,153 @@  static int query_formats(AVFilterContext *ctx)
     return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats);
 }
 
+/* returns 0 if image formats are the same and 1 otherwise */
+static int compare_zimg_image_formats(zimg_image_format *img_fmt0, zimg_image_format *img_fmt1)
+{
+    return ((img_fmt0->chroma_location != img_fmt1->chroma_location) ||
+#if ZIMG_API_VERSION >= 0x204
+        (img_fmt0->alpha != img_fmt1->alpha) ||
+#endif
+        (img_fmt0->color_family != img_fmt1->color_family) ||
+        (img_fmt0->color_primaries != img_fmt1->color_primaries) ||
+        (img_fmt0->depth != img_fmt1->depth) ||
+        (img_fmt0->field_parity != img_fmt1->field_parity) ||
+        (img_fmt0->height != img_fmt1->height) ||
+        (img_fmt0->matrix_coefficients != img_fmt1->matrix_coefficients) ||
+        (img_fmt0->pixel_range != img_fmt1->pixel_range) ||
+        (img_fmt0->pixel_type != img_fmt1->pixel_type) ||
+        (img_fmt0->subsample_h != img_fmt1->subsample_h) ||
+        (img_fmt0->subsample_w != img_fmt1->subsample_w) ||
+        (img_fmt0->transfer_characteristics != img_fmt1->transfer_characteristics) ||
+        (img_fmt0->width != img_fmt1->width));
+}
+
+/* returns 0 if graph builder parameters are the same and 1 otherwise */
+static int compare_zimg_graph_builder_params(zimg_graph_builder_params *parm0, zimg_graph_builder_params *parm1)
+{
+    /* the parameters that could be changed inside a single ffmpeg zscale invocation  are checked only
+    and NaN values that are default for some params are treated properly*/
+    int ret = (parm0->allow_approximate_gamma != parm1->allow_approximate_gamma) ||
+        (parm0->dither_type != parm1->dither_type) ||
+        (parm0->resample_filter != parm1->resample_filter) ||
+        (parm0->resample_filter_uv != parm1->resample_filter_uv);
+
+    if ((isnan(parm0->nominal_peak_luminance) == 0) || (isnan(parm1->nominal_peak_luminance) == 0))
+        ret = ret || (parm0->nominal_peak_luminance != parm1->nominal_peak_luminance);
+    if ((isnan(parm0->filter_param_a) == 0) || (isnan(parm1->filter_param_a) == 0))
+        ret = ret || (parm0->filter_param_a != parm1->filter_param_a);
+    if ((isnan(parm0->filter_param_a_uv) == 0) || (isnan(parm1->filter_param_a_uv) == 0))
+        ret = ret || (parm0->filter_param_a_uv != parm1->filter_param_a_uv);
+    if ((isnan(parm0->filter_param_b) == 0) || (isnan(parm1->filter_param_b) == 0))
+        ret = ret || (parm0->filter_param_b != parm1->filter_param_b);
+    if ((isnan(parm0->filter_param_b_uv) == 0) || (isnan(parm1->filter_param_b_uv) == 0))
+        ret = ret || (parm0->filter_param_b_uv != parm1->filter_param_b_uv);
+
+    return ret;
+}
+
+static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
+    int colorspace, int primaries, int transfer, int range, int location)
+{
+    format->width = frame->width;
+    format->height = frame->height;
+    format->subsample_w = desc->log2_chroma_w;
+    format->subsample_h = desc->log2_chroma_h;
+    format->depth = desc->comp[0].depth;
+    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
+    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
+    format->transfer_characteristics = transfer == -1 ? convert_trc(frame->color_trc) : transfer;
+    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
+    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+}
+
+static int print_zimg_error(AVFilterContext *ctx)
+{
+    char err_msg[1024];
+    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
+
+    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
+
+    return AVERROR_EXTERNAL;
+}
+
+static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *desc, const AVPixFmtDescriptor *out_desc,
+    ZScaleContext *s, int job_nr)
+{
+    int ret;
+    size_t size;
+    zimg_image_format src_format;
+    zimg_image_format dst_format;
+    zimg_image_format alpha_src_format;
+    zimg_image_format alpha_dst_format;
+
+    src_format = s->src_format;
+    dst_format = s->dst_format;
+    /* The input slice is specified through the active_region field, 
+    unlike the output slice.
+    according to zimg requirements input and output slices should have even dimentions */
+    src_format.active_region.width = in->width;
+    src_format.active_region.height = s->slice_h;
+    src_format.active_region.left = 0;
+    src_format.active_region.top = job_nr * src_format.active_region.height;
+    //dst now is the single tile only!!
+    dst_format.width = out->width;
+    dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+    //the last slice could differ from the previous ones due to the slices division "tail"
+    if (job_nr == (s->nb_threads - 1)) {
+        src_format.active_region.height = src_format.height - src_format.active_region.top;
+        dst_format.height = out->height - job_nr * dst_format.height;
+    }
+
+    if (s->graph[job_nr]) {
+        zimg_filter_graph_free(s->graph[job_nr]);
+    }
+    s->graph[job_nr] = zimg_filter_graph_build(&src_format, &dst_format, &s->params);
+    if (!s->graph[job_nr])
+        return print_zimg_error(NULL);
+
+    ret = zimg_filter_graph_get_tmp_size(s->graph[job_nr], &size);
+    if (ret)
+        return print_zimg_error(NULL);
+
+    if (s->tmp[job_nr])
+        av_freep(&s->tmp[job_nr]);
+    s->tmp[job_nr] = av_malloc(size);
+    if (!s->tmp[job_nr])
+        return AVERROR(ENOMEM);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && out_desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        alpha_src_format = s->alpha_src_format;
+        alpha_dst_format = s->alpha_dst_format;
+        /* The input slice is specified through the active_region field, unlike the output slice.
+        according to zimg requirements input and output slices should have even dimentions */
+        alpha_src_format.active_region.width = in->width;
+        alpha_src_format.active_region.height = s->slice_h;
+        alpha_src_format.active_region.left = 0;
+        alpha_src_format.active_region.top = job_nr * alpha_src_format.active_region.height;
+        //dst now is the single tile only!!
+        alpha_dst_format.width = out->width;
+        alpha_dst_format.height = ((unsigned int)(out->height / s->nb_threads)) & 0xfffffffe;
+
+        //the last slice could differ from the previous ones due to the slices division "tail"
+        if (job_nr == (s->nb_threads - 1)) {
+            alpha_src_format.active_region.height = alpha_src_format.height - alpha_src_format.active_region.top;
+            alpha_dst_format.height = out->height - job_nr * alpha_dst_format.height;
+        }
+
+        if (s->alpha_graph[job_nr]) {
+            zimg_filter_graph_free(s->alpha_graph[job_nr]);
+        }
+        s->alpha_graph[job_nr] = zimg_filter_graph_build(&alpha_src_format, &alpha_dst_format, &s->alpha_params);
+        if (!s->alpha_graph[job_nr])
+            return print_zimg_error(NULL);
+     }
+    return 0;
+}
+
 static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -317,212 +642,15 @@  fail:
     return ret;
 }
 
-static int print_zimg_error(AVFilterContext *ctx)
-{
-    char err_msg[1024];
-    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
-
-    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
-
-    return AVERROR_EXTERNAL;
-}
-
-static int convert_chroma_location(enum AVChromaLocation chroma_location)
-{
-    switch (chroma_location) {
-    case AVCHROMA_LOC_UNSPECIFIED:
-    case AVCHROMA_LOC_LEFT:
-        return ZIMG_CHROMA_LEFT;
-    case AVCHROMA_LOC_CENTER:
-        return ZIMG_CHROMA_CENTER;
-    case AVCHROMA_LOC_TOPLEFT:
-        return ZIMG_CHROMA_TOP_LEFT;
-    case AVCHROMA_LOC_TOP:
-        return ZIMG_CHROMA_TOP;
-    case AVCHROMA_LOC_BOTTOMLEFT:
-        return ZIMG_CHROMA_BOTTOM_LEFT;
-    case AVCHROMA_LOC_BOTTOM:
-        return ZIMG_CHROMA_BOTTOM;
-    }
-    return ZIMG_CHROMA_LEFT;
-}
-
-static int convert_matrix(enum AVColorSpace colorspace)
-{
-    switch (colorspace) {
-    case AVCOL_SPC_RGB:
-        return ZIMG_MATRIX_RGB;
-    case AVCOL_SPC_BT709:
-        return ZIMG_MATRIX_709;
-    case AVCOL_SPC_UNSPECIFIED:
-        return ZIMG_MATRIX_UNSPECIFIED;
-    case AVCOL_SPC_FCC:
-        return ZIMG_MATRIX_FCC;
-    case AVCOL_SPC_BT470BG:
-        return ZIMG_MATRIX_470BG;
-    case AVCOL_SPC_SMPTE170M:
-        return ZIMG_MATRIX_170M;
-    case AVCOL_SPC_SMPTE240M:
-        return ZIMG_MATRIX_240M;
-    case AVCOL_SPC_YCGCO:
-        return ZIMG_MATRIX_YCGCO;
-    case AVCOL_SPC_BT2020_NCL:
-        return ZIMG_MATRIX_2020_NCL;
-    case AVCOL_SPC_BT2020_CL:
-        return ZIMG_MATRIX_2020_CL;
-    case AVCOL_SPC_CHROMA_DERIVED_NCL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_NCL;
-    case AVCOL_SPC_CHROMA_DERIVED_CL:
-        return ZIMG_MATRIX_CHROMATICITY_DERIVED_CL;
-    case AVCOL_SPC_ICTCP:
-        return ZIMG_MATRIX_ICTCP;
-    }
-    return ZIMG_MATRIX_UNSPECIFIED;
-}
-
-static int convert_trc(enum AVColorTransferCharacteristic color_trc)
-{
-    switch (color_trc) {
-    case AVCOL_TRC_UNSPECIFIED:
-        return ZIMG_TRANSFER_UNSPECIFIED;
-    case AVCOL_TRC_BT709:
-        return ZIMG_TRANSFER_709;
-    case AVCOL_TRC_GAMMA22:
-        return ZIMG_TRANSFER_470_M;
-    case AVCOL_TRC_GAMMA28:
-        return ZIMG_TRANSFER_470_BG;
-    case AVCOL_TRC_SMPTE170M:
-        return ZIMG_TRANSFER_601;
-    case AVCOL_TRC_SMPTE240M:
-        return ZIMG_TRANSFER_240M;
-    case AVCOL_TRC_LINEAR:
-        return ZIMG_TRANSFER_LINEAR;
-    case AVCOL_TRC_LOG:
-        return ZIMG_TRANSFER_LOG_100;
-    case AVCOL_TRC_LOG_SQRT:
-        return ZIMG_TRANSFER_LOG_316;
-    case AVCOL_TRC_IEC61966_2_4:
-        return ZIMG_TRANSFER_IEC_61966_2_4;
-    case AVCOL_TRC_BT2020_10:
-        return ZIMG_TRANSFER_2020_10;
-    case AVCOL_TRC_BT2020_12:
-        return ZIMG_TRANSFER_2020_12;
-    case AVCOL_TRC_SMPTE2084:
-        return ZIMG_TRANSFER_ST2084;
-    case AVCOL_TRC_ARIB_STD_B67:
-        return ZIMG_TRANSFER_ARIB_B67;
-    case AVCOL_TRC_IEC61966_2_1:
-        return ZIMG_TRANSFER_IEC_61966_2_1;
-    }
-    return ZIMG_TRANSFER_UNSPECIFIED;
-}
-
-static int convert_primaries(enum AVColorPrimaries color_primaries)
-{
-    switch (color_primaries) {
-    case AVCOL_PRI_UNSPECIFIED:
-        return ZIMG_PRIMARIES_UNSPECIFIED;
-    case AVCOL_PRI_BT709:
-        return ZIMG_PRIMARIES_709;
-    case AVCOL_PRI_BT470M:
-        return ZIMG_PRIMARIES_470_M;
-    case AVCOL_PRI_BT470BG:
-        return ZIMG_PRIMARIES_470_BG;
-    case AVCOL_PRI_SMPTE170M:
-        return ZIMG_PRIMARIES_170M;
-    case AVCOL_PRI_SMPTE240M:
-        return ZIMG_PRIMARIES_240M;
-    case AVCOL_PRI_FILM:
-        return ZIMG_PRIMARIES_FILM;
-    case AVCOL_PRI_BT2020:
-        return ZIMG_PRIMARIES_2020;
-    case AVCOL_PRI_SMPTE428:
-        return ZIMG_PRIMARIES_ST428;
-    case AVCOL_PRI_SMPTE431:
-        return ZIMG_PRIMARIES_ST431_2;
-    case AVCOL_PRI_SMPTE432:
-        return ZIMG_PRIMARIES_ST432_1;
-    case AVCOL_PRI_JEDEC_P22:
-        return ZIMG_PRIMARIES_EBU3213_E;
-    }
-    return ZIMG_PRIMARIES_UNSPECIFIED;
-}
-
-static int convert_range(enum AVColorRange color_range)
-{
-    switch (color_range) {
-    case AVCOL_RANGE_UNSPECIFIED:
-    case AVCOL_RANGE_MPEG:
-        return ZIMG_RANGE_LIMITED;
-    case AVCOL_RANGE_JPEG:
-        return ZIMG_RANGE_FULL;
-    }
-    return ZIMG_RANGE_LIMITED;
-}
-
-static enum AVColorRange convert_range_from_zimg(enum zimg_pixel_range_e color_range)
-{
-    switch (color_range) {
-    case ZIMG_RANGE_LIMITED:
-        return AVCOL_RANGE_MPEG;
-    case ZIMG_RANGE_FULL:
-        return AVCOL_RANGE_JPEG;
-    }
-    return AVCOL_RANGE_UNSPECIFIED;
-}
-
-static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
-                        int colorspace, int primaries, int transfer, int range, int location)
-{
-    format->width = frame->width;
-    format->height = frame->height;
-    format->subsample_w = desc->log2_chroma_w;
-    format->subsample_h = desc->log2_chroma_h;
-    format->depth = desc->comp[0].depth;
-    format->pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-    format->color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
-    format->matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : colorspace == -1 ? convert_matrix(frame->colorspace) : colorspace;
-    format->color_primaries = primaries == -1 ? convert_primaries(frame->color_primaries) : primaries;
-    format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
-    format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
-    format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
-}
-
-static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
-                       zimg_image_format *src_format, zimg_image_format *dst_format,
-                       void **tmp, size_t *tmp_size)
-{
-    int ret;
-    size_t size;
-
-    zimg_filter_graph_free(*graph);
-    *graph = zimg_filter_graph_build(src_format, dst_format, params);
-    if (!*graph)
-        return print_zimg_error(NULL);
-
-    ret = zimg_filter_graph_get_tmp_size(*graph, &size);
-    if (ret)
-        return print_zimg_error(NULL);
-
-    if (size > *tmp_size) {
-        av_freep(tmp);
-        *tmp = av_malloc(size);
-        if (!*tmp)
-            return AVERROR(ENOMEM);
-
-        *tmp_size = size;
-    }
-
-    return 0;
-}
 
 static int realign_frame(const AVPixFmtDescriptor *desc, AVFrame **frame)
 {
     AVFrame *aligned = NULL;
-    int ret = 0, plane;
+    int ret = 0, plane, planes;
 
     /* Realign any unaligned input frame. */
-    for (plane = 0; plane < 3; plane++) {
+    planes = av_pix_fmt_count_planes(desc->nb_components);
+    for (plane = 0; plane < planes; plane++) {
         int p = desc->comp[plane].plane;
         if ((uintptr_t)(*frame)->data[p] % ZIMG_ALIGNMENT || (*frame)->linesize[p] % ZIMG_ALIGNMENT) {
             if (!(aligned = av_frame_alloc())) {
@@ -554,6 +682,7 @@  fail:
     return ret;
 }
 
+
 static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
 {
     if (s->colorspace != -1)
@@ -572,20 +701,77 @@  static void update_output_color_information(ZScaleContext *s, AVFrame *frame)
         frame->chroma_location = (int)s->dst_format.chroma_location + 1;
 }
 
+/* Slice-threading job: runs the zimg graph(s) of job job_nr and writes output band job_nr of td->out.
+ * Returns 0 on success, otherwise whatever print_zimg_error() returns. */
+static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs)
+{
+    ThreadData *td = data;
+    ZScaleContext *s = ctx->priv;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    /* Output (luma) rows per job, rounded down to an even count. */
+    int dst_tile_height = ((unsigned int)(td->out->height / n_jobs)) & 0xfffffffe;
+    int ret = 0, p, need_gb;
+
+    /* (Re)build this job's zimg filter graphs only when a compare_* check of the
+     * current formats/params against their *_tmp copies is nonzero. */
+    need_gb = compare_zimg_image_formats(&s->src_format, &s->src_format_tmp) ||
+        compare_zimg_image_formats(&s->dst_format, &s->dst_format_tmp) ||
+        compare_zimg_graph_builder_params(&s->params, &s->params_tmp);
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA)
+        need_gb = need_gb || compare_zimg_image_formats(&s->alpha_src_format, &s->alpha_src_format_tmp) ||
+            compare_zimg_image_formats(&s->alpha_dst_format, &s->alpha_dst_format_tmp) ||
+            compare_zimg_graph_builder_params(&s->alpha_params, &s->alpha_params_tmp);
+    if (need_gb) {
+        ret = graphs_build(td->in, td->out, td->desc, td->odesc, s, job_nr);
+        if (ret < 0)
+            return print_zimg_error(ctx);
+    }
+
+    for (int i = 0; i < 3; i++) {
+        /* Chroma rows are subsampled vertically by log2_chroma_h; use that for the row offset
+         * rather than a linesize ratio, which only reflects plane widths and padding. */
+        const int vsamp = i >= 1 ? td->odesc->log2_chroma_h : 0;
+        p = td->desc->comp[i].plane;
+        src_buf.plane[i].data = td->in->data[p];
+        src_buf.plane[i].stride = td->in->linesize[p];
+        src_buf.plane[i].mask = -1;
+
+        p = td->odesc->comp[i].plane;
+        dst_buf.plane[i].data = td->out->data[p] + td->out->linesize[p] * ((dst_tile_height * job_nr) >> vsamp);
+        dst_buf.plane[i].stride = td->out->linesize[p];
+        dst_buf.plane[i].mask = -1;
+    }
+    ret = zimg_filter_graph_process(s->graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+    if (ret)
+        return print_zimg_error(ctx);
+
+    if (td->desc->flags & AV_PIX_FMT_FLAG_ALPHA && td->odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        /* Alpha (plane 3) is run through its own graph as plane 0, at full tile height. */
+        src_buf.plane[0].data = td->in->data[3];
+        src_buf.plane[0].stride = td->in->linesize[3];
+        src_buf.plane[0].mask = -1;
+        dst_buf.plane[0].data = td->out->data[3] + td->out->linesize[3] * dst_tile_height * job_nr;
+        dst_buf.plane[0].stride = td->out->linesize[3];
+        dst_buf.plane[0].mask = -1;
+        ret = zimg_filter_graph_process(s->alpha_graph[job_nr], &src_buf, &dst_buf, s->tmp[job_nr], 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    }
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
     AVFrame *out = NULL;
-
-    if ((ret = realign_frame(desc, &in)) < 0)
-        goto fail;
+    ThreadData td;
 
     if (!(out = ff_get_video_buffer(outlink, outlink->w, outlink->h))) {
         ret =  AVERROR(ENOMEM);
@@ -596,35 +782,60 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
     out->width  = outlink->w;
     out->height = outlink->h;
 
-    if(   in->width  != link->w
-       || in->height != link->h
-       || in->format != link->format
-       || s->in_colorspace != in->colorspace
-       || s->in_trc  != in->color_trc
-       || s->in_primaries != in->color_primaries
-       || s->in_range != in->color_range
-       || s->out_colorspace != out->colorspace
-       || s->out_trc  != out->color_trc
-       || s->out_primaries != out->color_primaries
-       || s->out_range != out->color_range
-       || s->in_chromal != in->chroma_location
-       || s->out_chromal != out->chroma_location) {
+    // Run the zimg filter only when the input and output differ in size or format;
+    // otherwise just copy the input frame to the output.
+    if ((link->w != outlink->w) ||
+        (link->h != outlink->h) ||
+        (s->src_format.chroma_location != s->dst_format.chroma_location)||
+        (s->src_format.color_family !=s->dst_format.color_family)||
+        (s->src_format.color_primaries !=s->dst_format.color_primaries)||
+        (s->src_format.depth !=s->dst_format.depth)||
+        (s->src_format.matrix_coefficients !=s->dst_format.matrix_coefficients)||
+        (s->src_format.field_parity !=s->dst_format.field_parity)||
+        (s->src_format.pixel_range !=s->dst_format.pixel_range)||
+        (s->src_format.pixel_type !=s->dst_format.pixel_type)||
+        (s->src_format.transfer_characteristics !=s->dst_format.transfer_characteristics)
+    ){
+        if ((ret = realign_frame(desc, &in)) < 0)
+            goto fail;
+
         snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
         av_opt_set(s, "w", buf, 0);
         snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
         av_opt_set(s, "h", buf, 0);
 
+ 
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
-        if ((ret = config_props(outlink)) < 0)
-            goto fail;
+        update_output_color_information(s, out);
+    
+        s->nb_threads = FFMIN(ff_filter_get_nb_threads(ctx), link->h / MIN_TILESIZE);
+        s->slice_h = ((unsigned int)(link->h / s->nb_threads)) & 0xfffffffe; // slice_h should be even for zimg
+        s->in_colorspace = in->colorspace;
+        s->in_trc = in->color_trc;
+        s->in_primaries = in->color_primaries;
+        s->in_range = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc = out->color_trc;
+        s->out_primaries = out->color_primaries;
+        s->out_range = out->color_range;
+    
+        av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+                  (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+                  (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+                  INT_MAX);
 
         zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
         zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
         zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
 
+        format_init(&s->src_format, in, desc, s->colorspace_in,
+            s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
+        format_init(&s->dst_format, out, odesc, s->colorspace,
+            s->primaries, s->trc, s->range, s->chromal);
+
         s->params.dither_type = s->dither;
         s->params.cpu_type = ZIMG_CPU_AUTO;
         s->params.resample_filter = s->filter;
@@ -634,27 +845,6 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->params.filter_param_a = s->params.filter_param_a_uv = s->param_a;
         s->params.filter_param_b = s->params.filter_param_b_uv = s->param_b;
 
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
-
-        update_output_color_information(s, out);
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
-
-        s->in_colorspace  = in->colorspace;
-        s->in_trc         = in->color_trc;
-        s->in_primaries   = in->color_primaries;
-        s->in_range       = in->color_range;
-        s->out_colorspace = out->colorspace;
-        s->out_trc        = out->color_trc;
-        s->out_primaries  = out->color_primaries;
-        s->out_range      = out->color_range;
-
         if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
             zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
             zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
@@ -670,76 +860,48 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
             s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
 
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
             s->alpha_dst_format.depth = odesc->comp[0].depth;
             s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
             s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
         }
-    }
 
-    update_output_color_information(s, out);
+        td.in = in;
+        td.out = out;
+        td.desc = desc;
+        td.odesc = odesc;
 
-    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
-              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
-              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
-              INT_MAX);
-
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
-
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
+        ff_filter_execute(ctx, filter_slice, &td, NULL, s->nb_threads);
 
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
-
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
-
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
+        s->src_format_tmp = s->src_format;
+        s->dst_format_tmp = s->dst_format;
+        s->params_tmp = s->params;
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            s->alpha_src_format_tmp = s->alpha_src_format;
+            s->alpha_dst_format_tmp = s->alpha_dst_format;
+            s->alpha_params_tmp = s->alpha_params;
         }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
+
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) && (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) ){
+            int x, y;
+            if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+                for (y = 0; y < out->height; y++) {
+                    for (x = 0; x < out->width; x++) {
+                        AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                                av_float2int(1.0f));
+                    }
                 }
+            } else {
+                for (y = 0; y < outlink->h; y++)
+                    memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
             }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
         }
     }
-
+    else {
+        /*no need for any filtering */
+        ret = av_frame_copy(out, in);
+        if (ret < 0)
+            return ret;
+    }
 fail:
     av_frame_free(&in);
     if (ret) {
@@ -753,11 +915,12 @@  fail:
 static av_cold void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
-
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    int i; /* index of the per-job resource slot being released */
+    for (i = 0; i < s->nb_threads; i++) { /* NOTE(review): only slots [0, nb_threads) are freed; nb_threads is reassigned in filter_frame - confirm no higher slot can stay allocated */
+        if (s->tmp[i]) av_freep(&s->tmp[i]); /* scratch buffer handed to zimg_filter_graph_process for job i */
+        if (s->graph[i]) zimg_filter_graph_free(s->graph[i]); /* graph used for the three colour planes of job i */
+        if (s->alpha_graph[i]) zimg_filter_graph_free(s->alpha_graph[i]); /* graph used for the alpha plane of job i */
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -941,4 +1104,5 @@  const AVFilter ff_vf_zscale = {
     FILTER_OUTPUTS(avfilter_vf_zscale_outputs),
     FILTER_QUERY_FUNC(query_formats),
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };