[FFmpeg-devel] avfilter/vf_zscale: add slice threading

Submitted by Paul B Mahol on May 29, 2019, 7:22 p.m.

Details

Message ID 20190529192205.21275-1-onemda@gmail.com
State New
Headers show

Commit Message

Paul B Mahol May 29, 2019, 7:22 p.m.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/vf_zscale.c | 335 +++++++++++++++++++++++++---------------
 1 file changed, 211 insertions(+), 124 deletions(-)

Comments

Ruiling Song May 31, 2019, 4:20 a.m.
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Paul B Mahol

> Sent: Thursday, May 30, 2019 3:22 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: [FFmpeg-devel] [PATCH] avfilter/vf_zscale: add slice threading

> 

> Signed-off-by: Paul B Mahol <onemda@gmail.com>

> ---

>  libavfilter/vf_zscale.c | 335 +++++++++++++++++++++++++---------------

>  1 file changed, 211 insertions(+), 124 deletions(-)


Some testing shows that this patch introduces a big performance drop for the scale below, from 1080p to 720p:
./ffmpeg -i 1080p.mp4 -vf zscale=w=1280:h=720 -f null /dev/null
On my local machine (i7-6770HQ with 4 cores, thus 8 threads), the frame rate drops from 240 fps to 160 fps.
Did you observe any performance gain with this patch for some use case?

 [...]
> @@ -706,10 +790,12 @@ static void uninit(AVFilterContext *ctx)

>  {

>      ZScaleContext *s = ctx->priv;

> 

> -    zimg_filter_graph_free(s->graph);

> -    zimg_filter_graph_free(s->alpha_graph);

> -    av_freep(&s->tmp);

> -    s->tmp_size = 0;

> +    for (int i = 0; i < s->nb_threads; i++) {

> +        zimg_filter_graph_free(s->ztd[i].graph);

> +        zimg_filter_graph_free(s->ztd[i].alpha_graph);

> +        av_freep(&s->ztd[i].tmp);

> +        s->ztd[i].tmp_size = 0;

> +    }

Missing av_freep(&s->ztd) here?
>  }

> 

>  static int process_command(AVFilterContext *ctx, const char *cmd, const

> char *args,

> @@ -890,4 +976,5 @@ AVFilter ff_vf_zscale = {

>      .inputs          = avfilter_vf_zscale_inputs,

>      .outputs         = avfilter_vf_zscale_outputs,

>      .process_command = process_command,

> +    .flags           = AVFILTER_FLAG_SLICE_THREADS,

>  };

> --

> 2.17.1

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> 

> To unsubscribe, visit link above, or email

> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
index f0309272fa..082150adf0 100644
--- a/libavfilter/vf_zscale.c
+++ b/libavfilter/vf_zscale.c
@@ -74,6 +74,16 @@  enum var_name {
     VARS_NB
 };
 
+typedef struct ZScaleThreadContext {
+    void *tmp;
+    size_t tmp_size;
+
+    zimg_image_format src_format, dst_format;
+    zimg_image_format alpha_src_format, alpha_dst_format;
+    zimg_graph_builder_params alpha_params, params;
+    zimg_filter_graph *alpha_graph, *graph;
+} ZScaleThreadContext;
+
 typedef struct ZScaleContext {
     const AVClass *class;
 
@@ -100,6 +110,8 @@  typedef struct ZScaleContext {
     double nominal_peak_luminance;
     int approximate_gamma;
 
+    int nb_threads;
+
     char *w_expr;               ///< width  expression string
     char *h_expr;               ///< height expression string
 
@@ -110,13 +122,7 @@  typedef struct ZScaleContext {
 
     int force_original_aspect_ratio;
 
-    void *tmp;
-    size_t tmp_size;
-
-    zimg_image_format src_format, dst_format;
-    zimg_image_format alpha_src_format, alpha_dst_format;
-    zimg_graph_builder_params alpha_params, params;
-    zimg_filter_graph *alpha_graph, *graph;
+    ZScaleThreadContext *ztd;
 
     enum AVColorSpace in_colorspace, out_colorspace;
     enum AVColorTransferCharacteristic in_trc, out_trc;
@@ -204,6 +210,12 @@  static int config_props(AVFilterLink *outlink)
     int ret;
     int factor_w, factor_h;
 
+    s->nb_threads = ff_filter_get_nb_threads(ctx);
+    av_freep(&s->ztd);
+    s->ztd = av_calloc(s->nb_threads, sizeof(*s->ztd));
+    if (!s->ztd)
+        return AVERROR(ENOMEM);
+
     var_values[VAR_IN_W]  = var_values[VAR_IW] = inlink->w;
     var_values[VAR_IN_H]  = var_values[VAR_IH] = inlink->h;
     var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
@@ -458,10 +470,12 @@  static int convert_range(enum AVColorRange color_range)
 }
 
 static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFmtDescriptor *desc,
-                        int colorspace, int primaries, int transfer, int range, int location)
+                        int colorspace, int primaries, int transfer, int range, int location,
+                        int width, int height,
+                        int slice_start, int slice_end)
 {
-    format->width = frame->width;
-    format->height = frame->height;
+    format->width = width;
+    format->height = height;
     format->subsample_w = desc->log2_chroma_w;
     format->subsample_h = desc->log2_chroma_h;
     format->depth = desc->comp[0].depth;
@@ -472,6 +486,10 @@  static void format_init(zimg_image_format *format, AVFrame *frame, const AVPixFm
     format->transfer_characteristics = transfer == - 1 ? convert_trc(frame->color_trc) : transfer;
     format->pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : range == -1 ? convert_range(frame->color_range) : range;
     format->chroma_location = location == -1 ? convert_chroma_location(frame->chroma_location) : location;
+    format->active_region.left = 0;
+    format->active_region.top = slice_start;
+    format->active_region.width = width;
+    format->active_region.height = slice_end - slice_start;
 }
 
 static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *params,
@@ -502,16 +520,163 @@  static int graph_build(zimg_filter_graph **graph, zimg_graph_builder_params *par
     return 0;
 }
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+    const AVPixFmtDescriptor *desc, *odesc;
+} ThreadData;
+
+static int prepare_graph(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ZScaleContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    const int in_slice_start = (in->height * jobnr) / nb_jobs;
+    const int in_slice_end = (in->height * (jobnr+1)) / nb_jobs;
+    const int out_slice_start = (out->height * jobnr) / nb_jobs;
+    const int out_slice_end = (out->height * (jobnr+1)) / nb_jobs;
+    int ret;
+
+    zimg_image_format_default(&s->ztd[jobnr].src_format, ZIMG_API_VERSION);
+    zimg_image_format_default(&s->ztd[jobnr].dst_format, ZIMG_API_VERSION);
+    zimg_graph_builder_params_default(&s->ztd[jobnr].params, ZIMG_API_VERSION);
+
+    s->ztd[jobnr].params.dither_type = s->dither;
+    s->ztd[jobnr].params.cpu_type = ZIMG_CPU_AUTO;
+    s->ztd[jobnr].params.resample_filter = s->filter;
+    s->ztd[jobnr].params.resample_filter_uv = s->filter;
+    s->ztd[jobnr].params.nominal_peak_luminance = s->nominal_peak_luminance;
+    s->ztd[jobnr].params.allow_approximate_gamma = s->approximate_gamma;
+
+    format_init(&s->ztd[jobnr].src_format, in, desc, s->colorspace_in,
+                s->primaries_in, s->trc_in, s->range_in, s->chromal_in,
+                in->width, in->height,
+                in_slice_start, in_slice_end);
+    format_init(&s->ztd[jobnr].dst_format, out, odesc, s->colorspace,
+                s->primaries, s->trc, s->range, s->chromal,
+                out->width, out_slice_end - out_slice_start,
+                0, out_slice_end - out_slice_start);
+
+    ret = graph_build(&s->ztd[jobnr].graph, &s->ztd[jobnr].params, &s->ztd[jobnr].src_format, &s->ztd[jobnr].dst_format,
+                      &s->ztd[jobnr].tmp, &s->ztd[jobnr].tmp_size);
+    if (ret)
+        return ret;
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        zimg_image_format_default(&s->ztd[jobnr].alpha_src_format, ZIMG_API_VERSION);
+        zimg_image_format_default(&s->ztd[jobnr].alpha_dst_format, ZIMG_API_VERSION);
+        zimg_graph_builder_params_default(&s->ztd[jobnr].alpha_params, ZIMG_API_VERSION);
+
+        s->ztd[jobnr].alpha_params.dither_type = s->dither;
+        s->ztd[jobnr].alpha_params.cpu_type = ZIMG_CPU_AUTO;
+        s->ztd[jobnr].alpha_params.resample_filter = s->filter;
+
+        s->ztd[jobnr].alpha_src_format.width = in->width;
+        s->ztd[jobnr].alpha_src_format.height = in->height;
+        s->ztd[jobnr].alpha_src_format.depth = desc->comp[0].depth;
+        s->ztd[jobnr].alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        s->ztd[jobnr].alpha_src_format.color_family = ZIMG_COLOR_GREY;
+        s->ztd[jobnr].alpha_src_format.active_region.left = 0;
+        s->ztd[jobnr].alpha_src_format.active_region.top = in_slice_start;
+        s->ztd[jobnr].alpha_src_format.active_region.width = in->width;
+        s->ztd[jobnr].alpha_src_format.active_region.height = in_slice_end - in_slice_start;
+
+        s->ztd[jobnr].alpha_dst_format.width = out->width;
+        s->ztd[jobnr].alpha_dst_format.height = out->height;
+        s->ztd[jobnr].alpha_dst_format.depth = odesc->comp[0].depth;
+        s->ztd[jobnr].alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        s->ztd[jobnr].alpha_dst_format.color_family = ZIMG_COLOR_GREY;
+        s->ztd[jobnr].alpha_dst_format.active_region.left = 0;
+        s->ztd[jobnr].alpha_dst_format.active_region.top = 0;
+        s->ztd[jobnr].alpha_dst_format.active_region.width = out->width;
+        s->ztd[jobnr].alpha_dst_format.active_region.height = out_slice_end - out_slice_start;
+
+        zimg_filter_graph_free(s->ztd[jobnr].alpha_graph);
+        s->ztd[jobnr].alpha_graph = zimg_filter_graph_build(&s->ztd[jobnr].alpha_src_format, &s->ztd[jobnr].alpha_dst_format, &s->ztd[jobnr].alpha_params);
+        if (!s->ztd[jobnr].alpha_graph) {
+            return print_zimg_error(ctx);
+        }
+    }
+
+    return 0;
+}
+
+static int zscale_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ZScaleContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    int ret;
+
+    for (int plane = 0; plane < 3; plane++) {
+        const int height = plane > 0 ? AV_CEIL_RSHIFT(out->height, odesc->log2_chroma_h) : out->height;
+        const int out_slice_start = (height * jobnr) / nb_jobs;
+        int p = desc->comp[plane].plane;
+
+        src_buf.plane[plane].data   = in->data[p];
+        src_buf.plane[plane].stride = in->linesize[p];
+        src_buf.plane[plane].mask   = -1;
+
+        p = odesc->comp[plane].plane;
+        dst_buf.plane[plane].data   = out->data[p] + out_slice_start * out->linesize[p];
+        dst_buf.plane[plane].stride = out->linesize[p];
+        dst_buf.plane[plane].mask   = -1;
+    }
+
+    ret = zimg_filter_graph_process(s->ztd[jobnr].graph, &src_buf, &dst_buf, s->ztd[jobnr].tmp, 0, 0, 0, 0);
+    if (ret)
+        return print_zimg_error(ctx);
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        const int out_slice_start = (out->height * jobnr) / nb_jobs;
+
+        src_buf.plane[0].data   = in->data[3];
+        src_buf.plane[0].stride = in->linesize[3];
+        src_buf.plane[0].mask   = -1;
+
+        dst_buf.plane[0].data   = out->data[3] + out_slice_start * out->linesize[3];
+        dst_buf.plane[0].stride = out->linesize[3];
+        dst_buf.plane[0].mask   = -1;
+
+        ret = zimg_filter_graph_process(s->ztd[jobnr].alpha_graph, &src_buf, &dst_buf, s->ztd[jobnr].tmp, 0, 0, 0, 0);
+        if (ret)
+            return print_zimg_error(ctx);
+    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        int x, y;
+
+        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
+            for (y = 0; y < out->height; y++) {
+                for (x = 0; x < out->width; x++) {
+                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
+                            av_float2int(1.0f));
+                }
+            }
+        } else {
+            for (y = 0; y < out->height; y++)
+                memset(out->data[3] + y * out->linesize[3], 0xff, out->width);
+        }
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    ZScaleContext *s = link->dst->priv;
-    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFilterContext *ctx = link->dst;
+    ZScaleContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
-    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
     char buf[32];
-    int ret = 0, plane;
+    int ret = 0;
+    ThreadData td;
     AVFrame *out;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -552,41 +717,28 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
             return ret;
         }
 
-        zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
-        zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
-        zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
-
-        s->params.dither_type = s->dither;
-        s->params.cpu_type = ZIMG_CPU_AUTO;
-        s->params.resample_filter = s->filter;
-        s->params.resample_filter_uv = s->filter;
-        s->params.nominal_peak_luminance = s->nominal_peak_luminance;
-        s->params.allow_approximate_gamma = s->approximate_gamma;
-
-        format_init(&s->src_format, in, desc, s->colorspace_in,
-                    s->primaries_in, s->trc_in, s->range_in, s->chromal_in);
-        format_init(&s->dst_format, out, odesc, s->colorspace,
-                    s->primaries, s->trc, s->range, s->chromal);
+        td.out = out;
+        td.in = in;
+        td.desc = desc;
+        td.odesc = odesc;
+        ret = ctx->internal->execute(ctx, prepare_graph, &td, NULL, FFMIN3(in->height, out->height, s->nb_threads));
+        if (ret)
+            goto fail;
 
         if (s->colorspace != -1)
-            out->colorspace = (int)s->dst_format.matrix_coefficients;
+            out->colorspace = (int)s->ztd[0].dst_format.matrix_coefficients;
 
         if (s->primaries != -1)
-            out->color_primaries = (int)s->dst_format.color_primaries;
+            out->color_primaries = (int)s->ztd[0].dst_format.color_primaries;
 
         if (s->range != -1)
-            out->color_range = (int)s->dst_format.pixel_range + 1;
+            out->color_range = (int)s->ztd[0].dst_format.pixel_range + 1;
 
         if (s->trc != -1)
-            out->color_trc = (int)s->dst_format.transfer_characteristics;
+            out->color_trc = (int)s->ztd[0].dst_format.transfer_characteristics;
 
         if (s->chromal != -1)
-            out->chroma_location = (int)s->dst_format.chroma_location - 1;
-
-        ret = graph_build(&s->graph, &s->params, &s->src_format, &s->dst_format,
-                          &s->tmp, &s->tmp_size);
-        if (ret < 0)
-            goto fail;
+            out->chroma_location = (int)s->ztd[0].dst_format.chroma_location - 1;
 
         s->in_colorspace  = in->colorspace;
         s->in_trc         = in->color_trc;
@@ -596,101 +748,33 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->out_trc        = out->color_trc;
         s->out_primaries  = out->color_primaries;
         s->out_range      = out->color_range;
-
-        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-            zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
-            zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
-            zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
-
-            s->alpha_params.dither_type = s->dither;
-            s->alpha_params.cpu_type = ZIMG_CPU_AUTO;
-            s->alpha_params.resample_filter = s->filter;
-
-            s->alpha_src_format.width = in->width;
-            s->alpha_src_format.height = in->height;
-            s->alpha_src_format.depth = desc->comp[0].depth;
-            s->alpha_src_format.pixel_type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-            s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
-
-            s->alpha_dst_format.width = out->width;
-            s->alpha_dst_format.height = out->height;
-            s->alpha_dst_format.depth = odesc->comp[0].depth;
-            s->alpha_dst_format.pixel_type = (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) ? ZIMG_PIXEL_FLOAT : odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
-            s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
-
-            zimg_filter_graph_free(s->alpha_graph);
-            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
-            if (!s->alpha_graph) {
-                ret = print_zimg_error(link->dst);
-                goto fail;
-            }
-        }
     }
 
     if (s->colorspace != -1)
-        out->colorspace = (int)s->dst_format.matrix_coefficients;
+        out->colorspace = (int)s->ztd[0].dst_format.matrix_coefficients;
 
     if (s->primaries != -1)
-        out->color_primaries = (int)s->dst_format.color_primaries;
+        out->color_primaries = (int)s->ztd[0].dst_format.color_primaries;
 
     if (s->range != -1)
-        out->color_range = (int)s->dst_format.pixel_range;
+        out->color_range = (int)s->ztd[0].dst_format.pixel_range;
 
     if (s->trc != -1)
-        out->color_trc = (int)s->dst_format.transfer_characteristics;
+        out->color_trc = (int)s->ztd[0].dst_format.transfer_characteristics;
+
+    if (s->chromal != -1)
+        out->chroma_location = (int)s->ztd[0].dst_format.chroma_location - 1;
 
     av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
               (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
               (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
               INT_MAX);
 
-    for (plane = 0; plane < 3; plane++) {
-        int p = desc->comp[plane].plane;
-        src_buf.plane[plane].data   = in->data[p];
-        src_buf.plane[plane].stride = in->linesize[p];
-        src_buf.plane[plane].mask   = -1;
-
-        p = odesc->comp[plane].plane;
-        dst_buf.plane[plane].data   = out->data[p];
-        dst_buf.plane[plane].stride = out->linesize[p];
-        dst_buf.plane[plane].mask   = -1;
-    }
-
-    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-    if (ret) {
-        ret = print_zimg_error(link->dst);
-        goto fail;
-    }
-
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        src_buf.plane[0].data   = in->data[3];
-        src_buf.plane[0].stride = in->linesize[3];
-        src_buf.plane[0].mask   = -1;
-
-        dst_buf.plane[0].data   = out->data[3];
-        dst_buf.plane[0].stride = out->linesize[3];
-        dst_buf.plane[0].mask   = -1;
-
-        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
-        if (ret) {
-            ret = print_zimg_error(link->dst);
-            goto fail;
-        }
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        int x, y;
-
-        if (odesc->flags & AV_PIX_FMT_FLAG_FLOAT) {
-            for (y = 0; y < out->height; y++) {
-                for (x = 0; x < out->width; x++) {
-                    AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                            av_float2int(1.0f));
-                }
-            }
-        } else {
-            for (y = 0; y < outlink->h; y++)
-                memset(out->data[3] + y * out->linesize[3], 0xff, outlink->w);
-        }
-    }
+    td.out = out;
+    td.in = in;
+    td.desc = desc;
+    td.odesc = odesc;
+    ret = ctx->internal->execute(ctx, zscale_slice, &td, NULL, FFMIN3(in->height, out->height, s->nb_threads));
 
 fail:
     av_frame_free(&in);
@@ -706,10 +790,12 @@  static void uninit(AVFilterContext *ctx)
 {
     ZScaleContext *s = ctx->priv;
 
-    zimg_filter_graph_free(s->graph);
-    zimg_filter_graph_free(s->alpha_graph);
-    av_freep(&s->tmp);
-    s->tmp_size = 0;
+    for (int i = 0; i < s->nb_threads; i++) {
+        zimg_filter_graph_free(s->ztd[i].graph);
+        zimg_filter_graph_free(s->ztd[i].alpha_graph);
+        av_freep(&s->ztd[i].tmp);
+        s->ztd[i].tmp_size = 0;
+    }
 }
 
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
@@ -890,4 +976,5 @@  AVFilter ff_vf_zscale = {
     .inputs          = avfilter_vf_zscale_inputs,
     .outputs         = avfilter_vf_zscale_outputs,
     .process_command = process_command,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };