diff mbox

[FFmpeg-devel] vf_tonemap.c: Add slice threading support for performance

Message ID 20190505075120.11987-1-lance.lmwang@gmail.com
State New
Headers show

Commit Message

Lance Wang May 5, 2019, 7:51 a.m. UTC
From: Limin Wang <lance.lmwang@gmail.com>

---
 libavfilter/vf_tonemap.c | 109 +++++++++++++++++++++++++++++----------
 1 file changed, 81 insertions(+), 28 deletions(-)

Comments

Paul B Mahol May 5, 2019, 8:02 a.m. UTC | #1
On 5/5/19, lance.lmwang@gmail.com <lance.lmwang@gmail.com> wrote:
> From: Limin Wang <lance.lmwang@gmail.com>
>
> ---
>  libavfilter/vf_tonemap.c | 109 +++++++++++++++++++++++++++++----------
>  1 file changed, 81 insertions(+), 28 deletions(-)
>
> diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
> index efd4af5466..0b26dd5e7f 100644
> --- a/libavfilter/vf_tonemap.c
> +++ b/libavfilter/vf_tonemap.c
> @@ -1,5 +1,6 @@
>  /*
>   * Copyright (c) 2017 Vittorio Giovara <vittorio.giovara@gmail.com>
> + * Copyright (c) 2019 Limin Wang <lance.lmwang@gmail.com>
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -71,6 +72,13 @@ typedef struct TonemapContext {
>      const struct LumaCoefficients *coeffs;
>  } TonemapContext;
>
> +typedef struct ThreadData {
> +    AVFrame *in, *out;
> +    double peak;
> +    const struct AVPixFmtDescriptor *desc;
> +    const struct AVPixFmtDescriptor *odesc;
> +} ThreadData;
> +
>  static const enum AVPixelFormat pix_fmts[] = {
>      AV_PIX_FMT_GBRPF32,
>      AV_PIX_FMT_GBRAPF32,
> @@ -127,15 +135,10 @@ static float mobius(float in, float j, double peak)
>  }
>
>  #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
> -static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
> -                    const AVPixFmtDescriptor *desc, int x, int y, double
> peak)
> +static void tonemap(TonemapContext *s, float *r_out, float *b_out, float
> *g_out,
> +                    const float *r_in, const float *b_in, const float
> *g_in,
> +                    const AVPixFmtDescriptor *desc, double peak)
>  {
> -    const float *r_in = (const float *)(in->data[0] + x *
> desc->comp[0].step + y * in->linesize[0]);
> -    const float *b_in = (const float *)(in->data[1] + x *
> desc->comp[1].step + y * in->linesize[1]);
> -    const float *g_in = (const float *)(in->data[2] + x *
> desc->comp[2].step + y * in->linesize[2]);
> -    float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y *
> out->linesize[0]);
> -    float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y *
> out->linesize[1]);
> -    float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y *
> out->linesize[2]);
>      float sig, sig_orig;
>
>      /* load values */
> @@ -189,17 +192,78 @@ static void tonemap(TonemapContext *s, AVFrame *out,
> const AVFrame *in,
>      *r_out *= sig / sig_orig;
>      *g_out *= sig / sig_orig;
>      *b_out *= sig / sig_orig;
> +
> +}
> +
> +static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int
> nb_jobs)
> +{
> +    TonemapContext *s = ctx->priv;
> +    const ThreadData *td = arg;
> +    const AVFrame *in = td->in;
> +    AVFrame *out = td->out;
> +    int x, y;
> +    const AVPixFmtDescriptor *desc = td->desc;
> +    const AVPixFmtDescriptor *odesc = td->odesc;
> +    const int slice_start = (out->height *  jobnr   ) / nb_jobs;
> +    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;
> +    const int slice_h   = slice_end - slice_start;
> +    uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
> +    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
> +    uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
> +    const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
> +    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
> +    const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
> +    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
> +    const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3];
> +
> +    /* do the tone map */
> +    for (y = slice_start; y < slice_end; y++) {
> +        for (x = 0; x < out->width; x++) {
> +            const float *r_in = (const float *)(srcr + x *
> desc->comp[0].step);
> +            const float *b_in = (const float *)(srcb + x *
> desc->comp[1].step);
> +            const float *g_in = (const float *)(srcg + x *
> desc->comp[2].step);
> +            float *r_out = (float *)(dstr + x * desc->comp[0].step);
> +            float *b_out = (float *)(dstb + x * desc->comp[1].step);
> +            float *g_out = (float *)(dstg + x * desc->comp[2].step);
> +
> +            tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc,
> td->peak);
> +        }
> +        srcr += in->linesize[0];
> +        srcg += in->linesize[1];
> +        srcb += in->linesize[2];
> +        dstr += out->linesize[0];
> +        dstg += out->linesize[1];
> +        dstb += out->linesize[2];
> +    }
> +
> +    /* copy/generate alpha if needed */
> +    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> +        av_image_copy_plane(dsta, out->linesize[3],
> +                srca, in->linesize[3],
> +                out->linesize[3], slice_h);
> +    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> +        for (y = slice_start; y < slice_end; y++) {
> +            for (x = 0; x < out->width; x++) {
> +                AV_WN32(dsta + x * odesc->comp[3].step + y *
> out->linesize[3],
> +                        av_float2int(1.0f));
> +            }
> +        }
> +    }
> +
> +    return 0;
>  }
>
>  static int filter_frame(AVFilterLink *link, AVFrame *in)
>  {
> -    TonemapContext *s = link->dst->priv;
> +    AVFilterContext *ctx = link->dst;
> +    TonemapContext *s = ctx->priv;
>      AVFilterLink *outlink = link->dst->outputs[0];
>      AVFrame *out;
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
>      const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
> -    int ret, x, y;
> +    int ret;
>      double peak = s->peak;
> +    ThreadData td;
>
>      if (!desc || !odesc) {
>          av_frame_free(&in);
> @@ -244,24 +308,12 @@ static int filter_frame(AVFilterLink *link, AVFrame
> *in)
>          s->desat = 0;
>      }
>
> -    /* do the tone map */
> -    for (y = 0; y < out->height; y++)
> -        for (x = 0; x < out->width; x++)
> -            tonemap(s, out, in, desc, x, y, peak);
> -
> -    /* copy/generate alpha if needed */
> -    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags &
> AV_PIX_FMT_FLAG_ALPHA) {
> -        av_image_copy_plane(out->data[3], out->linesize[3],
> -                            in->data[3], in->linesize[3],
> -                            out->linesize[3], outlink->h);
> -    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
> -        for (y = 0; y < out->height; y++) {
> -            for (x = 0; x < out->width; x++) {
> -                AV_WN32(out->data[3] + x * odesc->comp[3].step + y *
> out->linesize[3],
> -                        av_float2int(1.0f));
> -            }
> -        }
> -    }
> +    td.in = in;
> +    td.out = out;
> +    td.desc = desc;
> +    td.odesc = odesc;
> +    td.peak = peak;
> +    ctx->internal->execute(ctx, do_tonemap_slice, &td, NULL,
> FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
>
>      av_frame_free(&in);
>
> @@ -315,4 +367,5 @@ AVFilter ff_vf_tonemap = {
>      .priv_class      = &tonemap_class,
>      .inputs          = tonemap_inputs,
>      .outputs         = tonemap_outputs,
> +    .flags           = AVFILTER_FLAG_SLICE_THREADS,
>  };
> --
> 2.21.0

Another patch that does the same thing was already applied.
diff mbox

Patch

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index efd4af5466..0b26dd5e7f 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -1,5 +1,6 @@ 
 /*
  * Copyright (c) 2017 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (c) 2019 Limin Wang <lance.lmwang@gmail.com>
  *
  * This file is part of FFmpeg.
  *
@@ -71,6 +72,13 @@  typedef struct TonemapContext {
     const struct LumaCoefficients *coeffs;
 } TonemapContext;
 
+typedef struct ThreadData {
+    AVFrame *in, *out;
+    double peak;
+    const struct AVPixFmtDescriptor *desc;
+    const struct AVPixFmtDescriptor *odesc;
+} ThreadData;
+
 static const enum AVPixelFormat pix_fmts[] = {
     AV_PIX_FMT_GBRPF32,
     AV_PIX_FMT_GBRAPF32,
@@ -127,15 +135,10 @@  static float mobius(float in, float j, double peak)
 }
 
 #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
-static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
-                    const AVPixFmtDescriptor *desc, int x, int y, double peak)
+static void tonemap(TonemapContext *s, float *r_out, float *b_out, float *g_out,
+                    const float *r_in, const float *b_in, const float *g_in,
+                    const AVPixFmtDescriptor *desc, double peak)
 {
-    const float *r_in = (const float *)(in->data[0] + x * desc->comp[0].step + y * in->linesize[0]);
-    const float *b_in = (const float *)(in->data[1] + x * desc->comp[1].step + y * in->linesize[1]);
-    const float *g_in = (const float *)(in->data[2] + x * desc->comp[2].step + y * in->linesize[2]);
-    float *r_out = (float *)(out->data[0] + x * desc->comp[0].step + y * out->linesize[0]);
-    float *b_out = (float *)(out->data[1] + x * desc->comp[1].step + y * out->linesize[1]);
-    float *g_out = (float *)(out->data[2] + x * desc->comp[2].step + y * out->linesize[2]);
     float sig, sig_orig;
 
     /* load values */
@@ -189,17 +192,78 @@  static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
     *r_out *= sig / sig_orig;
     *g_out *= sig / sig_orig;
     *b_out *= sig / sig_orig;
+
+}
+
+static int do_tonemap_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    TonemapContext *s = ctx->priv;
+    const ThreadData *td = arg;
+    const AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    int x, y;
+    const AVPixFmtDescriptor *desc = td->desc;
+    const AVPixFmtDescriptor *odesc = td->odesc;
+    const int slice_start = (out->height *  jobnr   ) / nb_jobs;
+    const int slice_end   = (out->height * (jobnr+1)) / nb_jobs;
+    const int slice_h   = slice_end - slice_start;
+    uint8_t *dstr = out->data[0] + slice_start * out->linesize[0];
+    uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
+    uint8_t *dstg = out->data[2] + slice_start * out->linesize[2];
+    const uint8_t *srcr = in->data[0] + slice_start * in->linesize[0];
+    const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
+    const uint8_t *srcg = in->data[2] + slice_start * in->linesize[2];
+    uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
+    const uint8_t *srca = in ->data[3] + slice_start * in->linesize[3];
+
+    /* do the tone map */
+    for (y = slice_start; y < slice_end; y++) {
+        for (x = 0; x < out->width; x++) {
+            const float *r_in = (const float *)(srcr + x * desc->comp[0].step);
+            const float *b_in = (const float *)(srcb + x * desc->comp[1].step);
+            const float *g_in = (const float *)(srcg + x * desc->comp[2].step);
+            float *r_out = (float *)(dstr + x * desc->comp[0].step);
+            float *b_out = (float *)(dstb + x * desc->comp[1].step);
+            float *g_out = (float *)(dstg + x * desc->comp[2].step);
+
+            tonemap(s, r_out, b_out, g_out, r_in, b_in, g_in, desc, td->peak);
+        }
+        srcr += in->linesize[0];
+        srcg += in->linesize[1];
+        srcb += in->linesize[2];
+        dstr += out->linesize[0];
+        dstg += out->linesize[1];
+        dstb += out->linesize[2];
+    }
+
+    /* copy/generate alpha if needed */
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        av_image_copy_plane(dsta, out->linesize[3],
+                srca, in->linesize[3],
+                out->linesize[3], slice_h);
+    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < out->width; x++) {
+                AV_WN32(dsta + x * odesc->comp[3].step + y * out->linesize[3],
+                        av_float2int(1.0f));
+            }
+        }
+    }
+
+    return 0;
 }
 
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    TonemapContext *s = link->dst->priv;
+    AVFilterContext *ctx = link->dst;
+    TonemapContext *s = ctx->priv;
     AVFilterLink *outlink = link->dst->outputs[0];
     AVFrame *out;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
-    int ret, x, y;
+    int ret;
     double peak = s->peak;
+    ThreadData td;
 
     if (!desc || !odesc) {
         av_frame_free(&in);
@@ -244,24 +308,12 @@  static int filter_frame(AVFilterLink *link, AVFrame *in)
         s->desat = 0;
     }
 
-    /* do the tone map */
-    for (y = 0; y < out->height; y++)
-        for (x = 0; x < out->width; x++)
-            tonemap(s, out, in, desc, x, y, peak);
-
-    /* copy/generate alpha if needed */
-    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        av_image_copy_plane(out->data[3], out->linesize[3],
-                            in->data[3], in->linesize[3],
-                            out->linesize[3], outlink->h);
-    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
-        for (y = 0; y < out->height; y++) {
-            for (x = 0; x < out->width; x++) {
-                AV_WN32(out->data[3] + x * odesc->comp[3].step + y * out->linesize[3],
-                        av_float2int(1.0f));
-            }
-        }
-    }
+    td.in = in;
+    td.out = out;
+    td.desc = desc;
+    td.odesc = odesc;
+    td.peak = peak;
+    ctx->internal->execute(ctx, do_tonemap_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
 
     av_frame_free(&in);
 
@@ -315,4 +367,5 @@  AVFilter ff_vf_tonemap = {
     .priv_class      = &tonemap_class,
     .inputs          = tonemap_inputs,
     .outputs         = tonemap_outputs,
+    .flags           = AVFILTER_FLAG_SLICE_THREADS,
 };