diff mbox

[FFmpeg-devel,v1,3/3] avfilter/colorlevels: add slice threading support with less code

Message ID 20191023102743.19979-3-lance.lmwang@gmail.com
State New
Headers show

Commit Message

Lance Wang Oct. 23, 2019, 10:27 a.m. UTC
From: Limin Wang <lance.lmwang@gmail.com>

Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
---
 libavfilter/vf_colorlevels.c | 176 +++++++++++++++--------------------
 1 file changed, 77 insertions(+), 99 deletions(-)

Comments

Lance Wang Nov. 8, 2019, 10:29 a.m. UTC | #1
ping.

On Wed, Oct 23, 2019 at 06:27:43PM +0800, lance.lmwang@gmail.com wrote:
> From: Limin Wang <lance.lmwang@gmail.com>
> 
> Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
> ---
>  libavfilter/vf_colorlevels.c | 176 +++++++++++++++--------------------
>  1 file changed, 77 insertions(+), 99 deletions(-)
> 
> diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c
> index 5385a5e754..f8645a08bd 100644
> --- a/libavfilter/vf_colorlevels.c
> +++ b/libavfilter/vf_colorlevels.c
> @@ -26,6 +26,7 @@
>  #include "formats.h"
>  #include "internal.h"
>  #include "video.h"
> +#include "thread.h"
>  
>  #define R 0
>  #define G 1
> @@ -37,6 +38,11 @@ typedef struct Range {
>      double out_min, out_max;
>  } Range;
>  
> +typedef struct ThreadData {
> +    AVFrame *in;
> +    AVFrame *out;
> +} ThreadData;
> +
>  typedef struct ColorLevelsContext {
>      const AVClass *class;
>      Range range[4];
> @@ -45,6 +51,7 @@ typedef struct ColorLevelsContext {
>      int step;
>      uint8_t rgba_map[4];
>      int linesize;
> +    int (*colorlevels_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
>  } ColorLevelsContext;
>  
>  #define OFFSET(x) offsetof(ColorLevelsContext, x)
> @@ -90,6 +97,68 @@ static int query_formats(AVFilterContext *ctx)
>      return ff_set_common_formats(ctx, fmts_list);
>  }
>  
> +#define DEFINE_COLORLEVELS(type, nbits)                                                      \
> +static int do_##nbits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)    \
> +{                                                                                            \
> +    ColorLevelsContext *s = ctx->priv;                                                       \
> +    AVFilterLink *inlink = ctx->inputs[0];                                                   \
> +    const int step = s->step;                                                                \
> +    int x, y, i;                                                                             \
> +    ThreadData *td = arg;                                                                    \
> +    const AVFrame *in = td->in;                                                              \
> +    AVFrame *out = td->out;                                                                  \
> +                                                                                             \
> +    for (i = 0; i < s->nb_comp; i++) {                                                       \
> +        Range *r = &s->range[i];                                                             \
> +        const int slice_start = (inlink->h * jobnr) / nb_jobs;                               \
> +        const int slice_end = (inlink->h * (jobnr+1)) / nb_jobs;                             \
> +        const uint8_t offset = s->rgba_map[i];                                               \
> +        const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];                 \
> +        uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0];                     \
> +        int imin = lrint(r->in_min  * UINT##nbits##_MAX);                                    \
> +        int imax = lrint(r->in_max  * UINT##nbits##_MAX);                                    \
> +        int omin = lrint(r->out_min * UINT##nbits##_MAX);                                    \
> +        int omax = lrint(r->out_max * UINT##nbits##_MAX);                                    \
> +        double coeff;                                                                        \
> +                                                                                             \
> +        if (imin < 0) {                                                                      \
> +            imin = UINT##nbits##_MAX;                                                        \
> +            for (y = slice_start; y < slice_end; y++) {                                      \
> +                const type *src = (const type *)srcrow;                                      \
> +                                                                                             \
> +                for (x = 0; x < s->linesize; x += step)                                      \
> +                    imin = FFMIN(imin, src[x + offset]);                                     \
> +                srcrow += in->linesize[0];                                                   \
> +            }                                                                                \
> +        }                                                                                    \
> +        if (imax < 0) {                                                                      \
> +            imax = 0;                                                                        \
> +            for (y = slice_start; y < slice_end; y++) {                                      \
> +                const type *src = (const type *)srcrow;                                      \
> +                                                                                             \
> +                for (x = 0; x < s->linesize; x += step)                                      \
> +                    imax = FFMAX(imax, src[x + offset]);                                     \
> +                srcrow += in->linesize[0];                                                   \
> +            }                                                                                \
> +        }                                                                                    \
> +                                                                                             \
> +        coeff = (omax - omin) / (double)(imax - imin);                                       \
> +        for (y = slice_start; y < slice_end; y++) {                                          \
> +            const type *src = (const type*)srcrow;                                           \
> +            type *dst = (type *)dstrow;                                                      \
> +                                                                                             \
> +            for (x = 0; x < s->linesize; x += step)                                          \
> +                dst[x + offset] = av_clip_uint##nbits(                                       \
> +                        (src[x + offset] - imin) * coeff + omin);                            \
> +            dstrow += out->linesize[0];                                                      \
> +            srcrow += in->linesize[0];                                                       \
> +        }                                                                                    \
> +    }                                                                                        \
> +    return 0;                                                                                \
> +}
> +DEFINE_COLORLEVELS(uint8_t, 8)
> +DEFINE_COLORLEVELS(uint16_t, 16)
> +
>  static int config_input(AVFilterLink *inlink)
>  {
>      AVFilterContext *ctx = inlink->dst;
> @@ -102,17 +171,17 @@ static int config_input(AVFilterLink *inlink)
>      s->linesize = inlink->w * s->step;
>      ff_fill_rgba_map(s->rgba_map, inlink->format);
>  
> +    s->colorlevels_slice = s->bpp <= 1 ? do_8bit_slice : do_16bit_slice;
>      return 0;
>  }
>  
>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  {
>      AVFilterContext *ctx = inlink->dst;
> -    ColorLevelsContext *s = ctx->priv;
>      AVFilterLink *outlink = ctx->outputs[0];
> -    const int step = s->step;
> +    ColorLevelsContext *s = ctx->priv;
>      AVFrame *out;
> -    int x, y, i;
> +    ThreadData td;
>  
>      if (av_frame_is_writable(in)) {
>          out = in;
> @@ -125,101 +194,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>          av_frame_copy_props(out, in);
>      }
>  
> -    switch (s->bpp) {
> -    case 1:
> -        for (i = 0; i < s->nb_comp; i++) {
> -            Range *r = &s->range[i];
> -            const uint8_t offset = s->rgba_map[i];
> -            const uint8_t *srcrow = in->data[0];
> -            uint8_t *dstrow = out->data[0];
> -            int imin = lrint(r->in_min  * UINT8_MAX);
> -            int imax = lrint(r->in_max  * UINT8_MAX);
> -            int omin = lrint(r->out_min * UINT8_MAX);
> -            int omax = lrint(r->out_max * UINT8_MAX);
> -            double coeff;
> -
> -            if (imin < 0) {
> -                imin = UINT8_MAX;
> -                for (y = 0; y < inlink->h; y++) {
> -                    const uint8_t *src = srcrow;
> -
> -                    for (x = 0; x < s->linesize; x += step)
> -                        imin = FFMIN(imin, src[x + offset]);
> -                    srcrow += in->linesize[0];
> -                }
> -            }
> -            if (imax < 0) {
> -                srcrow = in->data[0];
> -                imax = 0;
> -                for (y = 0; y < inlink->h; y++) {
> -                    const uint8_t *src = srcrow;
> -
> -                    for (x = 0; x < s->linesize; x += step)
> -                        imax = FFMAX(imax, src[x + offset]);
> -                    srcrow += in->linesize[0];
> -                }
> -            }
> -
> -            srcrow = in->data[0];
> -            coeff = (omax - omin) / (double)(imax - imin);
> -            for (y = 0; y < inlink->h; y++) {
> -                const uint8_t *src = srcrow;
> -                uint8_t *dst = dstrow;
> -
> -                for (x = 0; x < s->linesize; x += step)
> -                    dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin);
> -                dstrow += out->linesize[0];
> -                srcrow += in->linesize[0];
> -            }
> -        }
> -        break;
> -    case 2:
> -        for (i = 0; i < s->nb_comp; i++) {
> -            Range *r = &s->range[i];
> -            const uint8_t offset = s->rgba_map[i];
> -            const uint8_t *srcrow = in->data[0];
> -            uint8_t *dstrow = out->data[0];
> -            int imin = lrint(r->in_min  * UINT16_MAX);
> -            int imax = lrint(r->in_max  * UINT16_MAX);
> -            int omin = lrint(r->out_min * UINT16_MAX);
> -            int omax = lrint(r->out_max * UINT16_MAX);
> -            double coeff;
> -
> -            if (imin < 0) {
> -                imin = UINT16_MAX;
> -                for (y = 0; y < inlink->h; y++) {
> -                    const uint16_t *src = (const uint16_t *)srcrow;
> -
> -                    for (x = 0; x < s->linesize; x += step)
> -                        imin = FFMIN(imin, src[x + offset]);
> -                    srcrow += in->linesize[0];
> -                }
> -            }
> -            if (imax < 0) {
> -                srcrow = in->data[0];
> -                imax = 0;
> -                for (y = 0; y < inlink->h; y++) {
> -                    const uint16_t *src = (const uint16_t *)srcrow;
> -
> -                    for (x = 0; x < s->linesize; x += step)
> -                        imax = FFMAX(imax, src[x + offset]);
> -                    srcrow += in->linesize[0];
> -                }
> -            }
> -
> -            srcrow = in->data[0];
> -            coeff = (omax - omin) / (double)(imax - imin);
> -            for (y = 0; y < inlink->h; y++) {
> -                const uint16_t *src = (const uint16_t*)srcrow;
> -                uint16_t *dst = (uint16_t *)dstrow;
> -
> -                for (x = 0; x < s->linesize; x += step)
> -                    dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin);
> -                dstrow += out->linesize[0];
> -                srcrow += in->linesize[0];
> -            }
> -        }
> -    }
> +    td.in = in;
> +    td.out = out;
> +    ctx->internal->execute(ctx, s->colorlevels_slice, &td, NULL,
> +            FFMIN(inlink->h, ff_filter_get_nb_threads(ctx)));
>  
>      if (in != out)
>          av_frame_free(&in);
> @@ -252,5 +230,5 @@ AVFilter ff_vf_colorlevels = {
>      .query_formats = query_formats,
>      .inputs        = colorlevels_inputs,
>      .outputs       = colorlevels_outputs,
> -    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
> +    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
>  };
> -- 
> 2.21.0
>
diff mbox

Patch

diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c
index 5385a5e754..f8645a08bd 100644
--- a/libavfilter/vf_colorlevels.c
+++ b/libavfilter/vf_colorlevels.c
@@ -26,6 +26,7 @@ 
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "thread.h"
 
 #define R 0
 #define G 1
@@ -37,6 +38,11 @@  typedef struct Range {
     double out_min, out_max;
 } Range;
 
+typedef struct ThreadData {
+    AVFrame *in;    /* source frame read by the slice workers */
+    AVFrame *out;   /* frame the slice workers write to; same frame as in when in is writable */
+} ThreadData;
+
 typedef struct ColorLevelsContext {
     const AVClass *class;
     Range range[4];
@@ -45,6 +51,7 @@  typedef struct ColorLevelsContext {
     int step;
     uint8_t rgba_map[4];
     int linesize;
+    int (*colorlevels_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
 } ColorLevelsContext;
 
 #define OFFSET(x) offsetof(ColorLevelsContext, x)
@@ -90,6 +97,68 @@  static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+/* Define do_<nbits>bit_slice(): job jobnr of nb_jobs remaps its rows [slice_start, slice_end)
+ * per component; imin/imax < 0 selects the min/max sample of that slice (not of the frame). */
+#define DEFINE_COLORLEVELS(type, nbits)                                                      \
+static int do_##nbits##bit_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)    \
+{                                                                                            \
+    ColorLevelsContext *s = ctx->priv;                                                       \
+    AVFilterLink *inlink = ctx->inputs[0];                                                   \
+    const int step = s->step;                                                                \
+    int x, y, i;                                                                             \
+    ThreadData *td = arg;                                                                    \
+    const AVFrame *in = td->in;                                                              \
+    AVFrame *out = td->out;                                                                  \
+                                                                                             \
+    for (i = 0; i < s->nb_comp; i++) {                                                       \
+        Range *r = &s->range[i];                                                             \
+        const int slice_start = (inlink->h * jobnr) / nb_jobs;                               \
+        const int slice_end = (inlink->h * (jobnr+1)) / nb_jobs;                             \
+        const uint8_t offset = s->rgba_map[i];                                               \
+        const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];                 \
+        uint8_t *dstrow = out->data[0] + slice_start * out->linesize[0];                     \
+        int imin = lrint(r->in_min  * UINT##nbits##_MAX);                                    \
+        int imax = lrint(r->in_max  * UINT##nbits##_MAX);                                    \
+        int omin = lrint(r->out_min * UINT##nbits##_MAX);                                    \
+        int omax = lrint(r->out_max * UINT##nbits##_MAX);                                    \
+        double coeff;                                                                        \
+        /* Scans index rows off srcrow; advancing it would misplace the remap pass. */       \
+        if (imin < 0) {                                                                      \
+            imin = UINT##nbits##_MAX;                                                        \
+            for (y = 0; y < slice_end - slice_start; y++) {                                  \
+                const type *src = (const type *)(srcrow + y * in->linesize[0]);              \
+                                                                                             \
+                for (x = 0; x < s->linesize; x += step)                                      \
+                    imin = FFMIN(imin, src[x + offset]);                                     \
+            }                                                                                \
+        }                                                                                    \
+        if (imax < 0) {                                                                      \
+            imax = 0;                                                                        \
+            for (y = 0; y < slice_end - slice_start; y++) {                                  \
+                const type *src = (const type *)(srcrow + y * in->linesize[0]);              \
+                                                                                             \
+                for (x = 0; x < s->linesize; x += step)                                      \
+                    imax = FFMAX(imax, src[x + offset]);                                     \
+            }                                                                                \
+        }                                                                                    \
+                                                                                             \
+        coeff = (omax - omin) / (double)(imax - imin);                                       \
+        for (y = slice_start; y < slice_end; y++) {                                          \
+            const type *src = (const type*)srcrow;                                           \
+            type *dst = (type *)dstrow;                                                      \
+                                                                                             \
+            for (x = 0; x < s->linesize; x += step)                                          \
+                dst[x + offset] = av_clip_uint##nbits(                                       \
+                        (src[x + offset] - imin) * coeff + omin);                            \
+            dstrow += out->linesize[0];                                                      \
+            srcrow += in->linesize[0];                                                       \
+        }                                                                                    \
+    }                                                                                        \
+    return 0;                                                                                \
+}
+DEFINE_COLORLEVELS(uint8_t, 8)
+DEFINE_COLORLEVELS(uint16_t, 16)
+
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -102,17 +171,17 @@  static int config_input(AVFilterLink *inlink)
     s->linesize = inlink->w * s->step;
     ff_fill_rgba_map(s->rgba_map, inlink->format);
 
+    s->colorlevels_slice = s->bpp <= 1 ? do_8bit_slice : do_16bit_slice;
     return 0;
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
-    ColorLevelsContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    const int step = s->step;
+    ColorLevelsContext *s = ctx->priv;
     AVFrame *out;
-    int x, y, i;
+    ThreadData td;
 
     if (av_frame_is_writable(in)) {
         out = in;
@@ -125,101 +194,10 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_copy_props(out, in);
     }
 
-    switch (s->bpp) {
-    case 1:
-        for (i = 0; i < s->nb_comp; i++) {
-            Range *r = &s->range[i];
-            const uint8_t offset = s->rgba_map[i];
-            const uint8_t *srcrow = in->data[0];
-            uint8_t *dstrow = out->data[0];
-            int imin = lrint(r->in_min  * UINT8_MAX);
-            int imax = lrint(r->in_max  * UINT8_MAX);
-            int omin = lrint(r->out_min * UINT8_MAX);
-            int omax = lrint(r->out_max * UINT8_MAX);
-            double coeff;
-
-            if (imin < 0) {
-                imin = UINT8_MAX;
-                for (y = 0; y < inlink->h; y++) {
-                    const uint8_t *src = srcrow;
-
-                    for (x = 0; x < s->linesize; x += step)
-                        imin = FFMIN(imin, src[x + offset]);
-                    srcrow += in->linesize[0];
-                }
-            }
-            if (imax < 0) {
-                srcrow = in->data[0];
-                imax = 0;
-                for (y = 0; y < inlink->h; y++) {
-                    const uint8_t *src = srcrow;
-
-                    for (x = 0; x < s->linesize; x += step)
-                        imax = FFMAX(imax, src[x + offset]);
-                    srcrow += in->linesize[0];
-                }
-            }
-
-            srcrow = in->data[0];
-            coeff = (omax - omin) / (double)(imax - imin);
-            for (y = 0; y < inlink->h; y++) {
-                const uint8_t *src = srcrow;
-                uint8_t *dst = dstrow;
-
-                for (x = 0; x < s->linesize; x += step)
-                    dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin);
-                dstrow += out->linesize[0];
-                srcrow += in->linesize[0];
-            }
-        }
-        break;
-    case 2:
-        for (i = 0; i < s->nb_comp; i++) {
-            Range *r = &s->range[i];
-            const uint8_t offset = s->rgba_map[i];
-            const uint8_t *srcrow = in->data[0];
-            uint8_t *dstrow = out->data[0];
-            int imin = lrint(r->in_min  * UINT16_MAX);
-            int imax = lrint(r->in_max  * UINT16_MAX);
-            int omin = lrint(r->out_min * UINT16_MAX);
-            int omax = lrint(r->out_max * UINT16_MAX);
-            double coeff;
-
-            if (imin < 0) {
-                imin = UINT16_MAX;
-                for (y = 0; y < inlink->h; y++) {
-                    const uint16_t *src = (const uint16_t *)srcrow;
-
-                    for (x = 0; x < s->linesize; x += step)
-                        imin = FFMIN(imin, src[x + offset]);
-                    srcrow += in->linesize[0];
-                }
-            }
-            if (imax < 0) {
-                srcrow = in->data[0];
-                imax = 0;
-                for (y = 0; y < inlink->h; y++) {
-                    const uint16_t *src = (const uint16_t *)srcrow;
-
-                    for (x = 0; x < s->linesize; x += step)
-                        imax = FFMAX(imax, src[x + offset]);
-                    srcrow += in->linesize[0];
-                }
-            }
-
-            srcrow = in->data[0];
-            coeff = (omax - omin) / (double)(imax - imin);
-            for (y = 0; y < inlink->h; y++) {
-                const uint16_t *src = (const uint16_t*)srcrow;
-                uint16_t *dst = (uint16_t *)dstrow;
-
-                for (x = 0; x < s->linesize; x += step)
-                    dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin);
-                dstrow += out->linesize[0];
-                srcrow += in->linesize[0];
-            }
-        }
-    }
+    td.in = in;
+    td.out = out;
+    ctx->internal->execute(ctx, s->colorlevels_slice, &td, NULL,
+            FFMIN(inlink->h, ff_filter_get_nb_threads(ctx)));
 
     if (in != out)
         av_frame_free(&in);
@@ -252,5 +230,5 @@  AVFilter ff_vf_colorlevels = {
     .query_formats = query_formats,
     .inputs        = colorlevels_inputs,
     .outputs       = colorlevels_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };