[FFmpeg-devel,V2] lavfi/colorlevels: Add slice threading support

Submitted by Jun Zhao on May 21, 2019, 11:20 a.m.

Details

Message ID 1558437636-6274-1-git-send-email-mypopydev@gmail.com
State New
Headers show

Commit Message

Jun Zhao May 21, 2019, 11:20 a.m.
From: Jun Zhao <barryjzhao@tencent.com>

Add slice threading support. Using a command like:

./ffmpeg -i input -vf colorlevels

with a 1080p H.264 clip, the speed goes from 39 fps to 79 fps on the
local machine.

Signed-off-by: Jun Zhao <barryjzhao@tencent.com>
---
 libavfilter/vf_colorlevels.c |  125 +++++++++++++++++++++++++++++++++++------
 1 files changed, 106 insertions(+), 19 deletions(-)

Comments

Zhong Li May 22, 2019, 4:24 a.m.
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of Jun Zhao

> Sent: Tuesday, May 21, 2019 7:21 PM

> To: ffmpeg-devel@ffmpeg.org

> Cc: Jun Zhao <barryjzhao@tencent.com>

> Subject: [FFmpeg-devel] [PATCH V2] lavfi/colorlevels: Add slice threading

> support

> 

> From: Jun Zhao <barryjzhao@tencent.com>

> 

> Add slice threading support, use the command like:

> 

> ./ffmpeg -i input -vf colorlevel with 1080p h264 clip, the fps from 39 fps to

> 79 fps in the local

> 

> Signed-off-by: Jun Zhao <barryjzhao@tencent.com>

> ---

>  libavfilter/vf_colorlevels.c |  125

> +++++++++++++++++++++++++++++++++++------

>  1 files changed, 106 insertions(+), 19 deletions(-)

> 

> diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index

> 5385a5e..68668e7 100644

> --- a/libavfilter/vf_colorlevels.c

> +++ b/libavfilter/vf_colorlevels.c

> @@ -105,6 +105,83 @@ static int config_input(AVFilterLink *inlink)

>      return 0;

>  }

> 

> +struct thread_data {

> +    const uint8_t *srcrow;

> +    uint8_t *dstrow;

> +    int dst_linesize;

> +    int src_linesize;

> +

> +    double coeff;

> +    uint8_t offset;

> +

> +    int h;

> +

> +    int imin;

> +    int omin;

> +};

> +

> +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int

> +jobnr, int nb_jobs) {

> +    ColorLevelsContext *s = ctx->priv;

> +    const struct thread_data *td = arg;

> +

> +    int process_h = td->h;

> +    const int slice_start = (process_h *  jobnr   ) / nb_jobs;

> +    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;

> +    int x, y;

> +    const uint8_t *srcrow = td->srcrow;

> +    uint8_t *dstrow = td->dstrow;

> +    const int step = s->step;

> +    const uint8_t offset = td->offset;

> +

> +    int imin = td->imin;

> +    int omin = td->omin;

> +    double coeff = td->coeff;

> +

> +    for (y = slice_start; y < slice_end; y++) {

> +        const uint8_t *src = srcrow;

> +        uint8_t *dst = dstrow;

> +

> +        for (x = 0; x < s->linesize; x += step)

> +            dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff

> + omin);

> +        dstrow += td->dst_linesize;

> +        srcrow += td->src_linesize;

> +    }

> +

> +    return 0;

> +}

> +

> +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int

> +jobnr, int nb_jobs) {

> +    ColorLevelsContext *s = ctx->priv;

> +    const struct thread_data *td = arg;

> +

> +    int process_h = td->h;

> +    const int slice_start = (process_h *  jobnr   ) / nb_jobs;

> +    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;

> +    int x, y;

> +    const uint8_t *srcrow = td->srcrow;

> +    uint8_t *dstrow = td->dstrow;

> +    const int step = s->step;

> +    const uint8_t offset = td->offset;

> +

> +    int imin = td->imin;

> +    int omin = td->omin;

> +    double coeff = td->coeff;

> +

> +    for (y = slice_start; y < slice_end; y++) {

> +        const uint16_t *src = (const uint16_t *)srcrow;

> +        uint16_t *dst = (uint16_t *)dstrow;


Function colorlevel_slice_16() is the same as colorlevel_slice_8() except here, where uint8_t is replaced by uint16_t.
It would be better to define a template function that can be reused.

> +        for (x = 0; x < s->linesize; x += step)

> +            dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff

> + omin);

> +        dstrow += td->dst_linesize;

> +        srcrow += td->src_linesize;

> +    }

> +

> +    return 0;

> +}
mypopy@gmail.com May 23, 2019, 2:13 a.m.
On Wed, May 22, 2019 at 12:24 PM Li, Zhong <zhong.li@intel.com> wrote:
>
> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf
> > Of Jun Zhao
> > Sent: Tuesday, May 21, 2019 7:21 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Cc: Jun Zhao <barryjzhao@tencent.com>
> > Subject: [FFmpeg-devel] [PATCH V2] lavfi/colorlevels: Add slice threading
> > support
> >
> > From: Jun Zhao <barryjzhao@tencent.com>
> >
> > Add slice threading support, use the command like:
> >
> > ./ffmpeg -i input -vf colorlevel with 1080p h264 clip, the fps from 39 fps to
> > 79 fps in the local
> >
> > Signed-off-by: Jun Zhao <barryjzhao@tencent.com>
> > ---
> >  libavfilter/vf_colorlevels.c |  125
> > +++++++++++++++++++++++++++++++++++------
> >  1 files changed, 106 insertions(+), 19 deletions(-)
> >
> > diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index
> > 5385a5e..68668e7 100644
> > --- a/libavfilter/vf_colorlevels.c
> > +++ b/libavfilter/vf_colorlevels.c
> > @@ -105,6 +105,83 @@ static int config_input(AVFilterLink *inlink)
> >      return 0;
> >  }
> >
> > +struct thread_data {
> > +    const uint8_t *srcrow;
> > +    uint8_t *dstrow;
> > +    int dst_linesize;
> > +    int src_linesize;
> > +
> > +    double coeff;
> > +    uint8_t offset;
> > +
> > +    int h;
> > +
> > +    int imin;
> > +    int omin;
> > +};
> > +
> > +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int
> > +jobnr, int nb_jobs) {
> > +    ColorLevelsContext *s = ctx->priv;
> > +    const struct thread_data *td = arg;
> > +
> > +    int process_h = td->h;
> > +    const int slice_start = (process_h *  jobnr   ) / nb_jobs;
> > +    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;
> > +    int x, y;
> > +    const uint8_t *srcrow = td->srcrow;
> > +    uint8_t *dstrow = td->dstrow;
> > +    const int step = s->step;
> > +    const uint8_t offset = td->offset;
> > +
> > +    int imin = td->imin;
> > +    int omin = td->omin;
> > +    double coeff = td->coeff;
> > +
> > +    for (y = slice_start; y < slice_end; y++) {
> > +        const uint8_t *src = srcrow;
> > +        uint8_t *dst = dstrow;
> > +
> > +        for (x = 0; x < s->linesize; x += step)
> > +            dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff
> > + omin);
> > +        dstrow += td->dst_linesize;
> > +        srcrow += td->src_linesize;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int
> > +jobnr, int nb_jobs) {
> > +    ColorLevelsContext *s = ctx->priv;
> > +    const struct thread_data *td = arg;
> > +
> > +    int process_h = td->h;
> > +    const int slice_start = (process_h *  jobnr   ) / nb_jobs;
> > +    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;
> > +    int x, y;
> > +    const uint8_t *srcrow = td->srcrow;
> > +    uint8_t *dstrow = td->dstrow;
> > +    const int step = s->step;
> > +    const uint8_t offset = td->offset;
> > +
> > +    int imin = td->imin;
> > +    int omin = td->omin;
> > +    double coeff = td->coeff;
> > +
> > +    for (y = slice_start; y < slice_end; y++) {
> > +        const uint16_t *src = (const uint16_t *)srcrow;
> > +        uint16_t *dst = (uint16_t *)dstrow;
>
> Function colorlevel_slice_16() is the same as colorlevel_slice_8() except here, where uint8_t is replaced by uint16_t.
> It would be better to define a template function that can be reused.
I don't like emulating templates in C: it ends up as ugly macro code
for function overloading. That is the reason I use two functions for
8-bit/16-bit.

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c
index 5385a5e..68668e7 100644
--- a/libavfilter/vf_colorlevels.c
+++ b/libavfilter/vf_colorlevels.c
@@ -105,6 +105,83 @@  static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
+struct thread_data {         /* job description passed as arg to the colorlevel_slice_* workers */
+    const uint8_t *srcrow;   /* start of the source rows; filter_frame sets it to in->data[0] */
+    uint8_t *dstrow;         /* start of the destination rows written by the workers */
+    int dst_linesize;        /* byte stride between destination rows (out->linesize[0]) */
+    int src_linesize;        /* byte stride between source rows (in->linesize[0]) */
+
+    double coeff;            /* scale factor: (omax - omin) / (double)(imax - imin) */
+    uint8_t offset;          /* index added to x when addressing a sample within a row */
+
+    int h;                   /* total number of rows split across the jobs (inlink->h) */
+
+    int imin;                /* value subtracted from each input sample before scaling */
+    int omin;                /* value added to each scaled sample before clipping */
+};
+/* Slice-threading job for 8-bit samples: remaps rows [slice_start, slice_end) described by arg (struct thread_data); always returns 0. */
+static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorLevelsContext *s = ctx->priv;
+    const struct thread_data *td = arg;
+    /* Job jobnr of nb_jobs owns the rows [slice_start, slice_end) out of td->h. */
+    int process_h = td->h;
+    const int slice_start = (process_h *  jobnr   ) / nb_jobs;
+    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;
+    int x, y;
+    /* td->srcrow/dstrow address row 0; advance to this job's first row so jobs do not all redo the top rows. */
+    const uint8_t *srcrow = td->srcrow + slice_start * td->src_linesize;
+    uint8_t *dstrow = td->dstrow + slice_start * td->dst_linesize;
+    const int step = s->step;
+    const uint8_t offset = td->offset;
+    int imin = td->imin;
+    int omin = td->omin;
+    double coeff = td->coeff;
+
+    for (y = slice_start; y < slice_end; y++) {
+        const uint8_t *src = srcrow;
+        uint8_t *dst = dstrow;
+        /* Remap every step-th sample starting at index offset; clip the result to the uint8 range. */
+        for (x = 0; x < s->linesize; x += step)
+            dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin);
+        dstrow += td->dst_linesize;
+        srcrow += td->src_linesize;
+    }
+
+    return 0;
+}
+/* Slice-threading job for 16-bit samples: remaps rows [slice_start, slice_end) described by arg (struct thread_data); always returns 0. */
+static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ColorLevelsContext *s = ctx->priv;
+    const struct thread_data *td = arg;
+    /* Job jobnr of nb_jobs owns the rows [slice_start, slice_end) out of td->h. */
+    int process_h = td->h;
+    const int slice_start = (process_h *  jobnr   ) / nb_jobs;
+    const int slice_end   = (process_h * (jobnr+1)) / nb_jobs;
+    int x, y;
+    /* td->srcrow/dstrow address row 0; advance to this job's first row so jobs do not all redo the top rows. */
+    const uint8_t *srcrow = td->srcrow + slice_start * td->src_linesize;
+    uint8_t *dstrow = td->dstrow + slice_start * td->dst_linesize;
+    const int step = s->step;
+    const uint8_t offset = td->offset;
+    int imin = td->imin;
+    int omin = td->omin;
+    double coeff = td->coeff;
+
+    for (y = slice_start; y < slice_end; y++) {
+        const uint16_t *src = (const uint16_t *)srcrow;
+        uint16_t *dst = (uint16_t *)dstrow;
+        /* Remap every step-th 16-bit sample starting at index offset; clip to the uint16 range, not uint8. */
+        for (x = 0; x < s->linesize; x += step)
+            dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin);
+        dstrow += td->dst_linesize;
+        srcrow += td->src_linesize;
+    }
+
+    return 0;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -137,6 +214,7 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             int omin = lrint(r->out_min * UINT8_MAX);
             int omax = lrint(r->out_max * UINT8_MAX);
             double coeff;
+            struct thread_data td;
 
             if (imin < 0) {
                 imin = UINT8_MAX;
@@ -162,15 +240,19 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
             srcrow = in->data[0];
             coeff = (omax - omin) / (double)(imax - imin);
-            for (y = 0; y < inlink->h; y++) {
-                const uint8_t *src = srcrow;
-                uint8_t *dst = dstrow;
-
-                for (x = 0; x < s->linesize; x += step)
-                    dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin);
-                dstrow += out->linesize[0];
-                srcrow += in->linesize[0];
-            }
+
+            td.srcrow        = srcrow;
+            td.dstrow        = dstrow;
+            td.dst_linesize  = out->linesize[0];
+            td.src_linesize  = in->linesize[0];
+            td.coeff         = coeff;
+            td.offset        = offset;
+            td.h             = inlink->h;
+            td.imin          = imin;
+            td.omin          = omin;
+
+            ctx->internal->execute(ctx, colorlevel_slice_8, &td, NULL,
+                                   FFMIN(inlink->h, ff_filter_get_nb_threads(ctx)));
         }
         break;
     case 2:
@@ -184,6 +266,7 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             int omin = lrint(r->out_min * UINT16_MAX);
             int omax = lrint(r->out_max * UINT16_MAX);
             double coeff;
+            struct thread_data td;
 
             if (imin < 0) {
                 imin = UINT16_MAX;
@@ -209,15 +292,19 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
             srcrow = in->data[0];
             coeff = (omax - omin) / (double)(imax - imin);
-            for (y = 0; y < inlink->h; y++) {
-                const uint16_t *src = (const uint16_t*)srcrow;
-                uint16_t *dst = (uint16_t *)dstrow;
-
-                for (x = 0; x < s->linesize; x += step)
-                    dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin);
-                dstrow += out->linesize[0];
-                srcrow += in->linesize[0];
-            }
+
+            td.srcrow        = srcrow;
+            td.dstrow        = dstrow;
+            td.dst_linesize  = out->linesize[0];
+            td.src_linesize  = in->linesize[0];
+            td.coeff         = coeff;
+            td.offset        = offset;
+            td.h             = inlink->h;
+            td.imin          = imin;
+            td.omin          = omin;
+
+            ctx->internal->execute(ctx, colorlevel_slice_16, &td, NULL,
+                                   FFMIN(inlink->h, ff_filter_get_nb_threads(ctx)));
         }
     }
 
@@ -252,5 +339,5 @@  AVFilter ff_vf_colorlevels = {
     .query_formats = query_formats,
     .inputs        = colorlevels_inputs,
     .outputs       = colorlevels_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };