Message ID | 1558437636-6274-1-git-send-email-mypopydev@gmail.com |
---|---|
State | New |
Headers | show |
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf > Of Jun Zhao > Sent: Tuesday, May 21, 2019 7:21 PM > To: ffmpeg-devel@ffmpeg.org > Cc: Jun Zhao <barryjzhao@tencent.com> > Subject: [FFmpeg-devel] [PATCH V2] lavfi/colorlevels: Add slice threading > support > > From: Jun Zhao <barryjzhao@tencent.com> > > Add slice threading support, use the command like: > > ./ffmpeg -i input -vf colorlevel with 1080p h264 clip, the fps from 39 fps to > 79 fps in the local > > Signed-off-by: Jun Zhao <barryjzhao@tencent.com> > --- > libavfilter/vf_colorlevels.c | 125 > +++++++++++++++++++++++++++++++++++------ > 1 files changed, 106 insertions(+), 19 deletions(-) > > diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index > 5385a5e..68668e7 100644 > --- a/libavfilter/vf_colorlevels.c > +++ b/libavfilter/vf_colorlevels.c > @@ -105,6 +105,83 @@ static int config_input(AVFilterLink *inlink) > return 0; > } > > +struct thread_data { > + const uint8_t *srcrow; > + uint8_t *dstrow; > + int dst_linesize; > + int src_linesize; > + > + double coeff; > + uint8_t offset; > + > + int h; > + > + int imin; > + int omin; > +}; > + > +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int > +jobnr, int nb_jobs) { > + ColorLevelsContext *s = ctx->priv; > + const struct thread_data *td = arg; > + > + int process_h = td->h; > + const int slice_start = (process_h * jobnr ) / nb_jobs; > + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; > + int x, y; > + const uint8_t *srcrow = td->srcrow; > + uint8_t *dstrow = td->dstrow; > + const int step = s->step; > + const uint8_t offset = td->offset; > + > + int imin = td->imin; > + int omin = td->omin; > + double coeff = td->coeff; > + > + for (y = slice_start; y < slice_end; y++) { > + const uint8_t *src = srcrow; > + uint8_t *dst = dstrow; > + > + for (x = 0; x < s->linesize; x += step) > + dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff > + omin); > + dstrow += 
td->dst_linesize; > + srcrow += td->src_linesize; > + } > + > + return 0; > +} > + > +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int > +jobnr, int nb_jobs) { > + ColorLevelsContext *s = ctx->priv; > + const struct thread_data *td = arg; > + > + int process_h = td->h; > + const int slice_start = (process_h * jobnr ) / nb_jobs; > + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; > + int x, y; > + const uint8_t *srcrow = td->srcrow; > + uint8_t *dstrow = td->dstrow; > + const int step = s->step; > + const uint8_t offset = td->offset; > + > + int imin = td->imin; > + int omin = td->omin; > + double coeff = td->coeff; > + > + for (y = slice_start; y < slice_end; y++) { > + const uint16_t *src = (const uint16_t *)srcrow; > + uint16_t *dst = (uint16_t *)dstrow; Function colorlevel_slice_16() is the same as colorlevel_slice_8() except here, where uint8_t is replaced by uint16_t. It would be better to define a template function that can be reused. > + for (x = 0; x < s->linesize; x += step) > + dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff > + omin); > + dstrow += td->dst_linesize; > + srcrow += td->src_linesize; > + } > + > + return 0; > +}
On Wed, May 22, 2019 at 12:24 PM Li, Zhong <zhong.li@intel.com> wrote: > > > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf > > Of Jun Zhao > > Sent: Tuesday, May 21, 2019 7:21 PM > > To: ffmpeg-devel@ffmpeg.org > > Cc: Jun Zhao <barryjzhao@tencent.com> > > Subject: [FFmpeg-devel] [PATCH V2] lavfi/colorlevels: Add slice threading > > support > > > > From: Jun Zhao <barryjzhao@tencent.com> > > > > Add slice threading support, use the command like: > > > > ./ffmpeg -i input -vf colorlevel with 1080p h264 clip, the fps from 39 fps to > > 79 fps in the local > > > > Signed-off-by: Jun Zhao <barryjzhao@tencent.com> > > --- > > libavfilter/vf_colorlevels.c | 125 > > +++++++++++++++++++++++++++++++++++------ > > 1 files changed, 106 insertions(+), 19 deletions(-) > > > > diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index > > 5385a5e..68668e7 100644 > > --- a/libavfilter/vf_colorlevels.c > > +++ b/libavfilter/vf_colorlevels.c > > @@ -105,6 +105,83 @@ static int config_input(AVFilterLink *inlink) > > return 0; > > } > > > > +struct thread_data { > > + const uint8_t *srcrow; > > + uint8_t *dstrow; > > + int dst_linesize; > > + int src_linesize; > > + > > + double coeff; > > + uint8_t offset; > > + > > + int h; > > + > > + int imin; > > + int omin; > > +}; > > + > > +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int > > +jobnr, int nb_jobs) { > > + ColorLevelsContext *s = ctx->priv; > > + const struct thread_data *td = arg; > > + > > + int process_h = td->h; > > + const int slice_start = (process_h * jobnr ) / nb_jobs; > > + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; > > + int x, y; > > + const uint8_t *srcrow = td->srcrow; > > + uint8_t *dstrow = td->dstrow; > > + const int step = s->step; > > + const uint8_t offset = td->offset; > > + > > + int imin = td->imin; > > + int omin = td->omin; > > + double coeff = td->coeff; > > + > > + for (y = slice_start; y < slice_end; y++) { > > + const uint8_t 
*src = srcrow; > > + uint8_t *dst = dstrow; > > + > > + for (x = 0; x < s->linesize; x += step) > > + dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff > > + omin); > > + dstrow += td->dst_linesize; > > + srcrow += td->src_linesize; > > + } > > + > > + return 0; > > +} > > + > > +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int > > +jobnr, int nb_jobs) { > > + ColorLevelsContext *s = ctx->priv; > > + const struct thread_data *td = arg; > > + > > + int process_h = td->h; > > + const int slice_start = (process_h * jobnr ) / nb_jobs; > > + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; > > + int x, y; > > + const uint8_t *srcrow = td->srcrow; > > + uint8_t *dstrow = td->dstrow; > > + const int step = s->step; > > + const uint8_t offset = td->offset; > > + > > + int imin = td->imin; > > + int omin = td->omin; > > + double coeff = td->coeff; > > + > > + for (y = slice_start; y < slice_end; y++) { > > + const uint16_t *src = (const uint16_t *)srcrow; > > + uint16_t *dst = (uint16_t *)dstrow; > > Function colorlevel_slice_16() is same as colorlevel_slice_8 expect here to replace unit8_t to be unit16t. > Would better to define a template function to be reused. I don't like that the only way to get templates in C is some ugly macro code for function overloading; this is the reason I use 2 functions for 8 bits/16 bits.
diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index 5385a5e..68668e7 100644 --- a/libavfilter/vf_colorlevels.c +++ b/libavfilter/vf_colorlevels.c @@ -105,6 +105,83 @@ static int config_input(AVFilterLink *inlink) return 0; } +struct thread_data { + const uint8_t *srcrow; + uint8_t *dstrow; + int dst_linesize; + int src_linesize; + + double coeff; + uint8_t offset; + + int h; + + int imin; + int omin; +}; + +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + ColorLevelsContext *s = ctx->priv; + const struct thread_data *td = arg; + + int process_h = td->h; + const int slice_start = (process_h * jobnr ) / nb_jobs; + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; + int x, y; + const uint8_t *srcrow = td->srcrow; + uint8_t *dstrow = td->dstrow; + const int step = s->step; + const uint8_t offset = td->offset; + + int imin = td->imin; + int omin = td->omin; + double coeff = td->coeff; + + for (y = slice_start; y < slice_end; y++) { + const uint8_t *src = srcrow; + uint8_t *dst = dstrow; + + for (x = 0; x < s->linesize; x += step) + dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin); + dstrow += td->dst_linesize; + srcrow += td->src_linesize; + } + + return 0; +} + +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + ColorLevelsContext *s = ctx->priv; + const struct thread_data *td = arg; + + int process_h = td->h; + const int slice_start = (process_h * jobnr ) / nb_jobs; + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; + int x, y; + const uint8_t *srcrow = td->srcrow; + uint8_t *dstrow = td->dstrow; + const int step = s->step; + const uint8_t offset = td->offset; + + int imin = td->imin; + int omin = td->omin; + double coeff = td->coeff; + + for (y = slice_start; y < slice_end; y++) { + const uint16_t *src = (const uint16_t *)srcrow; + uint16_t *dst = (uint16_t *)dstrow; + + for (x = 0; x < s->linesize; x += step) + dst[x 
+ offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin); + dstrow += td->dst_linesize; + srcrow += td->src_linesize; + } + + return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; @@ -137,6 +214,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) int omin = lrint(r->out_min * UINT8_MAX); int omax = lrint(r->out_max * UINT8_MAX); double coeff; + struct thread_data td; if (imin < 0) { imin = UINT8_MAX; @@ -162,15 +240,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) srcrow = in->data[0]; coeff = (omax - omin) / (double)(imax - imin); - for (y = 0; y < inlink->h; y++) { - const uint8_t *src = srcrow; - uint8_t *dst = dstrow; - - for (x = 0; x < s->linesize; x += step) - dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin); - dstrow += out->linesize[0]; - srcrow += in->linesize[0]; - } + + td.srcrow = srcrow; + td.dstrow = dstrow; + td.dst_linesize = out->linesize[0]; + td.src_linesize = in->linesize[0]; + td.coeff = coeff; + td.offset = offset; + td.h = inlink->h; + td.imin = imin; + td.omin = omin; + + ctx->internal->execute(ctx, colorlevel_slice_8, &td, NULL, + FFMIN(inlink->h, ff_filter_get_nb_threads(ctx))); } break; case 2: @@ -184,6 +266,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) int omin = lrint(r->out_min * UINT16_MAX); int omax = lrint(r->out_max * UINT16_MAX); double coeff; + struct thread_data td; if (imin < 0) { imin = UINT16_MAX; @@ -209,15 +292,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) srcrow = in->data[0]; coeff = (omax - omin) / (double)(imax - imin); - for (y = 0; y < inlink->h; y++) { - const uint16_t *src = (const uint16_t*)srcrow; - uint16_t *dst = (uint16_t *)dstrow; - - for (x = 0; x < s->linesize; x += step) - dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin); - dstrow += out->linesize[0]; - srcrow += in->linesize[0]; - } + + td.srcrow = srcrow; + td.dstrow = 
dstrow; + td.dst_linesize = out->linesize[0]; + td.src_linesize = in->linesize[0]; + td.coeff = coeff; + td.offset = offset; + td.h = inlink->h; + td.imin = imin; + td.omin = omin; + + ctx->internal->execute(ctx, colorlevel_slice_16, &td, NULL, + FFMIN(inlink->h, ff_filter_get_nb_threads(ctx))); } } @@ -252,5 +339,5 @@ AVFilter ff_vf_colorlevels = { .query_formats = query_formats, .inputs = colorlevels_inputs, .outputs = colorlevels_outputs, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, };