Message ID | 1570632431-16033-1-git-send-email-mypopydev@gmail.com |
---|---|
State | New |
Headers | show |
On 10/9/19, Jun Zhao <mypopydev@gmail.com> wrote: > From: Jun Zhao <barryjzhao@tencent.com> > > Enabled one thread per plane, used the test command for 1080P video > (YUV420P format) as follows: > > ffmpeg -i 1080p.mp4 -an -vf hqdn3d -f null /dev/null > > This optimization improved the performance by about 30% in the 1080P YUV420P > case (from 110fps to 143fps), and also passes the framemd5 check and FATE. > > Signed-off-by: Jun Zhao <barryjzhao@tencent.com> > --- > libavfilter/vf_hqdn3d.c | 56 > +++++++++++++++++++++++++++++++++------------- > libavfilter/vf_hqdn3d.h | 2 +- > 2 files changed, 41 insertions(+), 17 deletions(-) > > diff --git a/libavfilter/vf_hqdn3d.c b/libavfilter/vf_hqdn3d.c > index d6c14bb..08cc03a 100644 > --- a/libavfilter/vf_hqdn3d.c > +++ b/libavfilter/vf_hqdn3d.c > @@ -223,7 +223,9 @@ static av_cold void uninit(AVFilterContext *ctx) > av_freep(&s->coefs[1]); > av_freep(&s->coefs[2]); > av_freep(&s->coefs[3]); > - av_freep(&s->line); > + av_freep(&s->line[0]); > + av_freep(&s->line[1]); > + av_freep(&s->line[2]); > av_freep(&s->frame_prev[0]); > av_freep(&s->frame_prev[1]); > av_freep(&s->frame_prev[2]); > @@ -271,9 +273,11 @@ static int config_input(AVFilterLink *inlink) > s->vsub = desc->log2_chroma_h; > s->depth = desc->comp[0].depth; > > - s->line = av_malloc_array(inlink->w, sizeof(*s->line)); > - if (!s->line) > - return AVERROR(ENOMEM); > + for (i = 0; i < 3; i++) { > + s->line[i] = av_malloc_array(inlink->w, sizeof(*s->line[i])); > + if (!s->line[i]) > + return AVERROR(ENOMEM); > + } > > for (i = 0; i < 4; i++) { > s->coefs[i] = precalc_coefs(s->strength[i], s->depth); > @@ -287,14 +291,38 @@ static int config_input(AVFilterLink *inlink) > return 0; > } > > +typedef struct ThreadData { > + AVFrame *in, *out; > + int direct; > +} ThreadData; > + > +static int do_denoise(AVFilterContext *ctx, void *data, int job_nr, int > n_jobs) > +{ > + HQDN3DContext *s = ctx->priv; > + const ThreadData *td = data; > + AVFrame *out = td->out; > + AVFrame *in = 
td->in; > + int direct = td->direct; > + > + denoise(s, in->data[job_nr], out->data[job_nr], > + s->line[job_nr], &s->frame_prev[job_nr], > + AV_CEIL_RSHIFT(in->width, (!!job_nr * s->hsub)), > + AV_CEIL_RSHIFT(in->height, (!!job_nr * s->vsub)), > + in->linesize[job_nr], out->linesize[job_nr], > + s->coefs[job_nr ? CHROMA_SPATIAL : LUMA_SPATIAL], > + s->coefs[job_nr ? CHROMA_TMP : LUMA_TMP]); > + > + return 0; > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVFilterContext *ctx = inlink->dst; > - HQDN3DContext *s = ctx->priv; > AVFilterLink *outlink = ctx->outputs[0]; > > AVFrame *out; > - int c, direct = av_frame_is_writable(in) && !ctx->is_disabled; > + int direct = av_frame_is_writable(in) && !ctx->is_disabled; > + ThreadData td; > > if (direct) { > out = in; > @@ -308,15 +336,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame > *in) > av_frame_copy_props(out, in); > } > > - for (c = 0; c < 3; c++) { > - denoise(s, in->data[c], out->data[c], > - s->line, &s->frame_prev[c], > - AV_CEIL_RSHIFT(in->width, (!!c * s->hsub)), > - AV_CEIL_RSHIFT(in->height, (!!c * s->vsub)), > - in->linesize[c], out->linesize[c], > - s->coefs[c ? CHROMA_SPATIAL : LUMA_SPATIAL], > - s->coefs[c ? 
CHROMA_TMP : LUMA_TMP]); > - } > + td.in = in; > + td.out = out; > + td.direct = direct; > + /* one thread per planar */ /* one thread per plane */ > + ctx->internal->execute(ctx, do_denoise, &td, NULL, 3); > > if (ctx->is_disabled) { > av_frame_free(&out); > @@ -370,5 +394,5 @@ AVFilter ff_vf_hqdn3d = { > .query_formats = query_formats, > .inputs = avfilter_vf_hqdn3d_inputs, > .outputs = avfilter_vf_hqdn3d_outputs, > - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | > AVFILTER_FLAG_SLICE_THREADS, > }; > diff --git a/libavfilter/vf_hqdn3d.h b/libavfilter/vf_hqdn3d.h > index 03a79a1..3279bbc 100644 > --- a/libavfilter/vf_hqdn3d.h > +++ b/libavfilter/vf_hqdn3d.h > @@ -31,7 +31,7 @@ > typedef struct HQDN3DContext { > const AVClass *class; > int16_t *coefs[4]; > - uint16_t *line; > + uint16_t *line[3]; > uint16_t *frame_prev[3]; > double strength[4]; > int hsub, vsub; > -- > 1.7.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". Looks otherwise ready to merge.
On Wed, Oct 9, 2019 at 10:49 PM Paul B Mahol <onemda@gmail.com> wrote: > > On 10/9/19, Jun Zhao <mypopydev@gmail.com> wrote: > > From: Jun Zhao <barryjzhao@tencent.com> > > > > Enabled one thread per plane, used the test command for 1080P video > > (YUV420P format) as follows: > > > > ffmpeg -i 1080p.mp4 -an -vf hqdn3d -f null /dev/null > > > > This optimization improved the performance by about 30% in the 1080P YUV420P > > case (from 110fps to 143fps), and also passes the framemd5 check and FATE. > > > > Signed-off-by: Jun Zhao <barryjzhao@tencent.com> > > --- > > libavfilter/vf_hqdn3d.c | 56 > > +++++++++++++++++++++++++++++++++------------- > > libavfilter/vf_hqdn3d.h | 2 +- > > 2 files changed, 41 insertions(+), 17 deletions(-) > > > > diff --git a/libavfilter/vf_hqdn3d.c b/libavfilter/vf_hqdn3d.c > > index d6c14bb..08cc03a 100644 > > --- a/libavfilter/vf_hqdn3d.c > > +++ b/libavfilter/vf_hqdn3d.c > > @@ -223,7 +223,9 @@ static av_cold void uninit(AVFilterContext *ctx) > > av_freep(&s->coefs[1]); > > av_freep(&s->coefs[2]); > > av_freep(&s->coefs[3]); > > - av_freep(&s->line); > > + av_freep(&s->line[0]); > > + av_freep(&s->line[1]); > > + av_freep(&s->line[2]); > > av_freep(&s->frame_prev[0]); > > av_freep(&s->frame_prev[1]); > > av_freep(&s->frame_prev[2]); > > @@ -271,9 +273,11 @@ static int config_input(AVFilterLink *inlink) > > s->vsub = desc->log2_chroma_h; > > s->depth = desc->comp[0].depth; > > > > - s->line = av_malloc_array(inlink->w, sizeof(*s->line)); > > - if (!s->line) > > - return AVERROR(ENOMEM); > > + for (i = 0; i < 3; i++) { > > + s->line[i] = av_malloc_array(inlink->w, sizeof(*s->line[i])); > > + if (!s->line[i]) > > + return AVERROR(ENOMEM); > > + } > > > > for (i = 0; i < 4; i++) { > > s->coefs[i] = precalc_coefs(s->strength[i], s->depth); > > @@ -287,14 +291,38 @@ static int config_input(AVFilterLink *inlink) > > return 0; > > } > > > > +typedef struct ThreadData { > > + AVFrame *in, *out; > > + int direct; > > +} ThreadData; > > + > > +static int 
do_denoise(AVFilterContext *ctx, void *data, int job_nr, int > > n_jobs) > > +{ > > + HQDN3DContext *s = ctx->priv; > > + const ThreadData *td = data; > > + AVFrame *out = td->out; > > + AVFrame *in = td->in; > > + int direct = td->direct; > > + > > + denoise(s, in->data[job_nr], out->data[job_nr], > > + s->line[job_nr], &s->frame_prev[job_nr], > > + AV_CEIL_RSHIFT(in->width, (!!job_nr * s->hsub)), > > + AV_CEIL_RSHIFT(in->height, (!!job_nr * s->vsub)), > > + in->linesize[job_nr], out->linesize[job_nr], > > + s->coefs[job_nr ? CHROMA_SPATIAL : LUMA_SPATIAL], > > + s->coefs[job_nr ? CHROMA_TMP : LUMA_TMP]); > > + > > + return 0; > > +} > > + > > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > > { > > AVFilterContext *ctx = inlink->dst; > > - HQDN3DContext *s = ctx->priv; > > AVFilterLink *outlink = ctx->outputs[0]; > > > > AVFrame *out; > > - int c, direct = av_frame_is_writable(in) && !ctx->is_disabled; > > + int direct = av_frame_is_writable(in) && !ctx->is_disabled; > > + ThreadData td; > > > > if (direct) { > > out = in; > > @@ -308,15 +336,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame > > *in) > > av_frame_copy_props(out, in); > > } > > > > - for (c = 0; c < 3; c++) { > > - denoise(s, in->data[c], out->data[c], > > - s->line, &s->frame_prev[c], > > - AV_CEIL_RSHIFT(in->width, (!!c * s->hsub)), > > - AV_CEIL_RSHIFT(in->height, (!!c * s->vsub)), > > - in->linesize[c], out->linesize[c], > > - s->coefs[c ? CHROMA_SPATIAL : LUMA_SPATIAL], > > - s->coefs[c ? 
CHROMA_TMP : LUMA_TMP]); > > - } > > + td.in = in; > > + td.out = out; > > + td.direct = direct; > > + /* one thread per planar */ > > /* one thread per plane */ > > > + ctx->internal->execute(ctx, do_denoise, &td, NULL, 3); > > > > if (ctx->is_disabled) { > > av_frame_free(&out); > > @@ -370,5 +394,5 @@ AVFilter ff_vf_hqdn3d = { > > .query_formats = query_formats, > > .inputs = avfilter_vf_hqdn3d_inputs, > > .outputs = avfilter_vf_hqdn3d_outputs, > > - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, > > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | > > AVFILTER_FLAG_SLICE_THREADS, > > }; > > diff --git a/libavfilter/vf_hqdn3d.h b/libavfilter/vf_hqdn3d.h > > index 03a79a1..3279bbc 100644 > > --- a/libavfilter/vf_hqdn3d.h > > +++ b/libavfilter/vf_hqdn3d.h > > @@ -31,7 +31,7 @@ > > typedef struct HQDN3DContext { > > const AVClass *class; > > int16_t *coefs[4]; > > - uint16_t *line; > > + uint16_t *line[3]; > > uint16_t *frame_prev[3]; > > double strength[4]; > > int hsub, vsub; > > -- > > 1.7.1 > > > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > > Looks otherwise ready to merge. Updated the comments and applied, thx
diff --git a/libavfilter/vf_hqdn3d.c b/libavfilter/vf_hqdn3d.c index d6c14bb..08cc03a 100644 --- a/libavfilter/vf_hqdn3d.c +++ b/libavfilter/vf_hqdn3d.c @@ -223,7 +223,9 @@ static av_cold void uninit(AVFilterContext *ctx) av_freep(&s->coefs[1]); av_freep(&s->coefs[2]); av_freep(&s->coefs[3]); - av_freep(&s->line); + av_freep(&s->line[0]); + av_freep(&s->line[1]); + av_freep(&s->line[2]); av_freep(&s->frame_prev[0]); av_freep(&s->frame_prev[1]); av_freep(&s->frame_prev[2]); @@ -271,9 +273,11 @@ static int config_input(AVFilterLink *inlink) s->vsub = desc->log2_chroma_h; s->depth = desc->comp[0].depth; - s->line = av_malloc_array(inlink->w, sizeof(*s->line)); - if (!s->line) - return AVERROR(ENOMEM); + for (i = 0; i < 3; i++) { + s->line[i] = av_malloc_array(inlink->w, sizeof(*s->line[i])); + if (!s->line[i]) + return AVERROR(ENOMEM); + } for (i = 0; i < 4; i++) { s->coefs[i] = precalc_coefs(s->strength[i], s->depth); @@ -287,14 +291,38 @@ static int config_input(AVFilterLink *inlink) return 0; } +typedef struct ThreadData { + AVFrame *in, *out; + int direct; +} ThreadData; + +static int do_denoise(AVFilterContext *ctx, void *data, int job_nr, int n_jobs) +{ + HQDN3DContext *s = ctx->priv; + const ThreadData *td = data; + AVFrame *out = td->out; + AVFrame *in = td->in; + int direct = td->direct; + + denoise(s, in->data[job_nr], out->data[job_nr], + s->line[job_nr], &s->frame_prev[job_nr], + AV_CEIL_RSHIFT(in->width, (!!job_nr * s->hsub)), + AV_CEIL_RSHIFT(in->height, (!!job_nr * s->vsub)), + in->linesize[job_nr], out->linesize[job_nr], + s->coefs[job_nr ? CHROMA_SPATIAL : LUMA_SPATIAL], + s->coefs[job_nr ? 
CHROMA_TMP : LUMA_TMP]); + + return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; - HQDN3DContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; AVFrame *out; - int c, direct = av_frame_is_writable(in) && !ctx->is_disabled; + int direct = av_frame_is_writable(in) && !ctx->is_disabled; + ThreadData td; if (direct) { out = in; @@ -308,15 +336,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_copy_props(out, in); } - for (c = 0; c < 3; c++) { - denoise(s, in->data[c], out->data[c], - s->line, &s->frame_prev[c], - AV_CEIL_RSHIFT(in->width, (!!c * s->hsub)), - AV_CEIL_RSHIFT(in->height, (!!c * s->vsub)), - in->linesize[c], out->linesize[c], - s->coefs[c ? CHROMA_SPATIAL : LUMA_SPATIAL], - s->coefs[c ? CHROMA_TMP : LUMA_TMP]); - } + td.in = in; + td.out = out; + td.direct = direct; + /* one thread per planar */ + ctx->internal->execute(ctx, do_denoise, &td, NULL, 3); if (ctx->is_disabled) { av_frame_free(&out); @@ -370,5 +394,5 @@ AVFilter ff_vf_hqdn3d = { .query_formats = query_formats, .inputs = avfilter_vf_hqdn3d_inputs, .outputs = avfilter_vf_hqdn3d_outputs, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS, }; diff --git a/libavfilter/vf_hqdn3d.h b/libavfilter/vf_hqdn3d.h index 03a79a1..3279bbc 100644 --- a/libavfilter/vf_hqdn3d.h +++ b/libavfilter/vf_hqdn3d.h @@ -31,7 +31,7 @@ typedef struct HQDN3DContext { const AVClass *class; int16_t *coefs[4]; - uint16_t *line; + uint16_t *line[3]; uint16_t *frame_prev[3]; double strength[4]; int hsub, vsub;