Message ID | 20210502194720.19226-1-andrey.moskalenko@graphics.cs.msu.ru |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,GSOC] avfilter: added guided filter | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
Andrey Moskalenko <andrey.moskalenko@graphics.cs.msu.ru> 于2021年5月3日周一 上午3:47写道: > > Added guided filter with subsampling and multithreading for speedup. Maybe not only use multithreading for speedup, also need some algorithms to improve it. What about the speedup compare data? > --- > doc/filters.texi | 25 +++ > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_guided.c | 400 +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 427 insertions(+) > create mode 100644 libavfilter/vf_guided.c > > diff --git a/doc/filters.texi b/doc/filters.texi > index 36e35a175b..856969f51f 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -12918,6 +12918,31 @@ greyedge=difford=1:minknorm=0:sigma=2 > > @end itemize > > +@section guided > +Apply guided filter, spatial smoothing while preserving edges. > + > +The filter accepts the following options: > +@table @option > +@item radius > +Set the spatial blur radius in pixels. > +Allowed range is 1 to 1024. Default is 8. > + > +@item eps > +Set regularization parameter (without square) as in the original paper. > +Allowed range is 0 to 1. Default is 0.1. > + > +@item sub > +Set subsampling ratio. > +Allowed range is 1 to 1024. Default is 4. > + > +@item planes > +Set planes to filter. Default is first only. > +@end table > + > +@subsection Commands > + > +This filter supports the all above options as @ref{commands}. > + > @anchor{haldclut} > @section haldclut > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 5a287364b0..f66c6ef65b 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -174,6 +174,7 @@ OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o > OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ > opencl/avgblur.o boxblur.o > OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o > +OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o > OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o > OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o > OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index 931d7dbb0d..962f656abc 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -270,6 +270,7 @@ extern const AVFilter ff_vf_geq; > extern const AVFilter ff_vf_gradfun; > extern const AVFilter ff_vf_graphmonitor; > extern const AVFilter ff_vf_greyedge; > +extern const AVFilter ff_vf_guided; > extern const AVFilter ff_vf_haldclut; > extern const AVFilter ff_vf_hflip; > extern const AVFilter ff_vf_histeq; > diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c > new file mode 100644 > index 0000000000..2bb640f686 > --- /dev/null > +++ b/libavfilter/vf_guided.c > @@ -0,0 +1,400 @@ > +/* > + * Copyright (c) 2021 Andrey Moskalenko > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +/** > + * @file > + * Guided filter. > + * Principle of operation: > + * Implemented the approach from the original article > + * with multithread speedup of boxfilter and subsampling technique. > + */ > + > +#include "libavutil/imgutils.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "avfilter.h" > +#include "formats.h" > +#include "internal.h" > +#include "video.h" > + > +typedef struct GuidedContext { > + const AVClass *class; > + int radius; > + float eps; > + int sub; > + int planes; > + int nb_planes; > + int depth; > + int planewidth[4]; > + int planeheight[4]; > + float *I_mean; > + float *I_pow; > + float *A; > + float *B; > + float *buffer; > + int (*filter_horizontally)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); > + int (*filter_vertically)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); > +} GuidedContext; > + > +#define OFFSET(x) offsetof(GuidedContext, x) > +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM > + > +static const AVOption guided_options[] = { > + { "radius", "The box radius in pixels", OFFSET(radius), AV_OPT_TYPE_INT, {.i64=8}, 1, 1024, FLAGS }, > + { "eps", "Regularization parameter (without square)", OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0.0, 1, FLAGS }, > + { "sub", "Subsampling ratio", OFFSET(sub), AV_OPT_TYPE_INT, {.i64=4}, 1, 1024, FLAGS }, > + { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=1}, 0, 0xF, FLAGS }, > + { NULL } > +}; > + > +typedef struct ThreadData { > + int height; > + int width; > + float *ptr; > + int linesize; > +} ThreadData; > + > +AVFILTER_DEFINE_CLASS(guided); > + > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, > + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, > + AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, > + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, > + AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, > + AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, > + AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12, > + AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, > + AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, > + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, > + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, > + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, > + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, > + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, > + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, > + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16, > + AV_PIX_FMT_NONE > + }; > + > + return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); > +} > + > + > +static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) > +{ > + GuidedContext *s = ctx->priv; > + ThreadData *td = arg; > + const int height = td->height; > + const int width = td->width; > + const int slice_start = (height * jobnr ) / nb_jobs; > + const int slice_end = (height * (jobnr+1)) / nb_jobs; > + const int radius = s->radius; > + const int linesize = td->linesize / sizeof(float); > + float *buffer = s->buffer; > + const float *src; > + float *ptr; > + int y, x; > + > + /* Filter horizontally along each row */ > + for (y = slice_start; y < slice_end; y++) { > + float acc = 0; > + int count = 0; > + > + src = (const float *)td->ptr + linesize * y; > + ptr = buffer + width * y; > + > + for (x = 0; x < radius; x++) { > + acc += src[x]; > + } > + count += radius; > + > + for (x = 0; x <= radius; x++) { > + acc += src[x + radius]; > + count++; > + ptr[x] = acc / count; > + } > + > + for (; x < width - radius; x++) { > + acc += src[x + radius] - src[x - radius - 1]; > + ptr[x] = acc / count; > + } > + > + for (; x < width; x++) { > + acc -= src[x - radius]; > + count--; > + ptr[x] = acc / count; > + } > + } > + > + return 0; > +} > + > + > +static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) > +{ > + GuidedContext *s = ctx->priv; > + ThreadData *td = arg; > + const int height = td->height; > + const int width = td->width; > + const int slice_start = (width * jobnr ) / nb_jobs; > + const int slice_end = (width * (jobnr+1)) / nb_jobs; > + const int radius = s->radius; > + const int linesize = td->linesize / sizeof(float); > + float *buffer = (float *)td->ptr; > + const float *src; > + float *ptr; > + int i, x; > + > + /* Filter vertically along each column */ > + for (x = slice_start; x < slice_end; x++) { > + float acc = 0; > + int count = 0; > + > + src = s->buffer + x; > + > + for (i = 0; i < radius; i++) { > + acc += src[0]; > + src += width; > + } > + count += radius; > + > + src = s->buffer + x; > + ptr = buffer + x; > + for (i = 0; i + radius < height && i <= radius; i++) { > + acc += src[(i + radius) * width]; > + count++; > + ptr[i * linesize] = acc / count; > + } > + > + for (; i < height - radius; i++) { > + acc += src[(i + radius) * width] - src[(i - radius - 1) * width]; > + ptr[i * linesize] = acc / count; > + } > + > + for (; i < height; i++) { > + acc -= src[(i - radius) * width]; > + count--; > + ptr[i * linesize] = acc / count; > + } > + } > + > + return 0; > +} > + > + > +static int config_input(AVFilterLink *inlink) > +{ > + AVFilterContext *ctx = inlink->dst; > + GuidedContext *s = ctx->priv; > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); > + > + s->depth = desc->comp[0].depth; > + // For numerical stability > + if (s->eps < 1e-8) > + s->eps = 1e-8; > + // Avoid radius/sub = 0 > + if (s->radius >= s->sub) > + s->radius = s->radius / s->sub; > + else > + s->radius = 1; > + s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); > + s->planewidth[0] = s->planewidth[3] = inlink->w; > + s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); > + s->planeheight[0] = s->planeheight[3] = inlink->h; > + > + s->nb_planes = av_pix_fmt_count_planes(inlink->format); > + > + s->I_mean = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); > + s->I_pow = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); > + s->A = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); > + s->B = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); > + s->buffer = av_malloc_array(inlink->w/s->sub, inlink->h/s->sub * sizeof(*s->buffer)); > + if (!s->I_mean || > + !s->I_pow || > + !s->A || > + !s->B || > + !s->buffer) > + return AVERROR(ENOMEM); > + s->filter_horizontally = filter_horizontally; > + s->filter_vertically = filter_vertically; > + return 0; > +} > + > + > +/** > + * Calculate guided filter output. > + * > + * @param ctx filter context > + * @param ssrc source plane > + * @param ddst output plane > + * @param width, height shapes > + * @param src_linesize, dst_linesize aligned shapes > + * @param maxval maximum value of image type (used for converting to float) > + */ > +#define GUIDED_FILTER(name, type) \ > +static void guided_##name(AVFilterContext *ctx, const uint8_t *ssrc, uint8_t *ddst, \ > + int w, int h, int src_linesize, \ > + int dst_linesize, float maxval) { \ > + GuidedContext *s = ctx->priv; \ > + const int nb_threads = ff_filter_get_nb_threads(ctx); \ > + ThreadData td; \ > + type *dst = (type *)ddst; \ > + const type *src = (const type *)ssrc; \ > + float *I_mean = s->I_mean; \ > + float *I_pow = s->I_pow; \ > + float *A = s->A; \ > + float *B = s->B; \ > + int sub = s->sub; \ > + int height = h / sub; \ > + int width = w / sub; \ > + float val; \ > + for (int i = 0; i < height; ++i) { \ > + for (int j = 0; j < width; ++j) { \ > + I_mean[j + i * width] = src[j * sub + i * src_linesize * sub] / maxval; \ > + I_pow[j + i * width] = I_mean[j + i * width] * I_mean[j + i * width]; \ > + } \ > + } \ > + td.width = width; \ > + td.height = height; \ > + td.linesize = width * sizeof(float); \ > + td.ptr = I_mean; \ > + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ > + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ > + \ > + td.ptr = I_pow; \ > + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ > + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ > + \ > + for (int i = 0; i < height; ++i) { \ > + for (int j = 0; j < width; ++j) { \ > + val = I_pow[j + i * width] - I_mean[j + i * width] * I_mean[j + i * width]; \ > + A[j + i * width] = val / (val + s->eps*s->eps); \ > + B[j + i * width] = I_mean[j + i * width] * (1.f - A[j + i * width]); \ > + } \ > + } \ > + \ > + td.ptr = A; \ > + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ > + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ > + \ > + td.ptr = B; \ > + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ > + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ > + \ > + for (int i = 0; i < h; i++) \ > + for (int j = 0; j < w; j++) \ > + dst[j + i * dst_linesize] = \ > + A[j / sub + (i / sub) * width] * src[j + i * src_linesize] + \ > + B[j / sub + (i / sub) * width] * maxval; \ > +} \ > + > +GUIDED_FILTER(8, uint8_t) > +GUIDED_FILTER(16, uint16_t) > + > +static int filter_frame(AVFilterLink *inlink, AVFrame *in) > +{ > + AVFilterContext *ctx = inlink->dst; > + GuidedContext *s = ctx->priv; > + AVFilterLink *outlink = ctx->outputs[0]; > + AVFrame *out; > + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!out) { > + av_frame_free(&in); > + return AVERROR(ENOMEM); > + } > + av_frame_copy_props(out, in); > + for (int plane = 0; plane < s->nb_planes; plane++) { > + if (!(s->planes & (1 << plane))) { > + av_image_copy_plane(out->data[plane], out->linesize[plane], > + in->data[plane], in->linesize[plane], > + s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]); > + continue; > + } > + if (s->depth <= 8) > + guided_8(ctx, in->data[plane], out->data[plane], s->planewidth[plane], > + s->planeheight[plane], in->linesize[plane], > + out->linesize[plane], (1 << s->depth) - 1.f); > + else > + guided_16(ctx, in->data[plane], out->data[plane], s->planewidth[plane], > + s->planeheight[plane], in->linesize[plane] / 2, > + out->linesize[plane] / 2, (1 << s->depth) - 1.f); > + } > + av_frame_free(&in); > + return ff_filter_frame(outlink, out); > +} > + > +static av_cold void uninit(AVFilterContext *ctx) > +{ > + GuidedContext *s = ctx->priv; > + av_freep(&s->I_mean); > + av_freep(&s->I_pow); > + av_freep(&s->A); > + av_freep(&s->B); > + av_freep(&s->buffer); > +} > + > +static int process_command(AVFilterContext *ctx, > + const char *cmd, > + const char *arg, > + char *res, > + int res_len, > + int flags) > +{ > + int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); > + > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > +static const AVFilterPad guided_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = config_input, > + .filter_frame = filter_frame, > + }, > + { NULL } > +}; > + > +static const AVFilterPad guided_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + }, > + { NULL } > +}; > + > +AVFilter ff_vf_guided = { > + .name = "guided", > + .description = NULL_IF_CONFIG_SMALL("Apply Guided filter."), > + .priv_size = sizeof(GuidedContext), > + .priv_class = &guided_class, > + .uninit = uninit, > + .query_formats = query_formats, > + .inputs = guided_inputs, > + .outputs = guided_outputs, > + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, > + .process_command = process_command, > +}; > -- > 2.30.2 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". Thanks Steven
> Maybe not only use multithreading for speedup, also need some > algorithms to improve it. > What about the speedup compare data? Additionally, a fast implementation of box filtering is used and sub-sampling as recommended in https://arxiv.org/abs/1505.00996. Current implementation outperforms the existing bilateral filter in terms of speed and quality on my 8-core CPU. The speed depends significantly on the sub-sampling parameter. With sub = 4, for example, the filter works 110 FPS against 74 FPS for the bilateral one with the same spatial radius.
diff --git a/doc/filters.texi b/doc/filters.texi index 36e35a175b..856969f51f 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -12918,6 +12918,31 @@ greyedge=difford=1:minknorm=0:sigma=2 @end itemize +@section guided +Apply guided filter, spatial smoothing while preserving edges. + +The filter accepts the following options: +@table @option +@item radius +Set the spatial blur radius in pixels. +Allowed range is 1 to 1024. Default is 8. + +@item eps +Set regularization parameter (without square) as in the original paper. +Allowed range is 0 to 1. Default is 0.1. + +@item sub +Set subsampling ratio. +Allowed range is 1 to 1024. Default is 4. + +@item planes +Set planes to filter. Default is first only. +@end table + +@subsection Commands + +This filter supports the all above options as @ref{commands}. + @anchor{haldclut} @section haldclut diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 5a287364b0..f66c6ef65b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -174,6 +174,7 @@ OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o +OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 931d7dbb0d..962f656abc 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -270,6 +270,7 @@ extern const AVFilter ff_vf_geq; extern const AVFilter ff_vf_gradfun; extern const AVFilter ff_vf_graphmonitor; extern const AVFilter ff_vf_greyedge; +extern const AVFilter ff_vf_guided; extern const AVFilter ff_vf_haldclut; extern const AVFilter ff_vf_hflip; extern const AVFilter ff_vf_histeq; diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c new file mode 100644 index 0000000000..2bb640f686 --- /dev/null +++ b/libavfilter/vf_guided.c @@ -0,0 +1,400 @@ +/* + * Copyright (c) 2021 Andrey Moskalenko + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/** + * @file + * Guided filter. + * Principle of operation: + * Implemented the approach from the original article + * with multithread speedup of boxfilter and subsampling technique. + */ + +#include "libavutil/imgutils.h" +#include "libavutil/opt.h" +#include "libavutil/pixdesc.h" +#include "avfilter.h" +#include "formats.h" +#include "internal.h" +#include "video.h" + +typedef struct GuidedContext { + const AVClass *class; + int radius; + float eps; + int sub; + int planes; + int nb_planes; + int depth; + int planewidth[4]; + int planeheight[4]; + float *I_mean; + float *I_pow; + float *A; + float *B; + float *buffer; + int (*filter_horizontally)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); + int (*filter_vertically)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); +} GuidedContext; + +#define OFFSET(x) offsetof(GuidedContext, x) +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM + +static const AVOption guided_options[] = { + { "radius", "The box radius in pixels", OFFSET(radius), AV_OPT_TYPE_INT, {.i64=8}, 1, 1024, FLAGS }, + { "eps", "Regularization parameter (without square)", OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0.0, 1, FLAGS }, + { "sub", "Subsampling ratio", OFFSET(sub), AV_OPT_TYPE_INT, {.i64=4}, 1, 1024, FLAGS }, + { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=1}, 0, 0xF, FLAGS }, + { NULL } +}; + +typedef struct ThreadData { + int height; + int width; + float *ptr; + int linesize; +} ThreadData; + +AVFILTER_DEFINE_CLASS(guided); + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, + AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, + AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, + AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, + AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12, + AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, + AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16, + AV_PIX_FMT_NONE + }; + + return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); +} + + +static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + GuidedContext *s = ctx->priv; + ThreadData *td = arg; + const int height = td->height; + const int width = td->width; + const int slice_start = (height * jobnr ) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; + const int radius = s->radius; + const int linesize = td->linesize / sizeof(float); + float *buffer = s->buffer; + const float *src; + float *ptr; + int y, x; + + /* Filter horizontally along each row */ + for (y = slice_start; y < slice_end; y++) { + float acc = 0; + int count = 0; + + src = (const float *)td->ptr + linesize * y; + ptr = buffer + width * y; + + for (x = 0; x < radius; x++) { + acc += src[x]; + } + count += radius; + + for (x = 0; x <= radius; x++) { + acc += src[x + radius]; + count++; + ptr[x] = acc / count; + } + + for (; x < width - radius; x++) { + acc += src[x + radius] - src[x - radius - 1]; + ptr[x] = acc / count; + } + + for (; x < width; x++) { + acc -= src[x - radius]; + count--; + ptr[x] = acc / count; + } + } + + return 0; +} + + +static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + GuidedContext *s = ctx->priv; + ThreadData *td = arg; + const int height = td->height; + const int width = td->width; + const int slice_start = (width * jobnr ) / nb_jobs; + const int slice_end = (width * (jobnr+1)) / nb_jobs; + const int radius = s->radius; + const int linesize = td->linesize / sizeof(float); + float *buffer = (float *)td->ptr; + const float *src; + float *ptr; + int i, x; + + /* Filter vertically along each column */ + for (x = slice_start; x < slice_end; x++) { + float acc = 0; + int count = 0; + + src = s->buffer + x; + + for (i = 0; i < radius; i++) { + acc += src[0]; + src += width; + } + count += radius; + + src = s->buffer + x; + ptr = buffer + x; + for (i = 0; i + radius < height && i <= radius; i++) { + acc += src[(i + radius) * width]; + count++; + ptr[i * linesize] = acc / count; + } + + for (; i < height - radius; i++) { + acc += src[(i + radius) * width] - src[(i - radius - 1) * width]; + ptr[i * linesize] = acc / count; + } + + for (; i < height; i++) { + acc -= src[(i - radius) * width]; + count--; + ptr[i * linesize] = acc / count; + } + } + + return 0; +} + + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + GuidedContext *s = ctx->priv; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + + s->depth = desc->comp[0].depth; + // For numerical stability + if (s->eps < 1e-8) + s->eps = 1e-8; + // Avoid radius/sub = 0 + if (s->radius >= s->sub) + s->radius = s->radius / s->sub; + else + s->radius = 1; + s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); + s->planewidth[0] = s->planewidth[3] = inlink->w; + s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); + s->planeheight[0] = s->planeheight[3] = inlink->h; + + s->nb_planes = av_pix_fmt_count_planes(inlink->format); + + s->I_mean = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); + s->I_pow = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); + s->A = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); + s->B = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float)); + s->buffer = av_malloc_array(inlink->w/s->sub, inlink->h/s->sub * sizeof(*s->buffer)); + if (!s->I_mean || + !s->I_pow || + !s->A || + !s->B || + !s->buffer) + return AVERROR(ENOMEM); + s->filter_horizontally = filter_horizontally; + s->filter_vertically = filter_vertically; + return 0; +} + + +/** + * Calculate guided filter output. + * + * @param ctx filter context + * @param ssrc source plane + * @param ddst output plane + * @param width, height shapes + * @param src_linesize, dst_linesize aligned shapes + * @param maxval maximum value of image type (used for converting to float) + */ +#define GUIDED_FILTER(name, type) \ +static void guided_##name(AVFilterContext *ctx, const uint8_t *ssrc, uint8_t *ddst, \ + int w, int h, int src_linesize, \ + int dst_linesize, float maxval) { \ + GuidedContext *s = ctx->priv; \ + const int nb_threads = ff_filter_get_nb_threads(ctx); \ + ThreadData td; \ + type *dst = (type *)ddst; \ + const type *src = (const type *)ssrc; \ + float *I_mean = s->I_mean; \ + float *I_pow = s->I_pow; \ + float *A = s->A; \ + float *B = s->B; \ + int sub = s->sub; \ + int height = h / sub; \ + int width = w / sub; \ + float val; \ + for (int i = 0; i < height; ++i) { \ + for (int j = 0; j < width; ++j) { \ + I_mean[j + i * width] = src[j * sub + i * src_linesize * sub] / maxval; \ + I_pow[j + i * width] = I_mean[j + i * width] * I_mean[j + i * width]; \ + } \ + } \ + td.width = width; \ + td.height = height; \ + td.linesize = width * sizeof(float); \ + td.ptr = I_mean; \ + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ + \ + td.ptr = I_pow; \ + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ + \ + for (int i = 0; i < height; ++i) { \ + for (int j = 0; j < width; ++j) { \ + val = I_pow[j + i * width] - I_mean[j + i * width] * I_mean[j + i * width]; \ + A[j + i * width] = val / (val + s->eps*s->eps); \ + B[j + i * width] = I_mean[j + i * width] * (1.f - A[j + i * width]); \ + } \ + } \ + \ + td.ptr = A; \ + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ + \ + td.ptr = B; \ + ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\ + ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads)); \ + \ + for (int i = 0; i < h; i++) \ + for (int j = 0; j < w; j++) \ + dst[j + i * dst_linesize] = \ + A[j / sub + (i / sub) * width] * src[j + i * src_linesize] + \ + B[j / sub + (i / sub) * width] * maxval; \ +} \ + +GUIDED_FILTER(8, uint8_t) +GUIDED_FILTER(16, uint16_t) + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + GuidedContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *out; + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + av_frame_free(&in); + return AVERROR(ENOMEM); + } + av_frame_copy_props(out, in); + for (int plane = 0; plane < s->nb_planes; plane++) { + if (!(s->planes & (1 << plane))) { + av_image_copy_plane(out->data[plane], out->linesize[plane], + in->data[plane], in->linesize[plane], + s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]); + continue; + } + if (s->depth <= 8) + guided_8(ctx, in->data[plane], out->data[plane], s->planewidth[plane], + s->planeheight[plane], in->linesize[plane], + out->linesize[plane], (1 << s->depth) - 1.f); + else + guided_16(ctx, in->data[plane], out->data[plane], s->planewidth[plane], + s->planeheight[plane], in->linesize[plane] / 2, + out->linesize[plane] / 2, (1 << s->depth) - 1.f); + } + av_frame_free(&in); + return ff_filter_frame(outlink, out); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + GuidedContext *s = ctx->priv; + av_freep(&s->I_mean); + av_freep(&s->I_pow); + av_freep(&s->A); + av_freep(&s->B); + av_freep(&s->buffer); +} + +static int process_command(AVFilterContext *ctx, + const char *cmd, + const char *arg, + char *res, + int res_len, + int flags) +{ + int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); + + if (ret < 0) + return ret; + + return 0; +} + +static const AVFilterPad guided_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input, + .filter_frame = filter_frame, + }, + { NULL } +}; + +static const AVFilterPad guided_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, + { NULL } +}; + +AVFilter ff_vf_guided = { + .name = "guided", + .description = NULL_IF_CONFIG_SMALL("Apply Guided filter."), + .priv_size = sizeof(GuidedContext), + .priv_class = &guided_class, + .uninit = uninit, + .query_formats = query_formats, + .inputs = guided_inputs, + .outputs = guided_outputs, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, + .process_command = process_command, +};