diff mbox series

[FFmpeg-devel,GSOC] avfilter: added guided filter

Message ID 20210502194720.19226-1-andrey.moskalenko@graphics.cs.msu.ru
State New
Headers show
Series [FFmpeg-devel,GSOC] avfilter: added guided filter
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Andrey Moskalenko May 2, 2021, 7:47 p.m. UTC
Added guided filter with subsampling and multithreading for speedup.
---
 doc/filters.texi         |  25 +++
 libavfilter/Makefile     |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/vf_guided.c  | 400 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 427 insertions(+)
 create mode 100644 libavfilter/vf_guided.c

Comments

Steven Liu May 7, 2021, 10:08 a.m. UTC | #1
Andrey Moskalenko <andrey.moskalenko@graphics.cs.msu.ru> 于2021年5月3日周一 上午3:47写道:
>
> Added guided filter with subsampling and multithreading for speedup.
Maybe not only use multithreading for speedup, also need some
algorithms to improve it.
What about the speedup compare data?
> ---
>  doc/filters.texi         |  25 +++
>  libavfilter/Makefile     |   1 +
>  libavfilter/allfilters.c |   1 +
>  libavfilter/vf_guided.c  | 400 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 427 insertions(+)
>  create mode 100644 libavfilter/vf_guided.c
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 36e35a175b..856969f51f 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -12918,6 +12918,31 @@ greyedge=difford=1:minknorm=0:sigma=2
>
>  @end itemize
>
> +@section guided
> +Apply guided filter, spatial smoothing while preserving edges.
> +
> +The filter accepts the following options:
> +@table @option
> +@item radius
> +Set the spatial blur radius in pixels.
> +Allowed range is 1 to 1024. Default is 8.
> +
> +@item eps
> +Set regularization parameter (without square) as in the original paper.
> +Allowed range is 0 to 1. Default is 0.1.
> +
> +@item sub
> +Set subsampling ratio.
> +Allowed range is 1 to 1024. Default is 4.
> +
> +@item planes
> +Set planes to filter. Default is first only.
> +@end table
> +
> +@subsection Commands
> +
> +This filter supports the all above options as @ref{commands}.
> +
>  @anchor{haldclut}
>  @section haldclut
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 5a287364b0..f66c6ef65b 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -174,6 +174,7 @@ OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
>  OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
>                                                  opencl/avgblur.o boxblur.o
>  OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER)         += vf_avgblur_vulkan.o vulkan.o
> +OBJS-$(CONFIG_GUIDED_FILTER)                 += vf_guided.o
>  OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
>  OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
>  OBJS-$(CONFIG_BILATERAL_FILTER)              += vf_bilateral.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 931d7dbb0d..962f656abc 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -270,6 +270,7 @@ extern const AVFilter ff_vf_geq;
>  extern const AVFilter ff_vf_gradfun;
>  extern const AVFilter ff_vf_graphmonitor;
>  extern const AVFilter ff_vf_greyedge;
> +extern const AVFilter ff_vf_guided;
>  extern const AVFilter ff_vf_haldclut;
>  extern const AVFilter ff_vf_hflip;
>  extern const AVFilter ff_vf_histeq;
> diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
> new file mode 100644
> index 0000000000..2bb640f686
> --- /dev/null
> +++ b/libavfilter/vf_guided.c
> @@ -0,0 +1,400 @@
> +/*
> + * Copyright (c) 2021 Andrey Moskalenko
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +/**
> + * @file
> + * Guided filter.
> + * Principle of operation:
> + * Implemented the approach from the original article
> + * with multithread speedup of boxfilter and subsampling technique.
> + */
> +
> +#include "libavutil/imgutils.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "avfilter.h"
> +#include "formats.h"
> +#include "internal.h"
> +#include "video.h"
> +
> +typedef struct GuidedContext {
> +    const AVClass *class;
> +    int radius;
> +    float eps;
> +    int sub;
> +    int planes;
> +    int nb_planes;
> +    int depth;
> +    int planewidth[4];
> +    int planeheight[4];
> +    float *I_mean;
> +    float *I_pow;
> +    float *A;
> +    float *B;
> +    float *buffer;
> +    int (*filter_horizontally)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
> +    int (*filter_vertically)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
> +} GuidedContext;
> +
> +#define OFFSET(x) offsetof(GuidedContext, x)
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
> +
> +static const AVOption guided_options[] = {
> +        { "radius", "The box radius in pixels", OFFSET(radius),  AV_OPT_TYPE_INT, {.i64=8},   1, 1024, FLAGS },
> +        { "eps", "Regularization parameter (without square)",    OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0.0,   1, FLAGS },
> +        { "sub", "Subsampling ratio", OFFSET(sub),  AV_OPT_TYPE_INT, {.i64=4},   1, 1024, FLAGS },
> +        { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1},     0, 0xF, FLAGS },
> +        { NULL }
> +};
> +
> +typedef struct ThreadData {
> +    int height;
> +    int width;
> +    float *ptr;
> +    int linesize;
> +} ThreadData;
> +
> +AVFILTER_DEFINE_CLASS(guided);
> +
> +static int query_formats(AVFilterContext *ctx)
> +{
> +    static const enum AVPixelFormat pix_fmts[] = {
> +            AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
> +            AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
> +            AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
> +            AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
> +            AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
> +            AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
> +            AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
> +            AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
> +            AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
> +            AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
> +            AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
> +            AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
> +            AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
> +            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
> +            AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
> +            AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
> +            AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
> +            AV_PIX_FMT_NONE
> +    };
> +
> +    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
> +}
> +
> +
> +static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
> +{
> +    GuidedContext *s = ctx->priv;
> +    ThreadData *td = arg;
> +    const int height = td->height;
> +    const int width = td->width;
> +    const int slice_start = (height *  jobnr   ) / nb_jobs;
> +    const int slice_end   = (height * (jobnr+1)) / nb_jobs;
> +    const int radius = s->radius;
> +    const int linesize = td->linesize / sizeof(float);
> +    float *buffer = s->buffer;
> +    const float *src;
> +    float *ptr;
> +    int y, x;
> +
> +    /* Filter horizontally along each row */
> +    for (y = slice_start; y < slice_end; y++) {
> +        float acc = 0;
> +        int count = 0;
> +
> +        src = (const float *)td->ptr + linesize * y;
> +        ptr = buffer + width * y;
> +
> +        for (x = 0; x < radius; x++) {
> +            acc += src[x];
> +        }
> +        count += radius;
> +
> +        for (x = 0; x <= radius; x++) {
> +            acc += src[x + radius];
> +            count++;
> +            ptr[x] = acc / count;
> +        }
> +
> +        for (; x < width - radius; x++) {
> +            acc += src[x + radius] - src[x - radius - 1];
> +            ptr[x] = acc / count;
> +        }
> +
> +        for (; x < width; x++) {
> +            acc -= src[x - radius];
> +            count--;
> +            ptr[x] = acc / count;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
> +{
> +    GuidedContext *s = ctx->priv;
> +    ThreadData *td = arg;
> +    const int height = td->height;
> +    const int width = td->width;
> +    const int slice_start = (width *  jobnr   ) / nb_jobs;
> +    const int slice_end   = (width * (jobnr+1)) / nb_jobs;
> +    const int radius = s->radius;
> +    const int linesize = td->linesize / sizeof(float);
> +    float *buffer = (float *)td->ptr;
> +    const float *src;
> +    float *ptr;
> +    int i, x;
> +
> +    /* Filter vertically along each column */
> +    for (x = slice_start; x < slice_end; x++) {
> +        float acc = 0;
> +        int count = 0;
> +
> +        src = s->buffer + x;
> +
> +        for (i = 0; i < radius; i++) {
> +            acc += src[0];
> +            src += width;
> +        }
> +        count += radius;
> +
> +        src = s->buffer + x;
> +        ptr = buffer + x;
> +        for (i = 0; i + radius < height && i <= radius; i++) {
> +            acc += src[(i + radius) * width];
> +            count++;
> +            ptr[i * linesize] = acc / count;
> +        }
> +
> +        for (; i < height - radius; i++) {
> +            acc += src[(i + radius) * width] - src[(i - radius - 1) * width];
> +            ptr[i * linesize] = acc / count;
> +        }
> +
> +        for (; i < height; i++) {
> +            acc -= src[(i - radius) * width];
> +            count--;
> +            ptr[i * linesize] = acc / count;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    GuidedContext *s = ctx->priv;
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
> +
> +    s->depth = desc->comp[0].depth;
> +    // For numerical stability
> +    if (s->eps < 1e-8)
> +        s->eps = 1e-8;
> +    // Avoid radius/sub = 0
> +    if (s->radius >= s->sub)
> +        s->radius = s->radius / s->sub;
> +    else
> +        s->radius = 1;
> +    s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
> +    s->planewidth[0] = s->planewidth[3] = inlink->w;
> +    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
> +    s->planeheight[0] = s->planeheight[3] = inlink->h;
> +
> +    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
> +
> +    s->I_mean = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
> +    s->I_pow = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
> +    s->A = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
> +    s->B = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
> +    s->buffer = av_malloc_array(inlink->w/s->sub, inlink->h/s->sub * sizeof(*s->buffer));
> +    if (!s->I_mean ||
> +        !s->I_pow ||
> +        !s->A ||
> +        !s->B ||
> +        !s->buffer)
> +        return AVERROR(ENOMEM);
> +    s->filter_horizontally = filter_horizontally;
> +    s->filter_vertically = filter_vertically;
> +    return 0;
> +}
> +
> +
> +/**
> + * Calculate guided filter output.
> + *
> + * @param ctx filter context
> + * @param ssrc source plane
> + * @param ddst output plane
> + * @param width, height shapes
> + * @param src_linesize, dst_linesize aligned shapes
> + * @param maxval maximum value of image type (used for converting to float)
> + */
> +#define GUIDED_FILTER(name, type)                                                             \
> +static void guided_##name(AVFilterContext *ctx, const uint8_t *ssrc, uint8_t *ddst,           \
> +                          int w, int h, int src_linesize,                                     \
> +                          int dst_linesize, float maxval) {                                   \
> +    GuidedContext *s = ctx->priv;                                                             \
> +    const int nb_threads = ff_filter_get_nb_threads(ctx);                                     \
> +    ThreadData td;                                                                            \
> +    type *dst = (type *)ddst;                                                                 \
> +    const type *src = (const type *)ssrc;                                                     \
> +    float *I_mean = s->I_mean;                                                                \
> +    float *I_pow = s->I_pow;                                                                  \
> +    float *A = s->A;                                                                          \
> +    float *B = s->B;                                                                          \
> +    int sub = s->sub;                                                                         \
> +    int height = h / sub;                                                                     \
> +    int width = w / sub;                                                                      \
> +    float val;                                                                                \
> +    for (int i = 0; i < height; ++i) {                                                        \
> +        for (int j = 0; j < width; ++j) {                                                     \
> +            I_mean[j + i * width] = src[j * sub + i * src_linesize * sub] / maxval;           \
> +            I_pow[j + i * width] = I_mean[j + i * width] * I_mean[j + i * width];             \
> +        }                                                                                     \
> +    }                                                                                         \
> +    td.width = width;                                                                         \
> +    td.height = height;                                                                       \
> +    td.linesize = width * sizeof(float);                                                      \
> +    td.ptr = I_mean;                                                                          \
> +    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
> +    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
> +                                                                                              \
> +    td.ptr = I_pow;                                                                           \
> +    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
> +    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
> +                                                                                              \
> +    for (int i = 0; i < height; ++i) {                                                        \
> +        for (int j = 0; j < width; ++j) {                                                     \
> +            val = I_pow[j + i * width] - I_mean[j + i * width] * I_mean[j + i * width];       \
> +            A[j + i * width] = val / (val + s->eps*s->eps);                                   \
> +            B[j + i * width] = I_mean[j + i * width] * (1.f - A[j + i * width]);              \
> +        }                                                                                     \
> +    }                                                                                         \
> +                                                                                              \
> +    td.ptr = A;                                                                               \
> +    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
> +    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
> +                                                                                              \
> +    td.ptr = B;                                                                               \
> +    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
> +    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
> +                                                                                              \
> +    for (int i = 0; i < h; i++)                                                               \
> +        for (int j = 0; j < w; j++)                                                           \
> +            dst[j + i * dst_linesize] =                                                       \
> +                    A[j / sub + (i / sub) * width] * src[j + i * src_linesize] +              \
> +                    B[j / sub + (i / sub)  * width] * maxval;                                 \
> +}                                                                                             \
> +
> +GUIDED_FILTER(8, uint8_t)
> +GUIDED_FILTER(16, uint16_t)
> +
> +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    GuidedContext *s = ctx->priv;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    AVFrame *out;
> +    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!out) {
> +        av_frame_free(&in);
> +        return AVERROR(ENOMEM);
> +    }
> +    av_frame_copy_props(out, in);
> +    for (int plane = 0; plane < s->nb_planes; plane++) {
> +        if (!(s->planes & (1 << plane))) {
> +            av_image_copy_plane(out->data[plane], out->linesize[plane],
> +                                in->data[plane], in->linesize[plane],
> +                                s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
> +            continue;
> +        }
> +        if (s->depth <= 8)
> +            guided_8(ctx, in->data[plane], out->data[plane], s->planewidth[plane],
> +                                s->planeheight[plane], in->linesize[plane],
> +                                out->linesize[plane], (1 << s->depth) - 1.f);
> +        else
> +            guided_16(ctx, in->data[plane], out->data[plane], s->planewidth[plane],
> +                    s->planeheight[plane], in->linesize[plane] / 2,
> +                    out->linesize[plane] / 2, (1 << s->depth) - 1.f);
> +    }
> +    av_frame_free(&in);
> +    return ff_filter_frame(outlink, out);
> +}
> +
> +static av_cold void uninit(AVFilterContext *ctx)
> +{
> +    GuidedContext *s = ctx->priv;
> +    av_freep(&s->I_mean);
> +    av_freep(&s->I_pow);
> +    av_freep(&s->A);
> +    av_freep(&s->B);
> +    av_freep(&s->buffer);
> +}
> +
> +static int process_command(AVFilterContext *ctx,
> +                           const char *cmd,
> +                           const char *arg,
> +                           char *res,
> +                           int res_len,
> +                           int flags)
> +{
> +    int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
> +
> +    if (ret < 0)
> +        return ret;
> +
> +    return 0;
> +}
> +
> +static const AVFilterPad guided_inputs[] = {
> +        {
> +                .name         = "default",
> +                .type         = AVMEDIA_TYPE_VIDEO,
> +                .config_props = config_input,
> +                .filter_frame = filter_frame,
> +        },
> +        { NULL }
> +};
> +
> +static const AVFilterPad guided_outputs[] = {
> +        {
> +                .name = "default",
> +                .type = AVMEDIA_TYPE_VIDEO,
> +        },
> +        { NULL }
> +};
> +
> +AVFilter ff_vf_guided = {
> +        .name          = "guided",
> +        .description   = NULL_IF_CONFIG_SMALL("Apply Guided filter."),
> +        .priv_size     = sizeof(GuidedContext),
> +        .priv_class    = &guided_class,
> +        .uninit        = uninit,
> +        .query_formats = query_formats,
> +        .inputs        = guided_inputs,
> +        .outputs       = guided_outputs,
> +        .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
> +        .process_command = process_command,
> +};
> --
> 2.30.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

Thanks
Steven
Andrey Moskalenko May 8, 2021, 1:48 p.m. UTC | #2
> Maybe not only use multithreading for speedup, also need some
 > algorithms to improve it.
 > What about the speedup compare data?
Additionally, a fast implementation of box filtering is used and 
sub-sampling as recommended in https://arxiv.org/abs/1505.00996. Current 
implementation outperforms the existing bilateral filter in terms of 
speed and quality on my 8-core CPU. The speed depends significantly on 
the sub-sampling parameter. With sub = 4, for example, the filter works 
110 FPS against 74 FPS for the bilateral one with the same spatial radius.
diff mbox series

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index 36e35a175b..856969f51f 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12918,6 +12918,31 @@  greyedge=difford=1:minknorm=0:sigma=2
 
 @end itemize
 
+@section guided
+Apply guided filter, spatial smoothing while preserving edges.
+
+The filter accepts the following options:
+@table @option
+@item radius
+Set the spatial blur radius in pixels.
+Allowed range is 1 to 1024. Default is 8.
+
+@item eps
+Set regularization parameter (without square) as in the original paper.
+Allowed range is 0 to 1. Default is 0.1.
+
+@item sub
+Set subsampling ratio.
+Allowed range is 1 to 1024. Default is 4.
+
+@item planes
+Set planes to filter. Default is first only.
+@end table
+
+@subsection Commands
+
+This filter supports the all above options as @ref{commands}.
+
 @anchor{haldclut}
 @section haldclut
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 5a287364b0..f66c6ef65b 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -174,6 +174,7 @@  OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
 OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
                                                 opencl/avgblur.o boxblur.o
 OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER)         += vf_avgblur_vulkan.o vulkan.o
+OBJS-$(CONFIG_GUIDED_FILTER)                 += vf_guided.o
 OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
 OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
 OBJS-$(CONFIG_BILATERAL_FILTER)              += vf_bilateral.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 931d7dbb0d..962f656abc 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -270,6 +270,7 @@  extern const AVFilter ff_vf_geq;
 extern const AVFilter ff_vf_gradfun;
 extern const AVFilter ff_vf_graphmonitor;
 extern const AVFilter ff_vf_greyedge;
+extern const AVFilter ff_vf_guided;
 extern const AVFilter ff_vf_haldclut;
 extern const AVFilter ff_vf_hflip;
 extern const AVFilter ff_vf_histeq;
diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
new file mode 100644
index 0000000000..2bb640f686
--- /dev/null
+++ b/libavfilter/vf_guided.c
@@ -0,0 +1,400 @@ 
+/*
+ * Copyright (c) 2021 Andrey Moskalenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Guided filter.
+ * Principle of operation:
+ * Implemented the approach from the original article
+ * with multithread speedup of boxfilter and subsampling technique.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct GuidedContext {
+    const AVClass *class;
+    int radius;
+    float eps;
+    int sub;
+    int planes;
+    int nb_planes;
+    int depth;
+    int planewidth[4];
+    int planeheight[4];
+    float *I_mean;
+    float *I_pow;
+    float *A;
+    float *B;
+    float *buffer;
+    int (*filter_horizontally)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+    int (*filter_vertically)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+} GuidedContext;
+
+#define OFFSET(x) offsetof(GuidedContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+
+static const AVOption guided_options[] = {
+        { "radius", "The box radius in pixels", OFFSET(radius),  AV_OPT_TYPE_INT, {.i64=8},   1, 1024, FLAGS },
+        { "eps", "Regularization parameter (without square)",    OFFSET(eps), AV_OPT_TYPE_FLOAT, {.dbl=0.1}, 0.0,   1, FLAGS },
+        { "sub", "Subsampling ratio", OFFSET(sub),  AV_OPT_TYPE_INT, {.i64=4},   1, 1024, FLAGS },
+        { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1},     0, 0xF, FLAGS },
+        { NULL }
+};
+
+typedef struct ThreadData {
+    int height;
+    int width;
+    float *ptr;
+    int linesize;
+} ThreadData;
+
+AVFILTER_DEFINE_CLASS(guided);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+            AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P,
+            AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+            AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+            AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+            AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+            AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+            AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+            AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+            AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+            AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+            AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+            AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+            AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+            AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+            AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+            AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
+            AV_PIX_FMT_NONE
+    };
+
+    return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+}
+
+
+static int filter_horizontally(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    GuidedContext *s = ctx->priv;
+    ThreadData *td = arg;
+    const int height = td->height;
+    const int width = td->width;
+    const int slice_start = (height *  jobnr   ) / nb_jobs;
+    const int slice_end   = (height * (jobnr+1)) / nb_jobs;
+    const int radius = s->radius;
+    const int linesize = td->linesize / sizeof(float);
+    float *buffer = s->buffer;
+    const float *src;
+    float *ptr;
+    int y, x;
+
+    /* Filter horizontally along each row */
+    for (y = slice_start; y < slice_end; y++) {
+        float acc = 0;
+        int count = 0;
+
+        src = (const float *)td->ptr + linesize * y;
+        ptr = buffer + width * y;
+
+        for (x = 0; x < radius; x++) {
+            acc += src[x];
+        }
+        count += radius;
+
+        for (x = 0; x <= radius; x++) {
+            acc += src[x + radius];
+            count++;
+            ptr[x] = acc / count;
+        }
+
+        for (; x < width - radius; x++) {
+            acc += src[x + radius] - src[x - radius - 1];
+            ptr[x] = acc / count;
+        }
+
+        for (; x < width; x++) {
+            acc -= src[x - radius];
+            count--;
+            ptr[x] = acc / count;
+        }
+    }
+
+    return 0;
+}
+
+
+static int filter_vertically(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    GuidedContext *s = ctx->priv;
+    ThreadData *td = arg;
+    const int height = td->height;
+    const int width = td->width;
+    const int slice_start = (width *  jobnr   ) / nb_jobs;
+    const int slice_end   = (width * (jobnr+1)) / nb_jobs;
+    const int radius = s->radius;
+    const int linesize = td->linesize / sizeof(float);
+    float *buffer = (float *)td->ptr;
+    const float *src;
+    float *ptr;
+    int i, x;
+
+    /* Filter vertically along each column */
+    for (x = slice_start; x < slice_end; x++) {
+        float acc = 0;
+        int count = 0;
+
+        src = s->buffer + x;
+
+        for (i = 0; i < radius; i++) {
+            acc += src[0];
+            src += width;
+        }
+        count += radius;
+
+        src = s->buffer + x;
+        ptr = buffer + x;
+        for (i = 0; i + radius < height && i <= radius; i++) {
+            acc += src[(i + radius) * width];
+            count++;
+            ptr[i * linesize] = acc / count;
+        }
+
+        for (; i < height - radius; i++) {
+            acc += src[(i + radius) * width] - src[(i - radius - 1) * width];
+            ptr[i * linesize] = acc / count;
+        }
+
+        for (; i < height; i++) {
+            acc -= src[(i - radius) * width];
+            count--;
+            ptr[i * linesize] = acc / count;
+        }
+    }
+
+    return 0;
+}
+
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    GuidedContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+
+    s->depth = desc->comp[0].depth;
+    // For numerical stability
+    if (s->eps < 1e-8)
+        s->eps = 1e-8;
+    // Avoid radius/sub = 0
+    if (s->radius >= s->sub)
+        s->radius = s->radius / s->sub;
+    else
+        s->radius = 1;
+    s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0] = s->planewidth[3] = inlink->w;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    s->I_mean = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
+    s->I_pow = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
+    s->A = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
+    s->B = av_calloc(inlink->w/s->sub * inlink->h/s->sub, sizeof(float));
+    s->buffer = av_malloc_array(inlink->w/s->sub, inlink->h/s->sub * sizeof(*s->buffer));
+    if (!s->I_mean ||
+        !s->I_pow ||
+        !s->A ||
+        !s->B ||
+        !s->buffer)
+        return AVERROR(ENOMEM);
+    s->filter_horizontally = filter_horizontally;
+    s->filter_vertically = filter_vertically;
+    return 0;
+}
+
+
+/**
+ * Calculate guided filter output.
+ *
+ * @param ctx filter context
+ * @param ssrc source plane
+ * @param ddst output plane
+ * @param width, height shapes
+ * @param src_linesize, dst_linesize aligned shapes
+ * @param maxval maximum value of image type (used for converting to float)
+ */
+#define GUIDED_FILTER(name, type)                                                             \
+static void guided_##name(AVFilterContext *ctx, const uint8_t *ssrc, uint8_t *ddst,           \
+                          int w, int h, int src_linesize,                                     \
+                          int dst_linesize, float maxval) {                                   \
+    GuidedContext *s = ctx->priv;                                                             \
+    const int nb_threads = ff_filter_get_nb_threads(ctx);                                     \
+    ThreadData td;                                                                            \
+    type *dst = (type *)ddst;                                                                 \
+    const type *src = (const type *)ssrc;                                                     \
+    float *I_mean = s->I_mean;                                                                \
+    float *I_pow = s->I_pow;                                                                  \
+    float *A = s->A;                                                                          \
+    float *B = s->B;                                                                          \
+    int sub = s->sub;                                                                         \
+    int height = h / sub;                                                                     \
+    int width = w / sub;                                                                      \
+    float val;                                                                                \
+    for (int i = 0; i < height; ++i) {                                                        \
+        for (int j = 0; j < width; ++j) {                                                     \
+            I_mean[j + i * width] = src[j * sub + i * src_linesize * sub] / maxval;           \
+            I_pow[j + i * width] = I_mean[j + i * width] * I_mean[j + i * width];             \
+        }                                                                                     \
+    }                                                                                         \
+    td.width = width;                                                                         \
+    td.height = height;                                                                       \
+    td.linesize = width * sizeof(float);                                                      \
+    td.ptr = I_mean;                                                                          \
+    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
+    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
+                                                                                              \
+    td.ptr = I_pow;                                                                           \
+    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
+    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
+                                                                                              \
+    for (int i = 0; i < height; ++i) {                                                        \
+        for (int j = 0; j < width; ++j) {                                                     \
+            val = I_pow[j + i * width] - I_mean[j + i * width] * I_mean[j + i * width];       \
+            A[j + i * width] = val / (val + s->eps*s->eps);                                   \
+            B[j + i * width] = I_mean[j + i * width] * (1.f - A[j + i * width]);              \
+        }                                                                                     \
+    }                                                                                         \
+                                                                                              \
+    td.ptr = A;                                                                               \
+    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
+    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
+                                                                                              \
+    td.ptr = B;                                                                               \
+    ctx->internal->execute(ctx, s->filter_horizontally, &td, NULL, FFMIN(height, nb_threads));\
+    ctx->internal->execute(ctx, s->filter_vertically, &td, NULL, FFMIN(width, nb_threads));   \
+                                                                                              \
+    for (int i = 0; i < h; i++)                                                               \
+        for (int j = 0; j < w; j++)                                                           \
+            dst[j + i * dst_linesize] =                                                       \
+                    A[j / sub + (i / sub) * width] * src[j + i * src_linesize] +              \
+                    B[j / sub + (i / sub)  * width] * maxval;                                 \
+}                                                                                             \
+
+GUIDED_FILTER(8, uint8_t)
+GUIDED_FILTER(16, uint16_t)
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    GuidedContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+    for (int plane = 0; plane < s->nb_planes; plane++) {
+        if (!(s->planes & (1 << plane))) {
+            av_image_copy_plane(out->data[plane], out->linesize[plane],
+                                in->data[plane], in->linesize[plane],
+                                s->planewidth[plane] * ((s->depth + 7) / 8), s->planeheight[plane]);
+            continue;
+        }
+        if (s->depth <= 8)
+            guided_8(ctx, in->data[plane], out->data[plane], s->planewidth[plane],
+                                s->planeheight[plane], in->linesize[plane],
+                                out->linesize[plane], (1 << s->depth) - 1.f);
+        else
+            guided_16(ctx, in->data[plane], out->data[plane], s->planewidth[plane],
+                    s->planeheight[plane], in->linesize[plane] / 2,
+                    out->linesize[plane] / 2, (1 << s->depth) - 1.f);
+    }
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    GuidedContext *s = ctx->priv;
+    av_freep(&s->I_mean);
+    av_freep(&s->I_pow);
+    av_freep(&s->A);
+    av_freep(&s->B);
+    av_freep(&s->buffer);
+}
+
+static int process_command(AVFilterContext *ctx,
+                           const char *cmd,
+                           const char *arg,
+                           char *res,
+                           int res_len,
+                           int flags)
+{
+    int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
+
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static const AVFilterPad guided_inputs[] = {
+        {
+                .name         = "default",
+                .type         = AVMEDIA_TYPE_VIDEO,
+                .config_props = config_input,
+                .filter_frame = filter_frame,
+        },
+        { NULL }
+};
+
+static const AVFilterPad guided_outputs[] = {
+        {
+                .name = "default",
+                .type = AVMEDIA_TYPE_VIDEO,
+        },
+        { NULL }
+};
+
+AVFilter ff_vf_guided = {
+        .name          = "guided",
+        .description   = NULL_IF_CONFIG_SMALL("Apply Guided filter."),
+        .priv_size     = sizeof(GuidedContext),
+        .priv_class    = &guided_class,
+        .uninit        = uninit,
+        .query_formats = query_formats,
+        .inputs        = guided_inputs,
+        .outputs       = guided_outputs,
+        .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+        .process_command = process_command,
+};