diff mbox series

[FFmpeg-devel] GSoC: Support fast guided filter.

Message ID tencent_E177FCB2AC2D946A64D732DB7B0CCDB5A706@qq.com
State Accepted
Commit 43d70feb788318b124418c5c666f2120ee5ca930
Headers show
Series [FFmpeg-devel] GSoC: Support fast guided filter. | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate fail Make fate failed
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate warning Make fate failed

Commit Message

Xuewei Meng May 10, 2021, 1:42 p.m. UTC
From: Xuewei Meng <xwmeng96@gmail.com>

The guided filter now supports two modes: basic mode and fast mode.
Basic mode is the guided filter as initially pushed, without optimization.
Fast mode builds on the basic one by using a sub-sampling method.
The sub-sampling ratio, which can be set by the user, controls the
algorithm complexity: the larger the sub-sampling ratio, the lower
the algorithm complexity.

Signed-off-by: Xuewei Meng <xwmeng96@gmail.com>
---
 doc/filters.texi        |  20 +++++++---
 libavfilter/vf_guided.c | 104 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 85 insertions(+), 39 deletions(-)

Comments

mypopy@gmail.com May 11, 2021, 5:08 a.m. UTC | #1
On Mon, May 10, 2021 at 9:42 PM Xuewei Meng <928826483@qq.com> wrote:
>
> From: Xuewei Meng <xwmeng96@gmail.com>
>
> Two modes are supported in guided filter, basic mode and fast mode.
> Basic mode is the initial pushed guided filter without optimization.
> Fast mode is implemented based on the basic one by sub-sampling method.
> The sub-sampling ratio which can be defined by users controls the
> algorithm complexity. The larger the sub-sampling ratio, the lower
> the algorithm complexity.
>
> Signed-off-by: Xuewei Meng <xwmeng96@gmail.com>
> ---
>  doc/filters.texi        |  20 +++++++---
>  libavfilter/vf_guided.c | 104 ++++++++++++++++++++++++++++++++----------------
>  2 files changed, 85 insertions(+), 39 deletions(-)
>
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 03ca9ae..eb747cb 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -12963,12 +12963,22 @@ Apply guided filter for edge-preserving smoothing, dehazing and so on.
>  The filter accepts the following options:
>  @table @option
>  @item radius
> -Set the radius in pixels.
> +Set the box radius in pixels.
>  Allowed range is 1 to 20. Default is 3.
>
>  @item eps
> -Set regularization parameter.
> -Allowed range is 0 to 1. Default is 0.1.
> +Set regularization parameter (with square).
> +Allowed range is 0 to 1. Default is 0.01.
> +
> +@item mode
> +Set filter mode. Can be @code{basic} or @code{fast}.
> +Default is @code{basic}.
> +
> +@item sub
> +Set subsampling ratio.
> +Allowed range is 1 to 64.
> +Default is always 1 for @code{basic} value of @var{mode} option,
> +and 4 for @code{fast} value of @var{mode} option.
>
>  @item planes
>  Set planes to filter. Default is first only.
> @@ -12987,8 +12997,8 @@ ffmpeg -i in.png -i in.png -filter_complex guided out.png
>
>  @item
>  Dehazing, structure-transferring filtering, detail enhancement with guided filter.
> -For the generation of guidance image,
> -see @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> +For the generation of guidance image, refer to paper "Guided Image Filtering".
> +See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
>  @example
>  ffmpeg -i in.png -i guidance.png -filter_complex guided out.png
>  @end example
> diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
> index 86c0db5..230fb7b 100644
> --- a/libavfilter/vf_guided.c
> +++ b/libavfilter/vf_guided.c
> @@ -27,12 +27,20 @@
>  #include "internal.h"
>  #include "video.h"
>
> +enum FilterModes {
> +    BASIC,
> +    FAST,
> +    NB_MODES,
> +};
> +
>  typedef struct GuidedContext {
>      const AVClass *class;
>      FFFrameSync fs;
>
>      int radius;
>      float eps;
> +    int mode;
> +    int sub;
>
>      int planes;
>
> @@ -51,9 +59,13 @@ typedef struct GuidedContext {
>  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
>
>  static const AVOption guided_options[] = {
> -    { "radius", "set the box radius",           OFFSET(radius), AV_OPT_TYPE_INT,   {.i64=3    },   1,  20, FLAGS },
> -    { "eps",    "set the regularization parameter (with square)",              OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl=0.01  }, 0.0,   1, FLAGS },
> -    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1    },   0, 0xF, FLAGS },
> +    { "radius", "set the box radius",                               OFFSET(radius), AV_OPT_TYPE_INT,   {.i64 = 3    },   1,           20, FLAGS },
> +    { "eps",    "set the regularization parameter (with square)",   OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0,            1, FLAGS },
> +    { "mode",   "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),   AV_OPT_TYPE_INT,   {.i64 = BASIC},   0, NB_MODES - 1, FLAGS, "mode" },
> +    { "basic",  "basic guided filter",                              0,              AV_OPT_TYPE_CONST, {.i64 = BASIC},   0,            0, FLAGS, "mode" },
> +    { "fast",   "fast guided filter",                               0,              AV_OPT_TYPE_CONST, {.i64 = FAST },   0,            0, FLAGS, "mode" },
> +    { "sub",    "subsampling ratio",                                OFFSET(sub),    AV_OPT_TYPE_INT,   {.i64 = 1    },   1,           64, FLAGS },
> +    { "planes", "set planes to filter",                             OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1      },   0,          0xF, FLAGS },
>      { NULL }
>  };
>
> @@ -147,6 +159,26 @@ static int config_input(AVFilterLink *inlink)
>          return AVERROR(EINVAL);
>      }
>
> +    if (s->mode == BASIC) {
> +        if (s->sub != 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is 1 in basic mode.\n");
> +            s->sub = 1;
> +        }
> +    }
> +    else if (s->mode == FAST) {
> +        if (s->sub == 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is larger than 1 in fast mode.\n");
> +            s->sub = 4;
> +        }
> +        if (s->radius >= s->sub)
> +            s->radius = s->radius / s->sub;
> +        else {
> +            s->radius = 1;
> +        }
> +    }
> +    else {
> +        return AVERROR_BUG;
> +    }
>
>      s->depth = desc->comp[0].depth;
>      s->width = ctx->inputs[0]->w;
> @@ -174,6 +206,10 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>      const type *src = (const type *)ssrc;                                               \
>      const type *srcRef = (const type *)ssrcRef;                                         \
>                                                                                          \
> +    int sub = s->sub;                                                                   \
> +    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
> +    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
> +                                                                                        \
>      ThreadData t;                                                                       \
>      const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
>      float *I;                                                                           \
> @@ -189,55 +225,55 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>      float *meanA;                                                                       \
>      float *meanB;                                                                       \
>                                                                                          \
> -    I      = av_calloc(width * height, sizeof(float));                                  \
> -    II     = av_calloc(width * height, sizeof(float));                                  \
> -    P      = av_calloc(width * height, sizeof(float));                                  \
> -    IP     = av_calloc(width * height, sizeof(float));                                  \
> -    meanI  = av_calloc(width * height, sizeof(float));                                  \
> -    meanII = av_calloc(width * height, sizeof(float));                                  \
> -    meanP  = av_calloc(width * height, sizeof(float));                                  \
> -    meanIP = av_calloc(width * height, sizeof(float));                                  \
> +    I      = av_calloc(w * h, sizeof(float));                                           \
> +    II     = av_calloc(w * h, sizeof(float));                                           \
> +    P      = av_calloc(w * h, sizeof(float));                                           \
> +    IP     = av_calloc(w * h, sizeof(float));                                           \
> +    meanI  = av_calloc(w * h, sizeof(float));                                           \
> +    meanII = av_calloc(w * h, sizeof(float));                                           \
> +    meanP  = av_calloc(w * h, sizeof(float));                                           \
> +    meanIP = av_calloc(w * h, sizeof(float));                                           \
>                                                                                          \
> -    A      = av_calloc(width * height, sizeof(float));                                  \
> -    B      = av_calloc(width * height, sizeof(float));                                  \
> -    meanA  = av_calloc(width * height, sizeof(float));                                  \
> -    meanB  = av_calloc(width * height, sizeof(float));                                  \
> +    A      = av_calloc(w * h, sizeof(float));                                           \
> +    B      = av_calloc(w * h, sizeof(float));                                           \
> +    meanA  = av_calloc(w * h, sizeof(float));                                           \
> +    meanB  = av_calloc(w * h, sizeof(float));                                           \
>                                                                                          \
>      if (!I || !II || !P || !IP || !meanI || !meanII || !meanP ||                        \
>          !meanIP || !A || !B || !meanA || !meanB){                                       \
>          ret = AVERROR(ENOMEM);                                                          \
>          goto end;                                                                       \
>      }                                                                                   \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> -        I[x]  = src[i * src_stride + j] / maxval;                                       \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
> +        I[x]  = src[(i * src_stride + j) * sub] / maxval;                               \
>          II[x] = I[x] * I[x];                                                            \
> -        P[x]  = srcRef[i * src_ref_stride + j] / maxval;                                \
> +        P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                        \
>          IP[x] = I[x] * P[x];                                                            \
>        }                                                                                 \
>      }                                                                                   \
>                                                                                          \
> -    t.width  = width;                                                                   \
> -    t.height = height;                                                                  \
> -    t.srcStride = width;                                                                \
> -    t.dstStride = width;                                                                \
> +    t.width  = w;                                                                       \
> +    t.height = h;                                                                       \
> +    t.srcStride = w;                                                                    \
> +    t.dstStride = w;                                                                    \
>      t.src = I;                                                                          \
>      t.dst = meanI;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = II;                                                                         \
>      t.dst = meanII;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = P;                                                                          \
>      t.dst = meanP;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = IP;                                                                         \
>      t.dst = meanIP;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                          \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
>          float varI = meanII[x] - (meanI[x] * meanI[x]);                                 \
>          float covIP = meanIP[x] - (meanI[x] * meanP[x]);                                \
>          A[x] = covIP / (varI + eps);                                                    \
> @@ -247,14 +283,14 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>                                                                                          \
>      t.src = A;                                                                          \
>      t.dst = meanA;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>      t.src = B;                                                                          \
>      t.dst = meanB;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                          \
>      for (int i = 0;i < height;i++) {                                                    \
>        for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +        int x = i / sub * w + j / sub;                                                  \
>          dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +                  \
>                                    meanB[x] * maxval;                                    \
>        }                                                                                 \
> --
> 1.9.1
I think you submitted only the diff between version 3 and version 4 (the fast
mode part), which is strange; you need to submit a full version 4 patch.
mypopy@gmail.com May 11, 2021, 5:13 a.m. UTC | #2
On Tue, May 11, 2021 at 1:08 PM mypopy@gmail.com <mypopy@gmail.com> wrote:
>
> On Mon, May 10, 2021 at 9:42 PM Xuewei Meng <928826483@qq.com> wrote:
> >
> > From: Xuewei Meng <xwmeng96@gmail.com>
> >
> > Two modes are supported in guided filter, basic mode and fast mode.
> > Basic mode is the initial pushed guided filter without optimization.
> > Fast mode is implemented based on the basic one by sub-sampling method.
> > The sub-sampling ratio which can be defined by users controls the
> > algorithm complexity. The larger the sub-sampling ratio, the lower
> > the algorithm complexity.
> >
> > Signed-off-by: Xuewei Meng <xwmeng96@gmail.com>
> > ---
> >  doc/filters.texi        |  20 +++++++---
> >  libavfilter/vf_guided.c | 104 ++++++++++++++++++++++++++++++++----------------
> >  2 files changed, 85 insertions(+), 39 deletions(-)
> >
> > diff --git a/doc/filters.texi b/doc/filters.texi
> > index 03ca9ae..eb747cb 100644
> > --- a/doc/filters.texi
> > +++ b/doc/filters.texi
> > @@ -12963,12 +12963,22 @@ Apply guided filter for edge-preserving smoothing, dehazing and so on.
> >  The filter accepts the following options:
> >  @table @option
> >  @item radius
> > -Set the radius in pixels.
> > +Set the box radius in pixels.
> >  Allowed range is 1 to 20. Default is 3.
> >
> >  @item eps
> > -Set regularization parameter.
> > -Allowed range is 0 to 1. Default is 0.1.
> > +Set regularization parameter (with square).
> > +Allowed range is 0 to 1. Default is 0.01.
> > +
> > +@item mode
> > +Set filter mode. Can be @code{basic} or @code{fast}.
> > +Default is @code{basic}.
> > +
> > +@item sub
> > +Set subsampling ratio.
> > +Allowed range is 1 to 64.
> > +Default is always 1 for @code{basic} value of @var{mode} option,
> > +and 4 for @code{fast} value of @var{mode} option.
> >
> >  @item planes
> >  Set planes to filter. Default is first only.
> > @@ -12987,8 +12997,8 @@ ffmpeg -i in.png -i in.png -filter_complex guided out.png
> >
> >  @item
> >  Dehazing, structure-transferring filtering, detail enhancement with guided filter.
> > -For the generation of guidance image,
> > -see @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> > +For the generation of guidance image, refer to paper "Guided Image Filtering".
> > +See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> >  @example
> >  ffmpeg -i in.png -i guidance.png -filter_complex guided out.png
> >  @end example
> > diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
> > index 86c0db5..230fb7b 100644
> > --- a/libavfilter/vf_guided.c
> > +++ b/libavfilter/vf_guided.c
> > @@ -27,12 +27,20 @@
> >  #include "internal.h"
> >  #include "video.h"
> >
> > +enum FilterModes {
> > +    BASIC,
> > +    FAST,
> > +    NB_MODES,
> > +};
> > +
> >  typedef struct GuidedContext {
> >      const AVClass *class;
> >      FFFrameSync fs;
> >
> >      int radius;
> >      float eps;
> > +    int mode;
> > +    int sub;
> >
> >      int planes;
> >
> > @@ -51,9 +59,13 @@ typedef struct GuidedContext {
> >  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
> >
> >  static const AVOption guided_options[] = {
> > -    { "radius", "set the box radius",           OFFSET(radius), AV_OPT_TYPE_INT,   {.i64=3    },   1,  20, FLAGS },
> > -    { "eps",    "set the regularization parameter (with square)",              OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl=0.01  }, 0.0,   1, FLAGS },
> > -    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1    },   0, 0xF, FLAGS },
> > +    { "radius", "set the box radius",                               OFFSET(radius), AV_OPT_TYPE_INT,   {.i64 = 3    },   1,           20, FLAGS },
> > +    { "eps",    "set the regularization parameter (with square)",   OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0,            1, FLAGS },
> > +    { "mode",   "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),   AV_OPT_TYPE_INT,   {.i64 = BASIC},   0, NB_MODES - 1, FLAGS, "mode" },
> > +    { "basic",  "basic guided filter",                              0,              AV_OPT_TYPE_CONST, {.i64 = BASIC},   0,            0, FLAGS, "mode" },
> > +    { "fast",   "fast guided filter",                               0,              AV_OPT_TYPE_CONST, {.i64 = FAST },   0,            0, FLAGS, "mode" },
> > +    { "sub",    "subsampling ratio",                                OFFSET(sub),    AV_OPT_TYPE_INT,   {.i64 = 1    },   1,           64, FLAGS },
> > +    { "planes", "set planes to filter",                             OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1      },   0,          0xF, FLAGS },
> >      { NULL }
> >  };
> >
> > @@ -147,6 +159,26 @@ static int config_input(AVFilterLink *inlink)
> >          return AVERROR(EINVAL);
> >      }
> >
> > +    if (s->mode == BASIC) {
> > +        if (s->sub != 1) {
> > +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is 1 in basic mode.\n");
> > +            s->sub = 1;
> > +        }
> > +    }
> > +    else if (s->mode == FAST) {
> > +        if (s->sub == 1) {
> > +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is larger than 1 in fast mode.\n");
> > +            s->sub = 4;
> > +        }
> > +        if (s->radius >= s->sub)
> > +            s->radius = s->radius / s->sub;
> > +        else {
> > +            s->radius = 1;
> > +        }
> > +    }
> > +    else {
> > +        return AVERROR_BUG;
> > +    }
> >
> >      s->depth = desc->comp[0].depth;
> >      s->width = ctx->inputs[0]->w;
> > @@ -174,6 +206,10 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
> >      const type *src = (const type *)ssrc;                                               \
> >      const type *srcRef = (const type *)ssrcRef;                                         \
> >                                                                                          \
> > +    int sub = s->sub;                                                                   \
> > +    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
> > +    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
> > +                                                                                        \
> >      ThreadData t;                                                                       \
> >      const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
> >      float *I;                                                                           \
> > @@ -189,55 +225,55 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
> >      float *meanA;                                                                       \
> >      float *meanB;                                                                       \
> >                                                                                          \
> > -    I      = av_calloc(width * height, sizeof(float));                                  \
> > -    II     = av_calloc(width * height, sizeof(float));                                  \
> > -    P      = av_calloc(width * height, sizeof(float));                                  \
> > -    IP     = av_calloc(width * height, sizeof(float));                                  \
> > -    meanI  = av_calloc(width * height, sizeof(float));                                  \
> > -    meanII = av_calloc(width * height, sizeof(float));                                  \
> > -    meanP  = av_calloc(width * height, sizeof(float));                                  \
> > -    meanIP = av_calloc(width * height, sizeof(float));                                  \
> > +    I      = av_calloc(w * h, sizeof(float));                                           \
> > +    II     = av_calloc(w * h, sizeof(float));                                           \
> > +    P      = av_calloc(w * h, sizeof(float));                                           \
> > +    IP     = av_calloc(w * h, sizeof(float));                                           \
> > +    meanI  = av_calloc(w * h, sizeof(float));                                           \
> > +    meanII = av_calloc(w * h, sizeof(float));                                           \
> > +    meanP  = av_calloc(w * h, sizeof(float));                                           \
> > +    meanIP = av_calloc(w * h, sizeof(float));                                           \
> >                                                                                          \
> > -    A      = av_calloc(width * height, sizeof(float));                                  \
> > -    B      = av_calloc(width * height, sizeof(float));                                  \
> > -    meanA  = av_calloc(width * height, sizeof(float));                                  \
> > -    meanB  = av_calloc(width * height, sizeof(float));                                  \
> > +    A      = av_calloc(w * h, sizeof(float));                                           \
> > +    B      = av_calloc(w * h, sizeof(float));                                           \
> > +    meanA  = av_calloc(w * h, sizeof(float));                                           \
> > +    meanB  = av_calloc(w * h, sizeof(float));                                           \
> >                                                                                          \
> >      if (!I || !II || !P || !IP || !meanI || !meanII || !meanP ||                        \
> >          !meanIP || !A || !B || !meanA || !meanB){                                       \
> >          ret = AVERROR(ENOMEM);                                                          \
> >          goto end;                                                                       \
> >      }                                                                                   \
> > -    for (int i = 0;i < height;i++) {                                                    \
> > -      for (int j = 0;j < width;j++) {                                                   \
> > -        int x = i * width + j;                                                          \
> > -        I[x]  = src[i * src_stride + j] / maxval;                                       \
> > +    for (int i = 0;i < h;i++) {                                                         \
> > +      for (int j = 0;j < w;j++) {                                                       \
> > +        int x = i * w + j;                                                              \
> > +        I[x]  = src[(i * src_stride + j) * sub] / maxval;                               \
> >          II[x] = I[x] * I[x];                                                            \
> > -        P[x]  = srcRef[i * src_ref_stride + j] / maxval;                                \
> > +        P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                        \
> >          IP[x] = I[x] * P[x];                                                            \
> >        }                                                                                 \
> >      }                                                                                   \
> >                                                                                          \
> > -    t.width  = width;                                                                   \
> > -    t.height = height;                                                                  \
> > -    t.srcStride = width;                                                                \
> > -    t.dstStride = width;                                                                \
> > +    t.width  = w;                                                                       \
> > +    t.height = h;                                                                       \
> > +    t.srcStride = w;                                                                    \
> > +    t.dstStride = w;                                                                    \
> >      t.src = I;                                                                          \
> >      t.dst = meanI;                                                                      \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >      t.src = II;                                                                         \
> >      t.dst = meanII;                                                                     \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >      t.src = P;                                                                          \
> >      t.dst = meanP;                                                                      \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >      t.src = IP;                                                                         \
> >      t.dst = meanIP;                                                                     \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >                                                                                          \
> > -    for (int i = 0;i < height;i++) {                                                    \
> > -      for (int j = 0;j < width;j++) {                                                   \
> > -        int x = i * width + j;                                                          \
> > +    for (int i = 0;i < h;i++) {                                                         \
> > +      for (int j = 0;j < w;j++) {                                                       \
> > +        int x = i * w + j;                                                              \
> >          float varI = meanII[x] - (meanI[x] * meanI[x]);                                 \
> >          float covIP = meanIP[x] - (meanI[x] * meanP[x]);                                \
> >          A[x] = covIP / (varI + eps);                                                    \
> > @@ -247,14 +283,14 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
> >                                                                                          \
> >      t.src = A;                                                                          \
> >      t.dst = meanA;                                                                      \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >      t.src = B;                                                                          \
> >      t.dst = meanB;                                                                      \
> > -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> > +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
> >                                                                                          \
> >      for (int i = 0;i < height;i++) {                                                    \
> >        for (int j = 0;j < width;j++) {                                                   \
> > -        int x = i * width + j;                                                          \
> > +        int x = i / sub * w + j / sub;                                                  \
> >          dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +                  \
> >                                    meanB[x] * maxval;                                    \
> >        }                                                                                 \
> > --
> > 1.9.1
> I think you submitted only the diff between version 3 and version 4 for fast
> mode, which is strange; you need to submit a full version 4 patch.

Ignore the comments above; the guided filter has already been merged.
Liu Steven May 13, 2021, 4 a.m. UTC | #3
> 2021年5月10日 下午9:42,Xuewei Meng <928826483@qq.com> 写道:
> 
> From: Xuewei Meng <xwmeng96@gmail.com>
> 
> Two modes are supported in guided filter, basic mode and fast mode.
> Basic mode is the initial pushed guided filter without optimization.
> Fast mode is implemented based on the basic one by sub-sampling method.
> The sub-sampling ratio which can be defined by users controls the
> algorithm complexity. The larger the sub-sampling ratio, the lower
> the algorithm complexity.
> 
> Signed-off-by: Xuewei Meng <xwmeng96@gmail.com>
> ---
> doc/filters.texi        |  20 +++++++---
> libavfilter/vf_guided.c | 104 ++++++++++++++++++++++++++++++++----------------
> 2 files changed, 85 insertions(+), 39 deletions(-)
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 03ca9ae..eb747cb 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -12963,12 +12963,22 @@ Apply guided filter for edge-preserving smoothing, dehazing and so on.
> The filter accepts the following options:
> @table @option
> @item radius
> -Set the radius in pixels.
> +Set the box radius in pixels.
> Allowed range is 1 to 20. Default is 3.
> 
> @item eps
> -Set regularization parameter.
> -Allowed range is 0 to 1. Default is 0.1.
> +Set regularization parameter (with square).
> +Allowed range is 0 to 1. Default is 0.01.
> +
> +@item mode
> +Set filter mode. Can be @code{basic} or @code{fast}.
> +Default is @code{basic}.
> +
> +@item sub
> +Set subsampling ratio.
> +Allowed range is 1 to 64.
> +Default is always 1 for @code{basic} value of @var{mode} option,
> +and 4 for @code{fast} value of @var{mode} option.
> 
> @item planes
> Set planes to filter. Default is first only.
> @@ -12987,8 +12997,8 @@ ffmpeg -i in.png -i in.png -filter_complex guided out.png
> 
> @item
> Dehazing, structure-transferring filtering, detail enhancement with guided filter.
> -For the generation of guidance image,
> -see @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> +For the generation of guidance image, refer to paper "Guided Image Filtering".
> +See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
> @example
> ffmpeg -i in.png -i guidance.png -filter_complex guided out.png
> @end example
> diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
> index 86c0db5..230fb7b 100644
> --- a/libavfilter/vf_guided.c
> +++ b/libavfilter/vf_guided.c
> @@ -27,12 +27,20 @@
> #include "internal.h"
> #include "video.h"
> 
> +enum FilterModes {
> +    BASIC,
> +    FAST,
> +    NB_MODES,
> +};
> +
> typedef struct GuidedContext {
>     const AVClass *class;
>     FFFrameSync fs;
> 
>     int radius;
>     float eps;
> +    int mode;
> +    int sub;
> 
>     int planes;
> 
> @@ -51,9 +59,13 @@ typedef struct GuidedContext {
> #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
> 
> static const AVOption guided_options[] = {
> -    { "radius", "set the box radius",           OFFSET(radius), AV_OPT_TYPE_INT,   {.i64=3    },   1,  20, FLAGS },
> -    { "eps",    "set the regularization parameter (with square)",              OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl=0.01  }, 0.0,   1, FLAGS },
> -    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1    },   0, 0xF, FLAGS },
> +    { "radius", "set the box radius",                               OFFSET(radius), AV_OPT_TYPE_INT,   {.i64 = 3    },   1,           20, FLAGS },
> +    { "eps",    "set the regularization parameter (with square)",   OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0,            1, FLAGS },
> +    { "mode",   "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),   AV_OPT_TYPE_INT,   {.i64 = BASIC},   0, NB_MODES - 1, FLAGS, "mode" },
> +    { "basic",  "basic guided filter",                              0,              AV_OPT_TYPE_CONST, {.i64 = BASIC},   0,            0, FLAGS, "mode" },
> +    { "fast",   "fast guided filter",                               0,              AV_OPT_TYPE_CONST, {.i64 = FAST },   0,            0, FLAGS, "mode" },
> +    { "sub",    "subsampling ratio",                                OFFSET(sub),    AV_OPT_TYPE_INT,   {.i64 = 1    },   1,           64, FLAGS },
> +    { "planes", "set planes to filter",                             OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1      },   0,          0xF, FLAGS },
>     { NULL }
> };
> 
> @@ -147,6 +159,26 @@ static int config_input(AVFilterLink *inlink)
>         return AVERROR(EINVAL);
>     }
> 
> +    if (s->mode == BASIC) {
> +        if (s->sub != 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is 1 in basic mode.\n");
> +            s->sub = 1;
> +        }
> +    }
> +    else if (s->mode == FAST) {
> +        if (s->sub == 1) {
> +            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is larger than 1 in fast mode.\n");
> +            s->sub = 4;
> +        }
> +        if (s->radius >= s->sub)
> +            s->radius = s->radius / s->sub;
> +        else {
> +            s->radius = 1;
> +        }
> +    }
> +    else {
> +        return AVERROR_BUG;
> +    }
> 
>     s->depth = desc->comp[0].depth;
>     s->width = ctx->inputs[0]->w;
> @@ -174,6 +206,10 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>     const type *src = (const type *)ssrc;                                               \
>     const type *srcRef = (const type *)ssrcRef;                                         \
>                                                                                         \
> +    int sub = s->sub;                                                                   \
> +    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
> +    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
> +                                                                                        \
>     ThreadData t;                                                                       \
>     const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
>     float *I;                                                                           \
> @@ -189,55 +225,55 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>     float *meanA;                                                                       \
>     float *meanB;                                                                       \
>                                                                                         \
> -    I      = av_calloc(width * height, sizeof(float));                                  \
> -    II     = av_calloc(width * height, sizeof(float));                                  \
> -    P      = av_calloc(width * height, sizeof(float));                                  \
> -    IP     = av_calloc(width * height, sizeof(float));                                  \
> -    meanI  = av_calloc(width * height, sizeof(float));                                  \
> -    meanII = av_calloc(width * height, sizeof(float));                                  \
> -    meanP  = av_calloc(width * height, sizeof(float));                                  \
> -    meanIP = av_calloc(width * height, sizeof(float));                                  \
> +    I      = av_calloc(w * h, sizeof(float));                                           \
> +    II     = av_calloc(w * h, sizeof(float));                                           \
> +    P      = av_calloc(w * h, sizeof(float));                                           \
> +    IP     = av_calloc(w * h, sizeof(float));                                           \
> +    meanI  = av_calloc(w * h, sizeof(float));                                           \
> +    meanII = av_calloc(w * h, sizeof(float));                                           \
> +    meanP  = av_calloc(w * h, sizeof(float));                                           \
> +    meanIP = av_calloc(w * h, sizeof(float));                                           \
>                                                                                         \
> -    A      = av_calloc(width * height, sizeof(float));                                  \
> -    B      = av_calloc(width * height, sizeof(float));                                  \
> -    meanA  = av_calloc(width * height, sizeof(float));                                  \
> -    meanB  = av_calloc(width * height, sizeof(float));                                  \
> +    A      = av_calloc(w * h, sizeof(float));                                           \
> +    B      = av_calloc(w * h, sizeof(float));                                           \
> +    meanA  = av_calloc(w * h, sizeof(float));                                           \
> +    meanB  = av_calloc(w * h, sizeof(float));                                           \
>                                                                                         \
>     if (!I || !II || !P || !IP || !meanI || !meanII || !meanP ||                        \
>         !meanIP || !A || !B || !meanA || !meanB){                                       \
>         ret = AVERROR(ENOMEM);                                                          \
>         goto end;                                                                       \
>     }                                                                                   \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> -        I[x]  = src[i * src_stride + j] / maxval;                                       \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
> +        I[x]  = src[(i * src_stride + j) * sub] / maxval;                               \
>         II[x] = I[x] * I[x];                                                            \
> -        P[x]  = srcRef[i * src_ref_stride + j] / maxval;                                \
> +        P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                        \
>         IP[x] = I[x] * P[x];                                                            \
>       }                                                                                 \
>     }                                                                                   \
>                                                                                         \
> -    t.width  = width;                                                                   \
> -    t.height = height;                                                                  \
> -    t.srcStride = width;                                                                \
> -    t.dstStride = width;                                                                \
> +    t.width  = w;                                                                       \
> +    t.height = h;                                                                       \
> +    t.srcStride = w;                                                                    \
> +    t.dstStride = w;                                                                    \
>     t.src = I;                                                                          \
>     t.dst = meanI;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>     t.src = II;                                                                         \
>     t.dst = meanII;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>     t.src = P;                                                                          \
>     t.dst = meanP;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>     t.src = IP;                                                                         \
>     t.dst = meanIP;                                                                     \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                         \
> -    for (int i = 0;i < height;i++) {                                                    \
> -      for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +    for (int i = 0;i < h;i++) {                                                         \
> +      for (int j = 0;j < w;j++) {                                                       \
> +        int x = i * w + j;                                                              \
>         float varI = meanII[x] - (meanI[x] * meanI[x]);                                 \
>         float covIP = meanIP[x] - (meanI[x] * meanP[x]);                                \
>         A[x] = covIP / (varI + eps);                                                    \
> @@ -247,14 +283,14 @@ static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
>                                                                                         \
>     t.src = A;                                                                          \
>     t.dst = meanA;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>     t.src = B;                                                                          \
>     t.dst = meanB;                                                                      \
> -    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
> +    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
>                                                                                         \
>     for (int i = 0;i < height;i++) {                                                    \
>       for (int j = 0;j < width;j++) {                                                   \
> -        int x = i * width + j;                                                          \
> +        int x = i / sub * w + j / sub;                                                  \
>         dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +                  \
>                                   meanB[x] * maxval;                                    \
>       }                                                                                 \
> -- 
> 1.9.1
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> 

Pushed

Thanks

Steven Liu
diff mbox series

Patch

diff --git a/doc/filters.texi b/doc/filters.texi
index 03ca9ae..eb747cb 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -12963,12 +12963,22 @@  Apply guided filter for edge-preserving smoothing, dehazing and so on.
 The filter accepts the following options:
 @table @option
 @item radius
-Set the radius in pixels.
+Set the box radius in pixels.
 Allowed range is 1 to 20. Default is 3.
 
 @item eps
-Set regularization parameter.
-Allowed range is 0 to 1. Default is 0.1.
+Set regularization parameter (with square).
+Allowed range is 0 to 1. Default is 0.01.
+
+@item mode
+Set filter mode. Can be @code{basic} or @code{fast}.
+Default is @code{basic}.
+
+@item sub
+Set subsampling ratio.
+Allowed range is 1 to 64.
+Default is always 1 for @code{basic} value of @var{mode} option,
+and 4 for @code{fast} value of @var{mode} option.
 
 @item planes
 Set planes to filter. Default is first only.
@@ -12987,8 +12997,8 @@  ffmpeg -i in.png -i in.png -filter_complex guided out.png
 
 @item
 Dehazing, structure-transferring filtering, detail enhancement with guided filter.
-For the generation of guidance image,
-see @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
+For the generation of guidance image, refer to paper "Guided Image Filtering".
+See: @url{http://kaiminghe.com/publications/pami12guidedfilter.pdf}.
 @example
 ffmpeg -i in.png -i guidance.png -filter_complex guided out.png
 @end example
diff --git a/libavfilter/vf_guided.c b/libavfilter/vf_guided.c
index 86c0db5..230fb7b 100644
--- a/libavfilter/vf_guided.c
+++ b/libavfilter/vf_guided.c
@@ -27,12 +27,20 @@ 
 #include "internal.h"
 #include "video.h"
 
+enum FilterModes {
+    BASIC,
+    FAST,
+    NB_MODES,
+};
+
 typedef struct GuidedContext {
     const AVClass *class;
     FFFrameSync fs;
 
     int radius;
     float eps;
+    int mode;
+    int sub;
 
     int planes;
 
@@ -51,9 +59,13 @@  typedef struct GuidedContext {
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 
 static const AVOption guided_options[] = {
-    { "radius", "set the box radius",           OFFSET(radius), AV_OPT_TYPE_INT,   {.i64=3    },   1,  20, FLAGS },
-    { "eps",    "set the regularization parameter (with square)",              OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl=0.01  }, 0.0,   1, FLAGS },
-    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1    },   0, 0xF, FLAGS },
+    { "radius", "set the box radius",                               OFFSET(radius), AV_OPT_TYPE_INT,   {.i64 = 3    },   1,           20, FLAGS },
+    { "eps",    "set the regularization parameter (with square)",   OFFSET(eps),    AV_OPT_TYPE_FLOAT, {.dbl = 0.01 }, 0.0,            1, FLAGS },
+    { "mode",   "set filtering mode (0: basic mode; 1: fast mode)", OFFSET(mode),   AV_OPT_TYPE_INT,   {.i64 = BASIC},   0, NB_MODES - 1, FLAGS, "mode" },
+    { "basic",  "basic guided filter",                              0,              AV_OPT_TYPE_CONST, {.i64 = BASIC},   0,            0, FLAGS, "mode" },
+    { "fast",   "fast guided filter",                               0,              AV_OPT_TYPE_CONST, {.i64 = FAST },   0,            0, FLAGS, "mode" },
+    { "sub",    "subsampling ratio",                                OFFSET(sub),    AV_OPT_TYPE_INT,   {.i64 = 1    },   1,           64, FLAGS },
+    { "planes", "set planes to filter",                             OFFSET(planes), AV_OPT_TYPE_INT,   {.i64=1      },   0,          0xF, FLAGS },
     { NULL }
 };
 
@@ -147,6 +159,26 @@  static int config_input(AVFilterLink *inlink)
         return AVERROR(EINVAL);
     }
 
+    if (s->mode == BASIC) {
+        if (s->sub != 1) {
+            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is 1 in basic mode.\n");
+            s->sub = 1;
+        }
+    }
+    else if (s->mode == FAST) {
+        if (s->sub == 1) {
+            av_log(ctx, AV_LOG_WARNING, "Subsampling ratio is larger than 1 in fast mode.\n");
+            s->sub = 4;
+        }
+        if (s->radius >= s->sub)
+            s->radius = s->radius / s->sub;
+        else {
+            s->radius = 1;
+        }
+    }
+    else {
+        return AVERROR_BUG;
+    }
 
     s->depth = desc->comp[0].depth;
     s->width = ctx->inputs[0]->w;
@@ -174,6 +206,10 @@  static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
     const type *src = (const type *)ssrc;                                               \
     const type *srcRef = (const type *)ssrcRef;                                         \
                                                                                         \
+    int sub = s->sub;                                                                   \
+    int h = (height % sub) == 0 ? height / sub : height / sub + 1;                      \
+    int w = (width % sub) == 0 ? width / sub : width / sub + 1;                         \
+                                                                                        \
     ThreadData t;                                                                       \
     const int nb_threads = ff_filter_get_nb_threads(ctx);                               \
     float *I;                                                                           \
@@ -189,55 +225,55 @@  static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
     float *meanA;                                                                       \
     float *meanB;                                                                       \
                                                                                         \
-    I      = av_calloc(width * height, sizeof(float));                                  \
-    II     = av_calloc(width * height, sizeof(float));                                  \
-    P      = av_calloc(width * height, sizeof(float));                                  \
-    IP     = av_calloc(width * height, sizeof(float));                                  \
-    meanI  = av_calloc(width * height, sizeof(float));                                  \
-    meanII = av_calloc(width * height, sizeof(float));                                  \
-    meanP  = av_calloc(width * height, sizeof(float));                                  \
-    meanIP = av_calloc(width * height, sizeof(float));                                  \
+    I      = av_calloc(w * h, sizeof(float));                                           \
+    II     = av_calloc(w * h, sizeof(float));                                           \
+    P      = av_calloc(w * h, sizeof(float));                                           \
+    IP     = av_calloc(w * h, sizeof(float));                                           \
+    meanI  = av_calloc(w * h, sizeof(float));                                           \
+    meanII = av_calloc(w * h, sizeof(float));                                           \
+    meanP  = av_calloc(w * h, sizeof(float));                                           \
+    meanIP = av_calloc(w * h, sizeof(float));                                           \
                                                                                         \
-    A      = av_calloc(width * height, sizeof(float));                                  \
-    B      = av_calloc(width * height, sizeof(float));                                  \
-    meanA  = av_calloc(width * height, sizeof(float));                                  \
-    meanB  = av_calloc(width * height, sizeof(float));                                  \
+    A      = av_calloc(w * h, sizeof(float));                                           \
+    B      = av_calloc(w * h, sizeof(float));                                           \
+    meanA  = av_calloc(w * h, sizeof(float));                                           \
+    meanB  = av_calloc(w * h, sizeof(float));                                           \
                                                                                         \
     if (!I || !II || !P || !IP || !meanI || !meanII || !meanP ||                        \
         !meanIP || !A || !B || !meanA || !meanB){                                       \
         ret = AVERROR(ENOMEM);                                                          \
         goto end;                                                                       \
     }                                                                                   \
-    for (int i = 0;i < height;i++) {                                                    \
-      for (int j = 0;j < width;j++) {                                                   \
-        int x = i * width + j;                                                          \
-        I[x]  = src[i * src_stride + j] / maxval;                                       \
+    for (int i = 0;i < h;i++) {                                                         \
+      for (int j = 0;j < w;j++) {                                                       \
+        int x = i * w + j;                                                              \
+        I[x]  = src[(i * src_stride + j) * sub] / maxval;                               \
         II[x] = I[x] * I[x];                                                            \
-        P[x]  = srcRef[i * src_ref_stride + j] / maxval;                                \
+        P[x]  = srcRef[(i * src_ref_stride + j) * sub] / maxval;                        \
         IP[x] = I[x] * P[x];                                                            \
       }                                                                                 \
     }                                                                                   \
                                                                                         \
-    t.width  = width;                                                                   \
-    t.height = height;                                                                  \
-    t.srcStride = width;                                                                \
-    t.dstStride = width;                                                                \
+    t.width  = w;                                                                       \
+    t.height = h;                                                                       \
+    t.srcStride = w;                                                                    \
+    t.dstStride = w;                                                                    \
     t.src = I;                                                                          \
     t.dst = meanI;                                                                      \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
     t.src = II;                                                                         \
     t.dst = meanII;                                                                     \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
     t.src = P;                                                                          \
     t.dst = meanP;                                                                      \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
     t.src = IP;                                                                         \
     t.dst = meanIP;                                                                     \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
                                                                                         \
-    for (int i = 0;i < height;i++) {                                                    \
-      for (int j = 0;j < width;j++) {                                                   \
-        int x = i * width + j;                                                          \
+    for (int i = 0;i < h;i++) {                                                         \
+      for (int j = 0;j < w;j++) {                                                       \
+        int x = i * w + j;                                                              \
         float varI = meanII[x] - (meanI[x] * meanI[x]);                                 \
         float covIP = meanIP[x] - (meanI[x] * meanP[x]);                                \
         A[x] = covIP / (varI + eps);                                                    \
@@ -247,14 +283,14 @@  static int guided_##name(AVFilterContext *ctx, GuidedContext *s,
                                                                                         \
     t.src = A;                                                                          \
     t.dst = meanA;                                                                      \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
     t.src = B;                                                                          \
     t.dst = meanB;                                                                      \
-    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(height, nb_threads));     \
+    ctx->internal->execute(ctx, s->box_slice, &t, NULL, FFMIN(h, nb_threads));          \
                                                                                         \
     for (int i = 0;i < height;i++) {                                                    \
       for (int j = 0;j < width;j++) {                                                   \
-        int x = i * width + j;                                                          \
+        int x = i / sub * w + j / sub;                                                  \
         dst[i * dst_stride + j] = meanA[x] * src[i * src_stride + j] +                  \
                                   meanB[x] * maxval;                                    \
       }                                                                                 \