diff mbox series

[FFmpeg-devel] zoompan filter: fix shaking when zooming

Message ID 20200123091614.8880-1-deibel.robert@googlemail.com
State New
Headers show
Series [FFmpeg-devel] zoompan filter: fix shaking when zooming | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Robert Deibel Jan. 23, 2020, 9:16 a.m. UTC
Fix shaking of image when zoom is applied by the zoompan filter.
Resolves ticket https://trac.ffmpeg.org/ticket/4298
---
 libavfilter/vf_zoompan.c | 93 ++++++++++++++++++++++++++++++----------
 1 file changed, 70 insertions(+), 23 deletions(-)

Comments

Paul B Mahol Jan. 23, 2020, 10:39 a.m. UTC | #1
On 1/23/20, Robert Deibel <deibel.robert@googlemail.com> wrote:
> Fix shaking of image when zoom is applied by the zoompan filter.
> Resolves ticket https://trac.ffmpeg.org/ticket/4298
> ---
>  libavfilter/vf_zoompan.c | 93 ++++++++++++++++++++++++++++++----------
>  1 file changed, 70 insertions(+), 23 deletions(-)
>
> diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c
> index 59c9b19ec8..4ae8c64cd0 100644
> --- a/libavfilter/vf_zoompan.c
> +++ b/libavfilter/vf_zoompan.c
> @@ -150,16 +150,30 @@ static int config_output(AVFilterLink *outlink)
>      return 0;
>  }
>
> +/**
> + * Scales n to be a multiple of grid_size but minimally 2 * grid_size and
> divisible by two.
> + *
> + * Used to scale the width and height of a frame to fit with the
> subsampling grid.
> + * @param n The number to be scaled.
> + * @param grid_size the size of the grid.
> + * @return The scaled number divisible by 2 and minimally 2 * grid_size
> + */
> +static int scale_to_grid(int n, uint8_t grid_size){
> +    return (((n + (1 << grid_size) * 2) & ~((1 << grid_size) - 1)) + 1) &
> ~1;
> +
> +}
> +
>  static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double
> *var_values, int i,
>                                 double *zoom, double *dx, double *dy)
>  {
>      ZPContext *s = ctx->priv;
>      AVFilterLink *outlink = ctx->outputs[0];
>      int64_t pts = s->frame_count;
> -    int k, x, y, w, h, ret = 0;
> +    int k, x, y, crop_x, crop_y, w, h, crop_w, crop_h, overscaled_w,
> overscaled_h, ret = 0;
>      uint8_t *input[4];
>      int px[4], py[4];
>      AVFrame *out;
> +    double dw, dh;
>
>      var_values[VAR_PX]    = s->x;
>      var_values[VAR_PY]    = s->y;
> @@ -173,32 +187,46 @@ static int output_single_frame(AVFilterContext *ctx,
> AVFrame *in, double *var_va
>
>      *zoom = av_clipd(*zoom, 1, 10);
>      var_values[VAR_ZOOM] = *zoom;
> -    w = in->width * (1.0 / *zoom);
> -    h = in->height * (1.0 / *zoom);
> +
> +    // Keep track of double variables for correct calculation
> +    w = dw = (double) in->width * (1.0 / *zoom);
> +    h = dh = (double) in->height * (1.0 / *zoom);
> +
> +    // width and height with additional pixels from subsampling "grid"
> +    crop_w = scale_to_grid(w, s->desc->log2_chroma_w);
> +    crop_h = scale_to_grid(h, s->desc->log2_chroma_h);
>
>      *dx = av_expr_eval(s->x_expr, var_values, NULL);
>
> -    x = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0));
> -    var_values[VAR_X] = *dx;
> -    x &= ~((1 << s->desc->log2_chroma_w) - 1);
> +    crop_x = ceil(av_clipd(*dx - (((double) crop_w - w) / 2.0), 0,
> FFMAX(in->width - crop_w, 0)));
> +    var_values[VAR_X] = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0));
> +    crop_x &= ~((1 << s->desc->log2_chroma_w) - 1);             // Masking
> LSBs making coordinate divisible
>
>      *dy = av_expr_eval(s->y_expr, var_values, NULL);
>
> -    y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0));
> -    var_values[VAR_Y] = *dy;
> -    y &= ~((1 << s->desc->log2_chroma_h) - 1);
> +    crop_y = ceil(av_clipd(*dy - (((double) crop_h - h)/ 2.0), 0,
> FFMAX(in->height - crop_h, 0)));
> +    var_values[VAR_Y] = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0));
> +    crop_y &= ~((1 << s->desc->log2_chroma_h) - 1);             // Masking
> LSBs making coordinate divisible
>
> -    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    overscaled_w = outlink->w + (crop_w - dw) * *zoom;
> +    overscaled_h = outlink->h + (crop_h - dh) * *zoom;
> +
> +    out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h);
>      if (!out) {
>          ret = AVERROR(ENOMEM);
>          return ret;
>      }
>
> -    px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w);
> -    px[0] = px[3] = x;
> +    // Values for crop transform. Channel 1 and 2 are chroma plane, 0 luma
> plane, 3 alpha plane
> +    px[1] = px[2] = AV_CEIL_RSHIFT(crop_x, s->desc->log2_chroma_w);
> +    px[0] = px[3] = crop_x;
>
> -    py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h);
> -    py[0] = py[3] = y;
> +    py[1] = py[2] = AV_CEIL_RSHIFT(crop_y, s->desc->log2_chroma_h);
> +    py[0] = py[3] = crop_y;
> +
> +    // Crop data in input using px/py
> +    for (k = 0; k<4; k++)
> +        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
>
>      s->sws = sws_alloc_context();
>      if (!s->sws) {
> @@ -206,21 +234,41 @@ static int output_single_frame(AVFilterContext *ctx,
> AVFrame *in, double *var_va
>          goto error;
>      }
>
> -    for (k = 0; in->data[k]; k++)
> -        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
> -
> -    av_opt_set_int(s->sws, "srcw", w, 0);
> -    av_opt_set_int(s->sws, "srch", h, 0);
> +    // Set context variables. Used in scaling transform
> +    av_opt_set_int(s->sws, "srcw", crop_w, 0);
> +    av_opt_set_int(s->sws, "srch", crop_h, 0);
>      av_opt_set_int(s->sws, "src_format", in->format, 0);
> -    av_opt_set_int(s->sws, "dstw", outlink->w, 0);
> -    av_opt_set_int(s->sws, "dsth", outlink->h, 0);
> +    av_opt_set_int(s->sws, "dstw", overscaled_w, 0);
> +    av_opt_set_int(s->sws, "dsth", overscaled_h, 0);
>      av_opt_set_int(s->sws, "dst_format", outlink->format, 0);
>      av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0);
>
>      if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0)
>          goto error;
>
> -    sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h,
> out->data, out->linesize);
> +    // Scale data in input to defined size and copy to out
> +    sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0,
> crop_h, out->data, out->linesize);
> +
> +
> +    // Calculate x and y with respect to rounding error.
> +    *dx = ((crop_w - dw) * *zoom) / (((double) crop_w - dw) / (*dx -
> (double) crop_x));
> +    x = ceil(av_clipd(*dx, 0, FFMAX(overscaled_w - outlink->w, 0)));
> +    x &= ~((1 << s->desc->log2_chroma_w) - 1);          // Masking LSBs
> making coordinate divisible
> +
> +    *dy = ((crop_h - dh) * *zoom) / (((double) crop_h - dh) / (*dy -
> (double) crop_y));
> +    y = ceil(av_clipd(*dy, 0, FFMAX(overscaled_h - outlink->h, 0)));
> +    y &= ~((1 << s->desc->log2_chroma_h) - 1);          // Masking LSBs
> making coordinate divisible
> +
> +    // Values for crop transform. Channel 1 and 2 are chroma plane, 0 luma
> plane, 3 alpha plane
> +    px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w);
> +    px[0] = px[3] = x;
> +
> +    py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h);
> +    py[0] = py[3] = y;
> +
> +    // Crop data in input using px/py
> +    for (k = 0; k<4; k++)
> +        out->data[k] = out->data[k] + py[k] * out->linesize[k] + px[k];
>
>      out->pts = pts;
>      s->frame_count++;
> @@ -229,7 +277,6 @@ static int output_single_frame(AVFilterContext *ctx,
> AVFrame *in, double *var_va
>      sws_freeContext(s->sws);
>      s->sws = NULL;
>      s->current_frame++;
> -
>      if (s->current_frame >= s->nb_frames) {
>          if (*dx != -1)
>              s->x = *dx;
> --
> 2.25.0
>

The patch has numerous style issues.
Please post a minimal patch that fixes things, without adding obvious
comments or removing empty lines.
diff mbox series

Patch

diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c
index 59c9b19ec8..4ae8c64cd0 100644
--- a/libavfilter/vf_zoompan.c
+++ b/libavfilter/vf_zoompan.c
@@ -150,16 +150,30 @@  static int config_output(AVFilterLink *outlink)
     return 0;
 }
 
+/**
+ * Scales n to be a multiple of grid_size but minimally 2 * grid_size and divisible by two.
+ *
+ * Used to scale the width and height of a frame to fit with the subsampling grid.
+ * @param n The number to be scaled.
+ * @param grid_size the size of the grid.
+ * @return The scaled number divisible by 2 and minimally 2 * grid_size
+ */
+static int scale_to_grid(int n, uint8_t grid_size){
+    return (((n + (1 << grid_size) * 2) & ~((1 << grid_size) - 1)) + 1) & ~1;
+
+}
+
 static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_values, int i,
                                double *zoom, double *dx, double *dy)
 {
     ZPContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     int64_t pts = s->frame_count;
-    int k, x, y, w, h, ret = 0;
+    int k, x, y, crop_x, crop_y, w, h, crop_w, crop_h, overscaled_w, overscaled_h, ret = 0;
     uint8_t *input[4];
     int px[4], py[4];
     AVFrame *out;
+    double dw, dh;
 
     var_values[VAR_PX]    = s->x;
     var_values[VAR_PY]    = s->y;
@@ -173,32 +187,46 @@  static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_va
 
     *zoom = av_clipd(*zoom, 1, 10);
     var_values[VAR_ZOOM] = *zoom;
-    w = in->width * (1.0 / *zoom);
-    h = in->height * (1.0 / *zoom);
+
+    // Keep track of double variables for correct calculation
+    w = dw = (double) in->width * (1.0 / *zoom);
+    h = dh = (double) in->height * (1.0 / *zoom);
+
+    // width and height with additional pixels from subsampling "grid"
+    crop_w = scale_to_grid(w, s->desc->log2_chroma_w);
+    crop_h = scale_to_grid(h, s->desc->log2_chroma_h);
 
     *dx = av_expr_eval(s->x_expr, var_values, NULL);
 
-    x = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0));
-    var_values[VAR_X] = *dx;
-    x &= ~((1 << s->desc->log2_chroma_w) - 1);
+    crop_x = ceil(av_clipd(*dx - (((double) crop_w - w) / 2.0), 0, FFMAX(in->width - crop_w, 0)));
+    var_values[VAR_X] = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0));
+    crop_x &= ~((1 << s->desc->log2_chroma_w) - 1);             // Masking LSBs making coordinate divisible
 
     *dy = av_expr_eval(s->y_expr, var_values, NULL);
 
-    y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0));
-    var_values[VAR_Y] = *dy;
-    y &= ~((1 << s->desc->log2_chroma_h) - 1);
+    crop_y = ceil(av_clipd(*dy - (((double) crop_h - h)/ 2.0), 0, FFMAX(in->height - crop_h, 0)));
+    var_values[VAR_Y] = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0));
+    crop_y &= ~((1 << s->desc->log2_chroma_h) - 1);             // Masking LSBs making coordinate divisible
 
-    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    overscaled_w = outlink->w + (crop_w - dw) * *zoom;
+    overscaled_h = outlink->h + (crop_h - dh) * *zoom;
+
+    out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h);
     if (!out) {
         ret = AVERROR(ENOMEM);
         return ret;
     }
 
-    px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w);
-    px[0] = px[3] = x;
+    // Values for crop transform. Channel 1 and 2 are chroma plane, 0 luma plane, 3 alpha plane
+    px[1] = px[2] = AV_CEIL_RSHIFT(crop_x, s->desc->log2_chroma_w);
+    px[0] = px[3] = crop_x;
 
-    py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h);
-    py[0] = py[3] = y;
+    py[1] = py[2] = AV_CEIL_RSHIFT(crop_y, s->desc->log2_chroma_h);
+    py[0] = py[3] = crop_y;
+
+    // Crop data in input using px/py
+    for (k = 0; k<4; k++)
+        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
 
     s->sws = sws_alloc_context();
     if (!s->sws) {
@@ -206,21 +234,41 @@  static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_va
         goto error;
     }
 
-    for (k = 0; in->data[k]; k++)
-        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
-
-    av_opt_set_int(s->sws, "srcw", w, 0);
-    av_opt_set_int(s->sws, "srch", h, 0);
+    // Set context variables. Used in scaling transform
+    av_opt_set_int(s->sws, "srcw", crop_w, 0);
+    av_opt_set_int(s->sws, "srch", crop_h, 0);
     av_opt_set_int(s->sws, "src_format", in->format, 0);
-    av_opt_set_int(s->sws, "dstw", outlink->w, 0);
-    av_opt_set_int(s->sws, "dsth", outlink->h, 0);
+    av_opt_set_int(s->sws, "dstw", overscaled_w, 0);
+    av_opt_set_int(s->sws, "dsth", overscaled_h, 0);
     av_opt_set_int(s->sws, "dst_format", outlink->format, 0);
     av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0);
 
     if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0)
         goto error;
 
-    sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h, out->data, out->linesize);
+    // Scale data in input to defined size and copy to out
+    sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, crop_h, out->data, out->linesize);
+
+
+    // Calculate x and y with respect to rounding error.
+    *dx = ((crop_w - dw) * *zoom) / (((double) crop_w - dw) / (*dx - (double) crop_x));
+    x = ceil(av_clipd(*dx, 0, FFMAX(overscaled_w - outlink->w, 0)));
+    x &= ~((1 << s->desc->log2_chroma_w) - 1);          // Masking LSBs making coordinate divisible
+
+    *dy = ((crop_h - dh) * *zoom) / (((double) crop_h - dh) / (*dy - (double) crop_y));
+    y = ceil(av_clipd(*dy, 0, FFMAX(overscaled_h - outlink->h, 0)));
+    y &= ~((1 << s->desc->log2_chroma_h) - 1);          // Masking LSBs making coordinate divisible
+
+    // Values for crop transform. Channel 1 and 2 are chroma plane, 0 luma plane, 3 alpha plane
+    px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w);
+    px[0] = px[3] = x;
+
+    py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h);
+    py[0] = py[3] = y;
+
+    // Crop data in input using px/py
+    for (k = 0; k<4; k++)
+        out->data[k] = out->data[k] + py[k] * out->linesize[k] + px[k];
 
     out->pts = pts;
     s->frame_count++;
@@ -229,7 +277,6 @@  static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_va
     sws_freeContext(s->sws);
     s->sws = NULL;
     s->current_frame++;
-
     if (s->current_frame >= s->nb_frames) {
         if (*dx != -1)
             s->x = *dx;