Message ID | 20200127141729.11773-1-deibel.robert@googlemail.com |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,v3] avfilter/vf_zoompan: fix shaking when zooming | expand |
Context | Check | Description |
---|---|---|
andriy/ffmpeg-patchwork | success | Make fate finished |
On 1/27/20, Robert Deibel <deibel.robert@googlemail.com> wrote: > Fix shaking of image when zoom is applied by the zoompan filter. > Resolves ticket https://trac.ffmpeg.org/ticket/4298 > --- > Fixed the style issues and modified the comment of scale_to_grid() to > correctly specify the functionality. > > libavfilter/vf_zoompan.c | 82 ++++++++++++++++++++++++++++------------ > 1 file changed, 58 insertions(+), 24 deletions(-) > > diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c > index 59c9b19ec8..1710f1d353 100644 > --- a/libavfilter/vf_zoompan.c > +++ b/libavfilter/vf_zoompan.c > @@ -150,16 +150,30 @@ static int config_output(AVFilterLink *outlink) > return 0; > } > > +/** > + * Scales n to the next number divisible by 2 * 2^log_grid_size but > minimally n + 2 * 2^log_grid_size. > + * > + * Used to scale the width and height of a frame to fit with the > subsampling grid. > + * @param n The number to be scaled. > + * @param log_grid_size log 2 of the gridsize. > + * @return The next number divisible by 2 * 2^log_grid_size but minimally n > + 2 * 2^log_grid_size > + */ > +static int scale_to_grid(int n, uint8_t log2_grid_size) > +{ > + return (((n + (1 << log2_grid_size) * 2) & ~((1 << log2_grid_size) - > 1)) + 1) & ~1;; > +} > + > static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double > *var_values, int i, > double *zoom, double *dx, double *dy) > { > ZPContext *s = ctx->priv; > AVFilterLink *outlink = ctx->outputs[0]; > int64_t pts = s->frame_count; > - int k, x, y, w, h, ret = 0; > + int k, x, y, crop_x, crop_y, w, h, crop_w, crop_h, overscaled_w, > overscaled_h, ret = 0; > uint8_t *input[4]; > int px[4], py[4]; > AVFrame *out; > + double dw, dh, dx_scaled, dy_scaled; > > var_values[VAR_PX] = s->x; > var_values[VAR_PY] = s->y; > @@ -173,32 +187,38 @@ static int output_single_frame(AVFilterContext *ctx, > AVFrame *in, double *var_va > > *zoom = av_clipd(*zoom, 1, 10); > var_values[VAR_ZOOM] = *zoom; > - w = in->width * (1.0 / *zoom); > - h = in->height * (1.0 / *zoom); > > - *dx = av_expr_eval(s->x_expr, var_values, NULL); > + w = dw = (double) in->width * (1 / *zoom); > + h = dh = (double) in->height * (1 / *zoom); > + crop_w = scale_to_grid(w, s->desc->log2_chroma_w); > + crop_h = scale_to_grid(h, s->desc->log2_chroma_h); > + overscaled_w = outlink->w + (crop_w - dw) * *zoom; > + overscaled_h = outlink->h + (crop_h - dh) * *zoom; > > - x = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0)); > - var_values[VAR_X] = *dx; > - x &= ~((1 << s->desc->log2_chroma_w) - 1); > + *dx = av_expr_eval(s->x_expr, var_values, NULL); > + crop_x = ceil(av_clipd(*dx - (((double) crop_w - w) / 2.0), 0, > FFMAX(in->width - crop_w, 0))); > + var_values[VAR_X] = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0)); > + crop_x &= ~((1 << s->desc->log2_chroma_w) - 1); > > *dy = av_expr_eval(s->y_expr, var_values, NULL); > + crop_y = ceil(av_clipd(*dy - (((double) crop_h - h)/ 2.0), 0, > FFMAX(in->height - crop_h, 0))); > + var_values[VAR_Y] = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); > + crop_y &= ~((1 << s->desc->log2_chroma_h) - 1); > > - y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); > - var_values[VAR_Y] = *dy; > - y &= ~((1 << s->desc->log2_chroma_h) - 1); > - > - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h); This is wrong. outlink->w/h should be used always, otherwise outlink w/h differs from frame w/h. > if (!out) { > ret = AVERROR(ENOMEM); > return ret; > } > > - px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w); > - px[0] = px[3] = x; > + px[1] = px[2] = AV_CEIL_RSHIFT(crop_x, s->desc->log2_chroma_w); > + px[0] = px[3] = crop_x; > > - py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h); > - py[0] = py[3] = y; > + py[1] = py[2] = AV_CEIL_RSHIFT(crop_y, s->desc->log2_chroma_h); > + py[0] = py[3] = crop_y; > + > + for (k = 0; k < 4; k++) > + input[k] = in->data[k] + py[k] * in->linesize[k] + px[k]; > > s->sws = sws_alloc_context(); > if (!s->sws) { > @@ -206,21 +226,35 @@ static int output_single_frame(AVFilterContext *ctx, > AVFrame *in, double *var_va > goto error; > } > > - for (k = 0; in->data[k]; k++) > - input[k] = in->data[k] + py[k] * in->linesize[k] + px[k]; > - > - av_opt_set_int(s->sws, "srcw", w, 0); > - av_opt_set_int(s->sws, "srch", h, 0); > + av_opt_set_int(s->sws, "srcw", crop_w, 0); > + av_opt_set_int(s->sws, "srch", crop_h, 0); > av_opt_set_int(s->sws, "src_format", in->format, 0); > - av_opt_set_int(s->sws, "dstw", outlink->w, 0); > - av_opt_set_int(s->sws, "dsth", outlink->h, 0); > + av_opt_set_int(s->sws, "dstw", overscaled_w, 0); > + av_opt_set_int(s->sws, "dsth", overscaled_h, 0); > av_opt_set_int(s->sws, "dst_format", outlink->format, 0); > av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0); > > if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0) > goto error; > > - sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h, > out->data, out->linesize); > + sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, > crop_h, out->data, out->linesize); > + > + dx_scaled = ((crop_w - dw) * *zoom) / (((double) crop_w - dw) / (*dx - > (double) crop_x)); > + x = ceil(av_clipd(dx_scaled, 0, FFMAX(overscaled_w - outlink->w, 0))); > + x &= ~((1 << s->desc->log2_chroma_w) - 1); > + > + dy_scaled = ((crop_h - dh) * *zoom) / (((double) crop_h - dh) / (*dx - > (double) crop_y)); > + y = ceil(av_clipd(dy_scaled, 0, FFMAX(overscaled_h - outlink->h, 0))); > + y &= ~((1 << s->desc->log2_chroma_h) - 1); > + > + px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w); > + px[0] = px[3] = x; > + > + py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h); > + py[0] = py[3] = y; > + > + for (k = 0; k < 4; k++) > + out->data[k] = out->data[k] + py[k] * out->linesize[k] + px[k]; > > out->pts = pts; > s->frame_count++; > -- > 2.25.0 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On Mon, Jan 27, 2020 at 15:17:29 +0100, Robert Deibel wrote: > + * Used to scale the width and height of a frame to fit with the subsampling grid. > + * @param n The number to be scaled. > + * @param log_grid_size log 2 of the gridsize. > + * @return The next number divisible by 2 * 2^log_grid_size but minimally n + 2 * 2^log_grid_size With this modified explanation, my observations probably make sense. > - w = in->width * (1.0 / *zoom); > - h = in->height * (1.0 / *zoom); > > - *dx = av_expr_eval(s->x_expr, var_values, NULL); > + w = dw = (double) in->width * (1 / *zoom); > + h = dh = (double) in->height * (1 / *zoom); Actually, I meant w = dw = (double)in->width / *zoom; h = dh = (double)in->height / *zoom; But the original lines were fine, as: int / double is automatically promoted/cast to double / double (and were therefore technically identical to my proposal and your slightly too complex implementation). Cheers, Moritz
On 27.01.20 16:44, Paul B Mahol wrote: > On 1/27/20, Robert Deibel <deibel.robert@googlemail.com> wrote: >> - y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); >> - var_values[VAR_Y] = *dy; >> - y &= ~((1 << s->desc->log2_chroma_h) - 1); >> - >> - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); >> + out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h); > This is wrong. outlink->w/h should be used always, otherwise outlink > w/h differs from frame w/h. > So how should I create a video buffer? I tried av_frame_alloc + setting width, height and format + av_frame_get_buffer, but this resulted in a heavy performance impact. Apart from the fact, that I can't get the correct data copied. Also, if it's not allowed to pass other w/h why is it even possible? Or is it only a problem if I use the outlink? Could I request a buffer from the inlink with the modified sizes?
On 1/28/20, Robert Deibel <deibel.robert@googlemail.com> wrote: > On 27.01.20 16:44, Paul B Mahol wrote: >> On 1/27/20, Robert Deibel <deibel.robert@googlemail.com> wrote: >>> - y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); >>> - var_values[VAR_Y] = *dy; >>> - y &= ~((1 << s->desc->log2_chroma_h) - 1); >>> - >>> - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); >>> + out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h); >> This is wrong. outlink->w/h should be used always, otherwise outlink >> w/h differs from frame w/h. >> > So how should I create a video buffer? I tried av_frame_alloc + setting > width, height and format + av_frame_get_buffer, but this resulted in a > heavy performance impact. Apart from the fact, that I can't get the > correct data copied. > > Also, if it's not allowed to pass other w/h why is it even possible? Or > is it only a problem if I use the outlink? Could I request a buffer from > the inlink with the modified sizes? Frame w/h should be exactly same as outlink w/h. Otherwise output will not have always same w/h. Why are you changing w/h at all? > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On 28.01.20 15:46, Paul B Mahol wrote: > On 1/28/20, Robert Deibel <deibel.robert@googlemail.com> wrote: >> On 27.01.20 16:44, Paul B Mahol wrote: >>> On 1/27/20, Robert Deibel <deibel.robert@googlemail.com> wrote: >>>> - y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); >>>> - var_values[VAR_Y] = *dy; >>>> - y &= ~((1 << s->desc->log2_chroma_h) - 1); >>>> - >>>> - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); >>>> + out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h); >>> This is wrong. outlink->w/h should be used always, otherwise outlink >>> w/h differs from frame w/h. >>> >> So how should I create a video buffer? I tried av_frame_alloc + setting >> width, height and format + av_frame_get_buffer, but this resulted in a >> heavy performance impact. Apart from the fact, that I can't get the >> correct data copied. >> >> Also, if it's not allowed to pass other w/h why is it even possible? Or >> is it only a problem if I use the outlink? Could I request a buffer from >> the inlink with the modified sizes? > Frame w/h should be exactly same as outlink w/h. > Otherwise output will not have always same w/h. > Why are you changing w/h at all? The idea was to build a slightly larger frame than output to compensate for rounding errors and crop it to the correct size afterwards.
diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c index 59c9b19ec8..1710f1d353 100644 --- a/libavfilter/vf_zoompan.c +++ b/libavfilter/vf_zoompan.c @@ -150,16 +150,30 @@ static int config_output(AVFilterLink *outlink) return 0; } +/** + * Scales n to the next number divisible by 2 * 2^log_grid_size but minimally n + 2 * 2^log_grid_size. + * + * Used to scale the width and height of a frame to fit with the subsampling grid. + * @param n The number to be scaled. + * @param log_grid_size log 2 of the gridsize. + * @return The next number divisible by 2 * 2^log_grid_size but minimally n + 2 * 2^log_grid_size + */ +static int scale_to_grid(int n, uint8_t log2_grid_size) +{ + return (((n + (1 << log2_grid_size) * 2) & ~((1 << log2_grid_size) - 1)) + 1) & ~1;; +} + static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_values, int i, double *zoom, double *dx, double *dy) { ZPContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; int64_t pts = s->frame_count; - int k, x, y, w, h, ret = 0; + int k, x, y, crop_x, crop_y, w, h, crop_w, crop_h, overscaled_w, overscaled_h, ret = 0; uint8_t *input[4]; int px[4], py[4]; AVFrame *out; + double dw, dh, dx_scaled, dy_scaled; var_values[VAR_PX] = s->x; var_values[VAR_PY] = s->y; @@ -173,32 +187,38 @@ static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_va *zoom = av_clipd(*zoom, 1, 10); var_values[VAR_ZOOM] = *zoom; - w = in->width * (1.0 / *zoom); - h = in->height * (1.0 / *zoom); - *dx = av_expr_eval(s->x_expr, var_values, NULL); + w = dw = (double) in->width * (1 / *zoom); + h = dh = (double) in->height * (1 / *zoom); + crop_w = scale_to_grid(w, s->desc->log2_chroma_w); + crop_h = scale_to_grid(h, s->desc->log2_chroma_h); + overscaled_w = outlink->w + (crop_w - dw) * *zoom; + overscaled_h = outlink->h + (crop_h - dh) * *zoom; - x = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0)); - var_values[VAR_X] = *dx; - x &= ~((1 << s->desc->log2_chroma_w) - 1); + *dx = av_expr_eval(s->x_expr, var_values, NULL); + crop_x = ceil(av_clipd(*dx - (((double) crop_w - w) / 2.0), 0, FFMAX(in->width - crop_w, 0))); + var_values[VAR_X] = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0)); + crop_x &= ~((1 << s->desc->log2_chroma_w) - 1); *dy = av_expr_eval(s->y_expr, var_values, NULL); + crop_y = ceil(av_clipd(*dy - (((double) crop_h - h)/ 2.0), 0, FFMAX(in->height - crop_h, 0))); + var_values[VAR_Y] = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); + crop_y &= ~((1 << s->desc->log2_chroma_h) - 1); - y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0)); - var_values[VAR_Y] = *dy; - y &= ~((1 << s->desc->log2_chroma_h) - 1); - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + out = ff_get_video_buffer(outlink, overscaled_w, overscaled_h); if (!out) { ret = AVERROR(ENOMEM); return ret; } - px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w); - px[0] = px[3] = x; + px[1] = px[2] = AV_CEIL_RSHIFT(crop_x, s->desc->log2_chroma_w); + px[0] = px[3] = crop_x; - py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h); - py[0] = py[3] = y; + py[1] = py[2] = AV_CEIL_RSHIFT(crop_y, s->desc->log2_chroma_h); + py[0] = py[3] = crop_y; + + for (k = 0; k < 4; k++) + input[k] = in->data[k] + py[k] * in->linesize[k] + px[k]; s->sws = sws_alloc_context(); if (!s->sws) { @@ -206,21 +226,35 @@ static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_va goto error; } - for (k = 0; in->data[k]; k++) - input[k] = in->data[k] + py[k] * in->linesize[k] + px[k]; - - av_opt_set_int(s->sws, "srcw", w, 0); - av_opt_set_int(s->sws, "srch", h, 0); + av_opt_set_int(s->sws, "srcw", crop_w, 0); + av_opt_set_int(s->sws, "srch", crop_h, 0); av_opt_set_int(s->sws, "src_format", in->format, 0); - av_opt_set_int(s->sws, "dstw", outlink->w, 0); - av_opt_set_int(s->sws, "dsth", outlink->h, 0); + av_opt_set_int(s->sws, "dstw", overscaled_w, 0); + av_opt_set_int(s->sws, "dsth", overscaled_h, 0); av_opt_set_int(s->sws, "dst_format", outlink->format, 0); av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0); if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0) goto error; - sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h, out->data, out->linesize); + sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, crop_h, out->data, out->linesize); + + dx_scaled = ((crop_w - dw) * *zoom) / (((double) crop_w - dw) / (*dx - (double) crop_x)); + x = ceil(av_clipd(dx_scaled, 0, FFMAX(overscaled_w - outlink->w, 0))); + x &= ~((1 << s->desc->log2_chroma_w) - 1); + + dy_scaled = ((crop_h - dh) * *zoom) / (((double) crop_h - dh) / (*dx - (double) crop_y)); + y = ceil(av_clipd(dy_scaled, 0, FFMAX(overscaled_h - outlink->h, 0))); + y &= ~((1 << s->desc->log2_chroma_h) - 1); + + px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w); + px[0] = px[3] = x; + + py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h); + py[0] = py[3] = y; + + for (k = 0; k < 4; k++) + out->data[k] = out->data[k] + py[k] * out->linesize[k] + px[k]; out->pts = pts; s->frame_count++;