diff mbox

[FFmpeg-devel,v3,1/2] vf_crop: Add support for cropping hardware frames

Message ID 20190323161849.7019-1-sw@jkqxz.net
State Superseded
Headers show

Commit Message

Mark Thompson March 23, 2019, 4:18 p.m. UTC
Set the cropping fields in the AVFrame.
---
 libavfilter/vf_crop.c | 74 +++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 23 deletions(-)

There is the slightly unfortunate effect that the filter links don't carry the cropping information, so we don't know how big the cropped output is in following links until we actually get a frame.

For example, to get the middle ninth of a stream:

./ffmpeg_g -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i in.mp4 -an -vf "crop=iw/3:ih/3:iw/3:ih/3,scale_vaapi=iw/3:ih/3" -c:v h264_vaapi out.mp4

Without the extra arguments to scale it will take the cropped part correctly but then scale it to the original size.

Comments

Diego Felix de Souza via ffmpeg-devel March 25, 2019, 4:58 a.m. UTC | #1
On Sat, 23 Mar 2019 16:18:48 +0000
Mark Thompson <sw@jkqxz.net> wrote:

> Set the cropping fields in the AVFrame.
> ---
>  libavfilter/vf_crop.c | 74
> +++++++++++++++++++++++++++++-------------- 1 file changed, 51
> insertions(+), 23 deletions(-)
> 
> There is the slightly unfortunate effect the filter links don't carry
> the cropping information, so we don't know how big the cropped output
> is in following links until we actually get a frame.
> 
> For example, to get the middle ninth of a stream:
> 
> ./ffmpeg_g -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD128
> -hwaccel_output_format vaapi -i in.mp4 -an -vf
> "crop=iw/3:ih/3:iw/3:ih/3,scale_vaapi=iw/3:ih/3" -c:v h264_vaapi
> out.mp4
> 
> Without the extra arguments to scale it will take the cropped part
> correctly but then scale it to the original size.
> 
> diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
> index 84be4c7d0d..7f6b0f03d3 100644
> --- a/libavfilter/vf_crop.c
> +++ b/libavfilter/vf_crop.c
> @@ -98,9 +98,17 @@ static int query_formats(AVFilterContext *ctx)
>  
>      for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
>          const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> -        if (!(desc->flags & (AV_PIX_FMT_FLAG_HWACCEL |
> AV_PIX_FMT_FLAG_BITSTREAM)) &&
> -            !((desc->log2_chroma_w || desc->log2_chroma_h)
> && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) &&
> -            (ret = ff_add_format(&formats, fmt)) < 0)
> +        if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
> +            continue;
> +        if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {
> +            // Not usable if there is any subsampling but the format
> is
> +            // not planar (e.g. YUYV422).
> +            if ((desc->log2_chroma_w || desc->log2_chroma_h) &&
> +                !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))
> +                continue;
> +        }
> +        ret = ff_add_format(&formats, fmt);
> +        if (ret < 0)
>              return ret;
>      }
>  
> @@ -157,8 +165,14 @@ static int config_input(AVFilterLink *link)
>      s->var_values[VAR_POS]   = NAN;
>  
>      av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
> -    s->hsub = pix_desc->log2_chroma_w;
> -    s->vsub = pix_desc->log2_chroma_h;
> +
> +    if (pix_desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
> +        s->hsub = 1;
> +        s->vsub = 1;
> +    } else {
> +        s->hsub = pix_desc->log2_chroma_w;
> +        s->vsub = pix_desc->log2_chroma_h;
> +    }
>  
>      if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
>                                        var_names, s->var_values,
> @@ -237,9 +251,15 @@ fail_expr:
>  static int config_output(AVFilterLink *link)
>  {
>      CropContext *s = link->src->priv;
> +    const AVPixFmtDescriptor *desc =
> av_pix_fmt_desc_get(link->format); 
> -    link->w = s->w;
> -    link->h = s->h;
> +    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
> +        // Hardware frames adjust the cropping regions rather than
> +        // changing the frame size.
> +    } else {
> +        link->w = s->w;
> +        link->h = s->h;
> +    }
>      link->sample_aspect_ratio = s->out_sar;
>  
>      return 0;
> @@ -252,9 +272,6 @@ static int filter_frame(AVFilterLink *link,
> AVFrame *frame) const AVPixFmtDescriptor *desc =
> av_pix_fmt_desc_get(link->format); int i;
>  
> -    frame->width  = s->w;
> -    frame->height = s->h;
> -
>      s->var_values[VAR_N] = link->frame_count_out;
>      s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ?
>          NAN : frame->pts * av_q2d(link->time_base);
> @@ -285,22 +302,33 @@ static int filter_frame(AVFilterLink *link,
> AVFrame *frame) (int)s->var_values[VAR_N], s->var_values[VAR_T],
> s->var_values[VAR_POS], s->x, s->y, s->x+s->w, s->y+s->h);
>  
> -    frame->data[0] += s->y * frame->linesize[0];
> -    frame->data[0] += s->x * s->max_step[0];
> -
> -    if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags &
> FF_PSEUDOPAL)) {
> -        for (i = 1; i < 3; i ++) {
> -            if (frame->data[i]) {
> -                frame->data[i] += (s->y >> s->vsub) *
> frame->linesize[i];
> -                frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;
> +    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
> +        frame->crop_top   += s->y;
> +        frame->crop_left  += s->x;
> +        frame->crop_bottom = frame->height - frame->crop_top -
> frame->crop_bottom - s->h;
> +        frame->crop_right  = frame->width  - frame->crop_left -
> frame->crop_right - s->w; +
> +    } else {
> +        frame->width  = s->w;
> +        frame->height = s->h;
> +
> +        frame->data[0] += s->y * frame->linesize[0];
> +        frame->data[0] += s->x * s->max_step[0];
> +
> +        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags &
> FF_PSEUDOPAL)) {
> +            for (i = 1; i < 3; i ++) {
> +                if (frame->data[i]) {
> +                    frame->data[i] += (s->y >> s->vsub) *
> frame->linesize[i];
> +                    frame->data[i] += (s->x * s->max_step[i]) >>
> s->hsub;
> +                }
>              }
>          }
> -    }
>  
> -    /* alpha plane */
> -    if (frame->data[3]) {
> -        frame->data[3] += s->y * frame->linesize[3];
> -        frame->data[3] += s->x * s->max_step[3];
> +        /* alpha plane */
> +        if (frame->data[3]) {
> +            frame->data[3] += s->y * frame->linesize[3];
> +            frame->data[3] += s->x * s->max_step[3];
> +        }
>      }
>  
>      return ff_filter_frame(link->dst->outputs[0], frame);

Ship it.

--phil
Ruiling Song March 26, 2019, 10:59 a.m. UTC | #2
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of

> Mark Thompson

> Sent: Sunday, March 24, 2019 12:19 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: [FFmpeg-devel] [PATCH v3 1/2] vf_crop: Add support for cropping

> hardware frames

> 

> Set the cropping fields in the AVFrame.

> ---

>  libavfilter/vf_crop.c | 74 +++++++++++++++++++++++++++++--------------

>  1 file changed, 51 insertions(+), 23 deletions(-)

> 

> There is the slightly unfortunate effect the filter links don't carry the cropping

> information, so we don't know how big the cropped output is in following links

> until we actually get a frame.

> 

> For example, to get the middle ninth of a stream:

> 

> ./ffmpeg_g -y -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -

> hwaccel_output_format vaapi -i in.mp4 -an -vf

> "crop=iw/3:ih/3:iw/3:ih/3,scale_vaapi=iw/3:ih/3" -c:v h264_vaapi out.mp4

Hi Mark,

I tested the command against the patch, it works.
But for people who have no idea of the implementation details, I think the "scale_vaapi=iw/3:ih/3" will be very confusing.
I think we need to make scale_vaapi evaluate input dimensions considering crop information. What do you think?
People would just think that the input buffer to the scale_vaapi is the cropped size.
And do we need to add a warning message in the encoder about pending crop information if the user fails to add a vaapi filter after crop?
It seems that the vaapi encoder does not encode correctly when cropping is present?

Thanks!
Ruiling
> 

> Without the extra arguments to scale it will take the cropped part correctly but

> then scale it to the original size.

> 

> diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c

> index 84be4c7d0d..7f6b0f03d3 100644

> --- a/libavfilter/vf_crop.c

> +++ b/libavfilter/vf_crop.c

> @@ -98,9 +98,17 @@ static int query_formats(AVFilterContext *ctx)

> 

>      for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {

>          const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);

> -        if (!(desc->flags & (AV_PIX_FMT_FLAG_HWACCEL |

> AV_PIX_FMT_FLAG_BITSTREAM)) &&

> -            !((desc->log2_chroma_w || desc->log2_chroma_h) && !(desc->flags &

> AV_PIX_FMT_FLAG_PLANAR)) &&

> -            (ret = ff_add_format(&formats, fmt)) < 0)

> +        if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)

> +            continue;

> +        if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {

> +            // Not usable if there is any subsampling but the format is

> +            // not planar (e.g. YUYV422).

> +            if ((desc->log2_chroma_w || desc->log2_chroma_h) &&

> +                !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))

> +                continue;

> +        }

> +        ret = ff_add_format(&formats, fmt);

> +        if (ret < 0)

>              return ret;

>      }

> 

> @@ -157,8 +165,14 @@ static int config_input(AVFilterLink *link)

>      s->var_values[VAR_POS]   = NAN;

> 

>      av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);

> -    s->hsub = pix_desc->log2_chroma_w;

> -    s->vsub = pix_desc->log2_chroma_h;

> +

> +    if (pix_desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {

> +        s->hsub = 1;

> +        s->vsub = 1;

> +    } else {

> +        s->hsub = pix_desc->log2_chroma_w;

> +        s->vsub = pix_desc->log2_chroma_h;

> +    }

> 

>      if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),

>                                        var_names, s->var_values,

> @@ -237,9 +251,15 @@ fail_expr:

>  static int config_output(AVFilterLink *link)

>  {

>      CropContext *s = link->src->priv;

> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);

> 

> -    link->w = s->w;

> -    link->h = s->h;

> +    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {

> +        // Hardware frames adjust the cropping regions rather than

> +        // changing the frame size.

> +    } else {

> +        link->w = s->w;

> +        link->h = s->h;

> +    }

>      link->sample_aspect_ratio = s->out_sar;

> 

>      return 0;

> @@ -252,9 +272,6 @@ static int filter_frame(AVFilterLink *link, AVFrame

> *frame)

>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);

>      int i;

> 

> -    frame->width  = s->w;

> -    frame->height = s->h;

> -

>      s->var_values[VAR_N] = link->frame_count_out;

>      s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ?

>          NAN : frame->pts * av_q2d(link->time_base);

> @@ -285,22 +302,33 @@ static int filter_frame(AVFilterLink *link, AVFrame

> *frame)

>              (int)s->var_values[VAR_N], s->var_values[VAR_T], s-

> >var_values[VAR_POS],

>              s->x, s->y, s->x+s->w, s->y+s->h);

> 

> -    frame->data[0] += s->y * frame->linesize[0];

> -    frame->data[0] += s->x * s->max_step[0];

> -

> -    if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & FF_PSEUDOPAL))

> {

> -        for (i = 1; i < 3; i ++) {

> -            if (frame->data[i]) {

> -                frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];

> -                frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;

> +    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {

> +        frame->crop_top   += s->y;

> +        frame->crop_left  += s->x;

> +        frame->crop_bottom = frame->height - frame->crop_top - frame-

> >crop_bottom - s->h;

> +        frame->crop_right  = frame->width  - frame->crop_left - frame-

> >crop_right - s->w;

> +

> +    } else {

> +        frame->width  = s->w;

> +        frame->height = s->h;

> +

> +        frame->data[0] += s->y * frame->linesize[0];

> +        frame->data[0] += s->x * s->max_step[0];

> +

> +        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags &

> FF_PSEUDOPAL)) {

> +            for (i = 1; i < 3; i ++) {

> +                if (frame->data[i]) {

> +                    frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];

> +                    frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;

> +                }

>              }

>          }

> -    }

> 

> -    /* alpha plane */

> -    if (frame->data[3]) {

> -        frame->data[3] += s->y * frame->linesize[3];

> -        frame->data[3] += s->x * s->max_step[3];

> +        /* alpha plane */

> +        if (frame->data[3]) {

> +            frame->data[3] += s->y * frame->linesize[3];

> +            frame->data[3] += s->x * s->max_step[3];

> +        }

>      }

> 

>      return ff_filter_frame(link->dst->outputs[0], frame);

> --

> 2.19.2

> 

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> 

> To unsubscribe, visit link above, or email

> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox

Patch

diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index 84be4c7d0d..7f6b0f03d3 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c
@@ -98,9 +98,17 @@  static int query_formats(AVFilterContext *ctx)
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
-        if (!(desc->flags & (AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM)) &&
-            !((desc->log2_chroma_w || desc->log2_chroma_h) && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) &&
-            (ret = ff_add_format(&formats, fmt)) < 0)
+        if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
+            continue;
+        if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {
+            // Not usable if there is any subsampling but the format is
+            // not planar (e.g. YUYV422).
+            if ((desc->log2_chroma_w || desc->log2_chroma_h) &&
+                !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))
+                continue;
+        }
+        ret = ff_add_format(&formats, fmt);
+        if (ret < 0)
             return ret;
     }
 
@@ -157,8 +165,14 @@  static int config_input(AVFilterLink *link)
     s->var_values[VAR_POS]   = NAN;
 
     av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
-    s->hsub = pix_desc->log2_chroma_w;
-    s->vsub = pix_desc->log2_chroma_h;
+
+    if (pix_desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+        s->hsub = 1;
+        s->vsub = 1;
+    } else {
+        s->hsub = pix_desc->log2_chroma_w;
+        s->vsub = pix_desc->log2_chroma_h;
+    }
 
     if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
                                       var_names, s->var_values,
@@ -237,9 +251,15 @@  fail_expr:
 static int config_output(AVFilterLink *link)
 {
     CropContext *s = link->src->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
 
-    link->w = s->w;
-    link->h = s->h;
+    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+        // Hardware frames adjust the cropping regions rather than
+        // changing the frame size.
+    } else {
+        link->w = s->w;
+        link->h = s->h;
+    }
     link->sample_aspect_ratio = s->out_sar;
 
     return 0;
@@ -252,9 +272,6 @@  static int filter_frame(AVFilterLink *link, AVFrame *frame)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     int i;
 
-    frame->width  = s->w;
-    frame->height = s->h;
-
     s->var_values[VAR_N] = link->frame_count_out;
     s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ?
         NAN : frame->pts * av_q2d(link->time_base);
@@ -285,22 +302,33 @@  static int filter_frame(AVFilterLink *link, AVFrame *frame)
             (int)s->var_values[VAR_N], s->var_values[VAR_T], s->var_values[VAR_POS],
             s->x, s->y, s->x+s->w, s->y+s->h);
 
-    frame->data[0] += s->y * frame->linesize[0];
-    frame->data[0] += s->x * s->max_step[0];
-
-    if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & FF_PSEUDOPAL)) {
-        for (i = 1; i < 3; i ++) {
-            if (frame->data[i]) {
-                frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];
-                frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;
+    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+        frame->crop_top   += s->y;
+        frame->crop_left  += s->x;
+        frame->crop_bottom = frame->height - frame->crop_top - frame->crop_bottom - s->h;
+        frame->crop_right  = frame->width  - frame->crop_left - frame->crop_right - s->w;
+
+    } else {
+        frame->width  = s->w;
+        frame->height = s->h;
+
+        frame->data[0] += s->y * frame->linesize[0];
+        frame->data[0] += s->x * s->max_step[0];
+
+        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & FF_PSEUDOPAL)) {
+            for (i = 1; i < 3; i ++) {
+                if (frame->data[i]) {
+                    frame->data[i] += (s->y >> s->vsub) * frame->linesize[i];
+                    frame->data[i] += (s->x * s->max_step[i]) >> s->hsub;
+                }
             }
         }
-    }
 
-    /* alpha plane */
-    if (frame->data[3]) {
-        frame->data[3] += s->y * frame->linesize[3];
-        frame->data[3] += s->x * s->max_step[3];
+        /* alpha plane */
+        if (frame->data[3]) {
+            frame->data[3] += s->y * frame->linesize[3];
+            frame->data[3] += s->x * s->max_step[3];
+        }
     }
 
     return ff_filter_frame(link->dst->outputs[0], frame);