diff mbox series

[FFmpeg-devel] libavformat\matroskadec.c: crop support for matroska demuxer.

Message ID 20220921112905.1753-1-ovchinnikov.dmitrii@gmail.com
State New
Headers show
Series [FFmpeg-devel] libavformat\matroskadec.c: crop support for matroska demuxer. | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Dmitrii Ovchinnikov Sept. 21, 2022, 11:29 a.m. UTC
In webm specification, it supports cropping information. (https://www.webmproject.org/docs/container/)
In ffmpeg, the implementation of webm is a subset of matroska. In matroskadec.c, those cropping related four fields are forced to 0.

for the sample file with crop (crop_bottom =8, crop_top=crop_left=crop_right=0.)
ffmpeg.exe -i  test_with_container_crop.webm -pix_fmt yuv420p -y output.yuv

original ffmpeg code - the output.yuv resolution is 1920x1088
changed code - the output.yuv resolution is 1920x1080"
---
 libavcodec/avcodec.h      |  8 ++++++++
 libavcodec/codec_par.c    |  8 ++++++++
 libavcodec/codec_par.h    |  8 ++++++++
 libavcodec/decode.c       |  9 +++++++++
 libavformat/matroskadec.c | 17 +++++++++++++----
 5 files changed, 46 insertions(+), 4 deletions(-)

Comments

Andreas Rheinhardt Sept. 21, 2022, 3 p.m. UTC | #1
OvchinnikovDmitrii:
> In webm specification, it supports cropping information. (https://www.webmproject.org/docs/container/)

So does Matroska; actually, the only reason WebM supports this type of
cropping is because it is part of the subset of Matroska that WebM kept.

> In ffmpeg, the implementation of webm is a subset of matroska. In matroskadec.c, those cropping related four fields are forced to 0.
> 
> for the sample file with crop (crop_bottom =8, crop_top=crop_left=crop_right=0.)
> ffmpeg.exe -i  test_with_container_crop.webm -pix_fmt yuv420p -y output.yuv
> 
> original ffmpeg code - the output.yuv resolution is 1920x1088
> changed code - the output.yuv resolution is 1920x1080"
> ---
>  libavcodec/avcodec.h      |  8 ++++++++
>  libavcodec/codec_par.c    |  8 ++++++++
>  libavcodec/codec_par.h    |  8 ++++++++
>  libavcodec/decode.c       |  9 +++++++++
>  libavformat/matroskadec.c | 17 +++++++++++++----
>  5 files changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 7db5d1b1c5..4420cfa0c9 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -585,6 +585,14 @@ typedef struct AVCodecContext {
>       */
>      int coded_width, coded_height;
>  
> +    /**
> +     * The dimensions of the crop, usually from container.
> +     */
> +    int crop_top;
> +    int crop_left;
> +    int crop_bottom;
> +    int crop_right;
> +
>      /**
>       * the number of pictures in a group of pictures, or 0 for intra_only
>       * - encoding: Set by user.
> diff --git a/libavcodec/codec_par.c b/libavcodec/codec_par.c
> index abda649aa8..f74964a817 100644
> --- a/libavcodec/codec_par.c
> +++ b/libavcodec/codec_par.c
> @@ -118,6 +118,10 @@ int avcodec_parameters_from_context(AVCodecParameters *par,
>          par->format              = codec->pix_fmt;
>          par->width               = codec->width;
>          par->height              = codec->height;
> +        par->crop_top           = codec->crop_top;
> +        par->crop_left          = codec->crop_left;
> +        par->crop_bottom        = codec->crop_bottom;
> +        par->crop_right         = codec->crop_right;
>          par->field_order         = codec->field_order;
>          par->color_range         = codec->color_range;
>          par->color_primaries     = codec->color_primaries;
> @@ -199,6 +203,10 @@ int avcodec_parameters_to_context(AVCodecContext *codec,
>          codec->pix_fmt                = par->format;
>          codec->width                  = par->width;
>          codec->height                 = par->height;
> +        codec->crop_top               = par->crop_top;
> +        codec->crop_left              = par->crop_left;
> +        codec->crop_bottom            = par->crop_bottom;
> +        codec->crop_right             = par->crop_right;
>          codec->field_order            = par->field_order;
>          codec->color_range            = par->color_range;
>          codec->color_primaries        = par->color_primaries;
> diff --git a/libavcodec/codec_par.h b/libavcodec/codec_par.h
> index 7660791a12..6671f1810b 100644
> --- a/libavcodec/codec_par.h
> +++ b/libavcodec/codec_par.h
> @@ -127,6 +127,14 @@ typedef struct AVCodecParameters {
>      int width;
>      int height;
>  
> +    /**
> +     * The dimensions of the crop, usually from container.
> +     */
> +    int crop_top;
> +    int crop_left;
> +    int crop_bottom;
> +    int crop_right;
> +
>      /**
>       * Video only. The aspect ratio (width / height) which a single pixel
>       * should have when displayed.
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index 2961705c9d..905eb8401e 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -324,6 +324,15 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
>      emms_c();
>      actual_got_frame = got_frame;
>  
> +    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
> +        if (avctx->crop_top != 0 || avctx->crop_left != 0 || avctx->crop_right != 0 || avctx->crop_bottom != 0){
> +            frame->crop_top     = avctx->crop_top;
> +            frame->crop_left    = avctx->crop_left;
> +            frame->crop_right   = avctx->crop_right;
> +            frame->crop_bottom  = avctx->crop_bottom;
> +        }
> +    }
> +
>      if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
>          if (frame->flags & AV_FRAME_FLAG_DISCARD)
>              got_frame = 0;
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 16a3e93611..6e05b36e58 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -209,6 +209,10 @@ typedef struct MatroskaTrackVideo {
>      uint64_t pixel_width;
>      uint64_t pixel_height;
>      EbmlBin  color_space;
> +    uint64_t pixel_cropt;
> +    uint64_t pixel_cropl;
> +    uint64_t pixel_cropb;
> +    uint64_t pixel_cropr;
>      uint64_t display_unit;
>      uint64_t interlaced;
>      uint64_t field_order;
> @@ -516,10 +520,10 @@ static EbmlSyntax matroska_track_video[] = {
>      { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
>      { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
>      { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
> -    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
> +    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
>      { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
>      { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
>      { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
> @@ -2878,6 +2882,11 @@ static int matroska_parse_tracks(AVFormatContext *s)
>              st->codecpar->width      = track->video.pixel_width;
>              st->codecpar->height     = track->video.pixel_height;
>  
> +            st->codecpar->crop_top   = track->video.pixel_cropt;
> +            st->codecpar->crop_left  = track->video.pixel_cropl;
> +            st->codecpar->crop_bottom= track->video.pixel_cropb;
> +            st->codecpar->crop_right = track->video.pixel_cropr;
> +
>              if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_INTERLACED)
>                  st->codecpar->field_order = mkv_field_order(matroska, track->video.field_order);
>              else if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_PROGRESSIVE)

1. You can't add fields in the middle of AVCodecParameters, as the
offset of all these fields is part of the public ABI. New fields need to
be added at the end.
2. Patches that add fields to these public structs should be separate
from the patches to e.g. the Matroska demuxer.
3. You are simply overwriting the frame's cropping info with the one
from the container. Yet there is a problem here: In codecs like H.264,
the bitstream can signal cropping of its own (e.g. 1920x1080p is
actually 1920x1088 internally). The Matroska pixel_width and
pixel_height fields correspond to the dimensions after this
decoder-internal cropping has already been applied, so they need to be
applied additionally to the bitstream cropping. Yet you are overwriting
the cropping parameters with the ones from the container.
4. You are only applying the cropping for decoders that use the simple
API, not for decoders implementing the receive_frame API.
5. mov has something similar to these cropping parameters, namely "clean
aperture". It also allows to crop by subpixels (which will probably
never be supported by our decoders). If one wants to make the mov->mov
remuxing work without information loss, one will have to use the more
general clean aperture information and translate the Matroska crop
information to that.
6. There was an attempt more than two years ago to do that; see
https://ffmpeg.org/pipermail/ffmpeg-devel/2020-April/261465.html,
https://ffmpeg.org/pipermail/ffmpeg-devel/2020-May/262153.html and
https://ffmpeg.org/pipermail/ffmpeg-devel/2020-June/264146.html
7. And the state of these crop values for Matroska in general is
unfortunately very bad. Let me quote myself from
https://ffmpeg.org/pipermail/ffmpeg-devel/2020-May/262270.html:
"And let me also add other points about the sorry state of the
PixelCrop* elements:
a) The display aspect ratio usually changes when using cropping; the
pixel aspect ratio doesn't change, yet unfortunately (and inexplicably)
Matroska does not support that. This means that if one sets the display
aspect ratio explicitly when using cropping, a player that ignores the
cropping will not only display the video with the area that ought to be
cropped away; it will also display it with a wrong pixel aspect ratio.

Given that almost all players don't support the cropping flags simply
saying that these players are broken and need to be fixed is not an option.

But there is a method to create videos that will be played with the
correct pixel aspect ratio by both types of players. It makes use of the
fact that (when using pixels as DisplayUnit (it's the default value
anyway)) the default value of DisplayWidth and DisplayHeight are given
by the dimensions of the cropped frame.
It works especially well with nonanamorphic videos. Consider the case of
a 1280x720 nonanamorphic video with 40 pixels on top and bottom cropping
on top and bottom (each) and 240 pixels on the left and right (each). If
one leaves the DisplayWidth/Height away, a player that ignores the crop
values will infer DisplayWidth to be 1280 and DisplayHeight to be 720,
whereas a player that supports the crop values will infer DisplayHeight
to be 640 and DisplayWidth to be 800. Both players will use quadratic
pixels.
It does not work so well with anamorphic videos. If one only wants to
crop in one dimension (say, the vertical one), then one can still
produce videos that work well with both players: Leave out the
DisplayHeight and set DisplayWidth to the common value suitable for
both. (This already has a downside: An absolutely precise display aspect
ratio is no longer attainable with this method in general.)
If one has anamorphic video and wants to crop in both dimensions, then
there is no way of making both types of players display the video with
the same pixel aspect ratio (unless the cropping happens to be so that
the ratio of the uncropped frame and the ratio of the cropped frame
coincide).

We already don't write the display dimensions when nonanamorphic and
your (revised) patch needs to preserve that (should be trivial). Given
that the method outlined above in case cropping is restricted to one
dimension might have side-effects I don't think it should be implemented
(would also be too niche); but one (needn't be you) should add a warning
for users in case players not supporting the crop elements might play
the output with the wrong pixel aspect ratio. (Said warning should
preferably be added in a separate commit.)

b) MKVToolNix behaviour wrt cropping is unfortunately buggy: mkvmerge
reads and propagates the cropping parameters upon remuxing, but it does
not take them into account when inferring the display dimensions (which
it explicitly sets); therefore it changes the pixel aspect ratio upon
remuxing (with the usual exception of the ratios of cropped and
uncropped frames agreeing).
And the GUIs help text does not mention that one should also modify the
display dimensions when editing the cropping parameters. E.g. both
videos from ticket 4489 (and also the original in VLC ticket 13982) are
anamorphic when the specs are followed. This was certainly not intended.
Certainly a large part of the files in existence that have crop values
set have them set in the wrong way. Adding a workaround would
unfortunately incur the possibility of misparsing valid files. My
current approach is to wait and see how many users complain before
deciding on whether to add a workaround."

- Andreas
Dmitrii Ovchinnikov Sept. 26, 2022, 2:22 p.m. UTC | #2
Hi Andreas,

Thanks for your quick and direct feedback about the patch. Your feedback
about the displayWidth, displayHeight and the display aspect ratio shows
that you have deep thinking in this area.

I have several questions and comments about your feedback.

1. Yes, i will get this change, and put the added fields at the rear part
of AVCodecParameters
2. I will split these changes into different commits. Should I also split
the edits in the AVCodecContext and codec_pack.c files into different
commits?
3. Yes, I will apply the container cropping info additionally to the
bitstream cropping.
4. Yes, I will apply the cropping for receive_frame API as well.
5. I'm also working on “clean aperture” for MP4 container. mov is a part of
the subset of MP4. I will submit that patch to ffmpeg after i finished it.
Maybe one month later.
6. Thanks for a good reference.
7. Yes, there are many problems in the MKVToolNix. MKVToolNix may result in
some inconsistent of display aspect ratio when editing crop information.
There are two part of work(for support of Matroska cropping), one part is
matroska encoder, another is matroska decoder. This time i submit the
matroska decoder patch. In the matroska encoder, i have already taken the
inconsistent into consideration, and did some work to make sure it keeps
consistent. Do you want me to put the matroska encoder and matroska decode
together in the same patch to submit ffmpeg, or do you want I put them in
two separate patch and submit seperately?
Dmitrii Ovchinnikov Oct. 1, 2022, 6:35 a.m. UTC | #3
I fixed issues1-4 and sent an updated version:
https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=7668
diff mbox series

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 7db5d1b1c5..4420cfa0c9 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -585,6 +585,14 @@  typedef struct AVCodecContext {
      */
     int coded_width, coded_height;
 
+    /**
+     * The dimensions of the crop, usually from container.
+     */
+    int crop_top;
+    int crop_left;
+    int crop_bottom;
+    int crop_right;
+
     /**
      * the number of pictures in a group of pictures, or 0 for intra_only
      * - encoding: Set by user.
diff --git a/libavcodec/codec_par.c b/libavcodec/codec_par.c
index abda649aa8..f74964a817 100644
--- a/libavcodec/codec_par.c
+++ b/libavcodec/codec_par.c
@@ -118,6 +118,10 @@  int avcodec_parameters_from_context(AVCodecParameters *par,
         par->format              = codec->pix_fmt;
         par->width               = codec->width;
         par->height              = codec->height;
+        par->crop_top           = codec->crop_top;
+        par->crop_left          = codec->crop_left;
+        par->crop_bottom        = codec->crop_bottom;
+        par->crop_right         = codec->crop_right;
         par->field_order         = codec->field_order;
         par->color_range         = codec->color_range;
         par->color_primaries     = codec->color_primaries;
@@ -199,6 +203,10 @@  int avcodec_parameters_to_context(AVCodecContext *codec,
         codec->pix_fmt                = par->format;
         codec->width                  = par->width;
         codec->height                 = par->height;
+        codec->crop_top               = par->crop_top;
+        codec->crop_left              = par->crop_left;
+        codec->crop_bottom            = par->crop_bottom;
+        codec->crop_right             = par->crop_right;
         codec->field_order            = par->field_order;
         codec->color_range            = par->color_range;
         codec->color_primaries        = par->color_primaries;
diff --git a/libavcodec/codec_par.h b/libavcodec/codec_par.h
index 7660791a12..6671f1810b 100644
--- a/libavcodec/codec_par.h
+++ b/libavcodec/codec_par.h
@@ -127,6 +127,14 @@  typedef struct AVCodecParameters {
     int width;
     int height;
 
+    /**
+     * The dimensions of the crop, usually from container.
+     */
+    int crop_top;
+    int crop_left;
+    int crop_bottom;
+    int crop_right;
+
     /**
      * Video only. The aspect ratio (width / height) which a single pixel
      * should have when displayed.
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 2961705c9d..905eb8401e 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -324,6 +324,15 @@  static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
     emms_c();
     actual_got_frame = got_frame;
 
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if (avctx->crop_top != 0 || avctx->crop_left != 0 || avctx->crop_right != 0 || avctx->crop_bottom != 0){
+            frame->crop_top     = avctx->crop_top;
+            frame->crop_left    = avctx->crop_left;
+            frame->crop_right   = avctx->crop_right;
+            frame->crop_bottom  = avctx->crop_bottom;
+        }
+    }
+
     if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
         if (frame->flags & AV_FRAME_FLAG_DISCARD)
             got_frame = 0;
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 16a3e93611..6e05b36e58 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -209,6 +209,10 @@  typedef struct MatroskaTrackVideo {
     uint64_t pixel_width;
     uint64_t pixel_height;
     EbmlBin  color_space;
+    uint64_t pixel_cropt;
+    uint64_t pixel_cropl;
+    uint64_t pixel_cropb;
+    uint64_t pixel_cropr;
     uint64_t display_unit;
     uint64_t interlaced;
     uint64_t field_order;
@@ -516,10 +520,10 @@  static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
     { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
     { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
-    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
+    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
     { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
     { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
     { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
@@ -2878,6 +2882,11 @@  static int matroska_parse_tracks(AVFormatContext *s)
             st->codecpar->width      = track->video.pixel_width;
             st->codecpar->height     = track->video.pixel_height;
 
+            st->codecpar->crop_top   = track->video.pixel_cropt;
+            st->codecpar->crop_left  = track->video.pixel_cropl;
+            st->codecpar->crop_bottom= track->video.pixel_cropb;
+            st->codecpar->crop_right = track->video.pixel_cropr;
+
             if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_INTERLACED)
                 st->codecpar->field_order = mkv_field_order(matroska, track->video.field_order);
             else if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_PROGRESSIVE)