diff mbox series

[FFmpeg-devel,3/6] avformat/matroskadec: export cropping values

Message ID 20240529214632.9843-3-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/6] avcodec/packet: add a decoded frame cropping side data type | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

James Almer May 29, 2024, 9:46 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/matroskadec.c | 53 +++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 10 deletions(-)

Comments

Kacper Michajlow June 1, 2024, 11:24 a.m. UTC | #1
On Wed, 29 May 2024 at 23:47, James Almer <jamrial@gmail.com> wrote:
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavformat/matroskadec.c | 53 +++++++++++++++++++++++++++++++--------
>  1 file changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 2f07e11d87..a30bac786b 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -213,7 +213,13 @@ typedef struct MatroskaTrackVideo {
>      uint64_t display_height;
>      uint64_t pixel_width;
>      uint64_t pixel_height;
> +    uint64_t cropped_width;
> +    uint64_t cropped_height;
>      EbmlBin  color_space;
> +    uint64_t pixel_cropt;
> +    uint64_t pixel_cropl;
> +    uint64_t pixel_cropb;
> +    uint64_t pixel_cropr;
>      uint64_t display_unit;
>      uint64_t interlaced;
>      uint64_t field_order;
> @@ -527,10 +533,10 @@ static EbmlSyntax matroska_track_video[] = {
>      { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
>      { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
>      { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
> -    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
> +    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
>      { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
>      { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
>      { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
> @@ -2963,14 +2969,30 @@ static int mkv_parse_video(MatroskaTrack *track, AVStream *st,
>
>      if (track->video.display_unit < MATROSKA_VIDEO_DISPLAYUNIT_UNKNOWN) {
>          if (track->video.display_width && track->video.display_height &&
> -            par->height  < INT64_MAX / track->video.display_width  / display_width_mul &&
> -            par->width   < INT64_MAX / track->video.display_height / display_height_mul)
> +            track->video.cropped_height < INT64_MAX / track->video.display_width  / display_width_mul &&
> +            track->video.cropped_width  < INT64_MAX / track->video.display_height / display_height_mul)
>              av_reduce(&st->sample_aspect_ratio.num,
>                        &st->sample_aspect_ratio.den,
> -                      par->height * track->video.display_width  * display_width_mul,
> -                      par->width  * track->video.display_height * display_height_mul,
> +                      track->video.cropped_height * track->video.display_width  * display_width_mul,
> +                      track->video.cropped_width  * track->video.display_height * display_height_mul,
>                        INT_MAX);
>      }
> +    if (track->video.cropped_width  != track->video.pixel_width ||
> +        track->video.cropped_height != track->video.pixel_height) {
> +        uint8_t *cropping;
> +        AVPacketSideData *sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
> +                                                       &st->codecpar->nb_coded_side_data,
> +                                                       AV_PKT_DATA_FRAME_CROPPING,
> +                                                       sizeof(uint32_t) * 4, 0);
> +        if (!sd)
> +            return AVERROR(ENOMEM);
> +
> +        cropping = sd->data;
> +        bytestream_put_le32(&cropping, track->video.pixel_cropt);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropb);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropl);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropr);
> +    }
>      if (par->codec_id != AV_CODEC_ID_HEVC)
>          sti->need_parsing = AVSTREAM_PARSE_HEADERS;
>
> @@ -3136,10 +3158,21 @@ static int matroska_parse_tracks(AVFormatContext *s)
>                      track->default_duration = default_duration;
>                  }
>              }
> +            track->video.cropped_width  = track->video.pixel_width;
> +            track->video.cropped_height = track->video.pixel_height;
> +            if (track->video.display_unit == 0) {
> +                if (track->video.pixel_cropl >= INT_MAX - track->video.pixel_cropr ||
> +                    track->video.pixel_cropt >= INT_MAX - track->video.pixel_cropb ||
> +                    (track->video.pixel_cropl + track->video.pixel_cropr) >= track->video.pixel_width ||
> +                    (track->video.pixel_cropt + track->video.pixel_cropb) >= track->video.pixel_height)
> +                    return AVERROR_INVALIDDATA;
> +                track->video.cropped_width  -= track->video.pixel_cropl + track->video.pixel_cropr;
> +                track->video.cropped_height -= track->video.pixel_cropt + track->video.pixel_cropb;
> +            }
>              if (track->video.display_width == -1)
> -                track->video.display_width = track->video.pixel_width;
> +                track->video.display_width = track->video.cropped_width;
>              if (track->video.display_height == -1)
> -                track->video.display_height = track->video.pixel_height;
> +                track->video.display_height = track->video.cropped_height;
>          } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
>              if (!track->audio.out_samplerate)
>                  track->audio.out_samplerate = track->audio.samplerate;
> --
> 2.45.1

Have you seen the discussion over the MKVToolNix issue tracker?
https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2389

I think it is the reason it wasn't implemented in ffmpeg sooner. The
issue summarizes the problem and current status quo, quite well I
think. I think the main problem is that vast majority of Matroska
files doesn't are not following the standard with regards to
DisplayWidth/Height.

- Kacper
diff mbox series

Patch

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 2f07e11d87..a30bac786b 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -213,7 +213,13 @@  typedef struct MatroskaTrackVideo {
     uint64_t display_height;
     uint64_t pixel_width;
     uint64_t pixel_height;
+    uint64_t cropped_width;
+    uint64_t cropped_height;
     EbmlBin  color_space;
+    uint64_t pixel_cropt;
+    uint64_t pixel_cropl;
+    uint64_t pixel_cropb;
+    uint64_t pixel_cropr;
     uint64_t display_unit;
     uint64_t interlaced;
     uint64_t field_order;
@@ -527,10 +533,10 @@  static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
     { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
     { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
-    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
+    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
     { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
     { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
     { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
@@ -2963,14 +2969,30 @@  static int mkv_parse_video(MatroskaTrack *track, AVStream *st,
 
     if (track->video.display_unit < MATROSKA_VIDEO_DISPLAYUNIT_UNKNOWN) {
         if (track->video.display_width && track->video.display_height &&
-            par->height  < INT64_MAX / track->video.display_width  / display_width_mul &&
-            par->width   < INT64_MAX / track->video.display_height / display_height_mul)
+            track->video.cropped_height < INT64_MAX / track->video.display_width  / display_width_mul &&
+            track->video.cropped_width  < INT64_MAX / track->video.display_height / display_height_mul)
             av_reduce(&st->sample_aspect_ratio.num,
                       &st->sample_aspect_ratio.den,
-                      par->height * track->video.display_width  * display_width_mul,
-                      par->width  * track->video.display_height * display_height_mul,
+                      track->video.cropped_height * track->video.display_width  * display_width_mul,
+                      track->video.cropped_width  * track->video.display_height * display_height_mul,
                       INT_MAX);
     }
+    if (track->video.cropped_width  != track->video.pixel_width ||
+        track->video.cropped_height != track->video.pixel_height) {
+        uint8_t *cropping;
+        AVPacketSideData *sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
+                                                       &st->codecpar->nb_coded_side_data,
+                                                       AV_PKT_DATA_FRAME_CROPPING,
+                                                       sizeof(uint32_t) * 4, 0);
+        if (!sd)
+            return AVERROR(ENOMEM);
+
+        cropping = sd->data;
+        bytestream_put_le32(&cropping, track->video.pixel_cropt);
+        bytestream_put_le32(&cropping, track->video.pixel_cropb);
+        bytestream_put_le32(&cropping, track->video.pixel_cropl);
+        bytestream_put_le32(&cropping, track->video.pixel_cropr);
+    }
     if (par->codec_id != AV_CODEC_ID_HEVC)
         sti->need_parsing = AVSTREAM_PARSE_HEADERS;
 
@@ -3136,10 +3158,21 @@  static int matroska_parse_tracks(AVFormatContext *s)
                     track->default_duration = default_duration;
                 }
             }
+            track->video.cropped_width  = track->video.pixel_width;
+            track->video.cropped_height = track->video.pixel_height;
+            if (track->video.display_unit == 0) {
+                if (track->video.pixel_cropl >= INT_MAX - track->video.pixel_cropr ||
+                    track->video.pixel_cropt >= INT_MAX - track->video.pixel_cropb ||
+                    (track->video.pixel_cropl + track->video.pixel_cropr) >= track->video.pixel_width ||
+                    (track->video.pixel_cropt + track->video.pixel_cropb) >= track->video.pixel_height)
+                    return AVERROR_INVALIDDATA;
+                track->video.cropped_width  -= track->video.pixel_cropl + track->video.pixel_cropr;
+                track->video.cropped_height -= track->video.pixel_cropt + track->video.pixel_cropb;
+            }
             if (track->video.display_width == -1)
-                track->video.display_width = track->video.pixel_width;
+                track->video.display_width = track->video.cropped_width;
             if (track->video.display_height == -1)
-                track->video.display_height = track->video.pixel_height;
+                track->video.display_height = track->video.cropped_height;
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             if (!track->audio.out_samplerate)
                 track->audio.out_samplerate = track->audio.samplerate;