diff mbox series

[FFmpeg-devel,3/6] avformat/matroskadec: export cropping values

Message ID 20240529214632.9843-3-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/6] avcodec/packet: add a decoded frame cropping side data type | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

James Almer May 29, 2024, 9:46 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/matroskadec.c | 53 +++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 10 deletions(-)

Comments

Kacper Michajlow June 1, 2024, 11:24 a.m. UTC | #1
On Wed, 29 May 2024 at 23:47, James Almer <jamrial@gmail.com> wrote:
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavformat/matroskadec.c | 53 +++++++++++++++++++++++++++++++--------
>  1 file changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 2f07e11d87..a30bac786b 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -213,7 +213,13 @@ typedef struct MatroskaTrackVideo {
>      uint64_t display_height;
>      uint64_t pixel_width;
>      uint64_t pixel_height;
> +    uint64_t cropped_width;
> +    uint64_t cropped_height;
>      EbmlBin  color_space;
> +    uint64_t pixel_cropt;
> +    uint64_t pixel_cropl;
> +    uint64_t pixel_cropb;
> +    uint64_t pixel_cropr;
>      uint64_t display_unit;
>      uint64_t interlaced;
>      uint64_t field_order;
> @@ -527,10 +533,10 @@ static EbmlSyntax matroska_track_video[] = {
>      { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
>      { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
>      { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
> -    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
> -    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
> +    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
> +    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
>      { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
>      { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
>      { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
> @@ -2963,14 +2969,30 @@ static int mkv_parse_video(MatroskaTrack *track, AVStream *st,
>
>      if (track->video.display_unit < MATROSKA_VIDEO_DISPLAYUNIT_UNKNOWN) {
>          if (track->video.display_width && track->video.display_height &&
> -            par->height  < INT64_MAX / track->video.display_width  / display_width_mul &&
> -            par->width   < INT64_MAX / track->video.display_height / display_height_mul)
> +            track->video.cropped_height < INT64_MAX / track->video.display_width  / display_width_mul &&
> +            track->video.cropped_width  < INT64_MAX / track->video.display_height / display_height_mul)
>              av_reduce(&st->sample_aspect_ratio.num,
>                        &st->sample_aspect_ratio.den,
> -                      par->height * track->video.display_width  * display_width_mul,
> -                      par->width  * track->video.display_height * display_height_mul,
> +                      track->video.cropped_height * track->video.display_width  * display_width_mul,
> +                      track->video.cropped_width  * track->video.display_height * display_height_mul,
>                        INT_MAX);
>      }
> +    if (track->video.cropped_width  != track->video.pixel_width ||
> +        track->video.cropped_height != track->video.pixel_height) {
> +        uint8_t *cropping;
> +        AVPacketSideData *sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
> +                                                       &st->codecpar->nb_coded_side_data,
> +                                                       AV_PKT_DATA_FRAME_CROPPING,
> +                                                       sizeof(uint32_t) * 4, 0);
> +        if (!sd)
> +            return AVERROR(ENOMEM);
> +
> +        cropping = sd->data;
> +        bytestream_put_le32(&cropping, track->video.pixel_cropt);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropb);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropl);
> +        bytestream_put_le32(&cropping, track->video.pixel_cropr);
> +    }
>      if (par->codec_id != AV_CODEC_ID_HEVC)
>          sti->need_parsing = AVSTREAM_PARSE_HEADERS;
>
> @@ -3136,10 +3158,21 @@ static int matroska_parse_tracks(AVFormatContext *s)
>                      track->default_duration = default_duration;
>                  }
>              }
> +            track->video.cropped_width  = track->video.pixel_width;
> +            track->video.cropped_height = track->video.pixel_height;
> +            if (track->video.display_unit == 0) {
> +                if (track->video.pixel_cropl >= INT_MAX - track->video.pixel_cropr ||
> +                    track->video.pixel_cropt >= INT_MAX - track->video.pixel_cropb ||
> +                    (track->video.pixel_cropl + track->video.pixel_cropr) >= track->video.pixel_width ||
> +                    (track->video.pixel_cropt + track->video.pixel_cropb) >= track->video.pixel_height)
> +                    return AVERROR_INVALIDDATA;
> +                track->video.cropped_width  -= track->video.pixel_cropl + track->video.pixel_cropr;
> +                track->video.cropped_height -= track->video.pixel_cropt + track->video.pixel_cropb;
> +            }
>              if (track->video.display_width == -1)
> -                track->video.display_width = track->video.pixel_width;
> +                track->video.display_width = track->video.cropped_width;
>              if (track->video.display_height == -1)
> -                track->video.display_height = track->video.pixel_height;
> +                track->video.display_height = track->video.cropped_height;
>          } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
>              if (!track->audio.out_samplerate)
>                  track->audio.out_samplerate = track->audio.samplerate;
> --
> 2.45.1

Have you seen the discussion over the MKVToolNix issue tracker?
https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2389

I think it is the reason it wasn't implemented in ffmpeg sooner. The
issue summarizes the problem and current status quo, quite well I
think. I think the main problem is that vast majority of Matroska
files doesn't are not following the standard with regards to
DisplayWidth/Height.

- Kacper
James Almer July 5, 2024, 8:51 p.m. UTC | #2
On 6/1/2024 8:24 AM, Kacper Michajlow wrote:
> On Wed, 29 May 2024 at 23:47, James Almer <jamrial@gmail.com> wrote:
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libavformat/matroskadec.c | 53 +++++++++++++++++++++++++++++++--------
>>   1 file changed, 43 insertions(+), 10 deletions(-)
>>
>> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
>> index 2f07e11d87..a30bac786b 100644
>> --- a/libavformat/matroskadec.c
>> +++ b/libavformat/matroskadec.c
>> @@ -213,7 +213,13 @@ typedef struct MatroskaTrackVideo {
>>       uint64_t display_height;
>>       uint64_t pixel_width;
>>       uint64_t pixel_height;
>> +    uint64_t cropped_width;
>> +    uint64_t cropped_height;
>>       EbmlBin  color_space;
>> +    uint64_t pixel_cropt;
>> +    uint64_t pixel_cropl;
>> +    uint64_t pixel_cropb;
>> +    uint64_t pixel_cropr;
>>       uint64_t display_unit;
>>       uint64_t interlaced;
>>       uint64_t field_order;
>> @@ -527,10 +533,10 @@ static EbmlSyntax matroska_track_video[] = {
>>       { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
>>       { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
>>       { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
>> -    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
>> -    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
>> -    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
>> -    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
>> +    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
>> +    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
>> +    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
>> +    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
>>       { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
>>       { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
>>       { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
>> @@ -2963,14 +2969,30 @@ static int mkv_parse_video(MatroskaTrack *track, AVStream *st,
>>
>>       if (track->video.display_unit < MATROSKA_VIDEO_DISPLAYUNIT_UNKNOWN) {
>>           if (track->video.display_width && track->video.display_height &&
>> -            par->height  < INT64_MAX / track->video.display_width  / display_width_mul &&
>> -            par->width   < INT64_MAX / track->video.display_height / display_height_mul)
>> +            track->video.cropped_height < INT64_MAX / track->video.display_width  / display_width_mul &&
>> +            track->video.cropped_width  < INT64_MAX / track->video.display_height / display_height_mul)
>>               av_reduce(&st->sample_aspect_ratio.num,
>>                         &st->sample_aspect_ratio.den,
>> -                      par->height * track->video.display_width  * display_width_mul,
>> -                      par->width  * track->video.display_height * display_height_mul,
>> +                      track->video.cropped_height * track->video.display_width  * display_width_mul,
>> +                      track->video.cropped_width  * track->video.display_height * display_height_mul,
>>                         INT_MAX);
>>       }
>> +    if (track->video.cropped_width  != track->video.pixel_width ||
>> +        track->video.cropped_height != track->video.pixel_height) {
>> +        uint8_t *cropping;
>> +        AVPacketSideData *sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
>> +                                                       &st->codecpar->nb_coded_side_data,
>> +                                                       AV_PKT_DATA_FRAME_CROPPING,
>> +                                                       sizeof(uint32_t) * 4, 0);
>> +        if (!sd)
>> +            return AVERROR(ENOMEM);
>> +
>> +        cropping = sd->data;
>> +        bytestream_put_le32(&cropping, track->video.pixel_cropt);
>> +        bytestream_put_le32(&cropping, track->video.pixel_cropb);
>> +        bytestream_put_le32(&cropping, track->video.pixel_cropl);
>> +        bytestream_put_le32(&cropping, track->video.pixel_cropr);
>> +    }
>>       if (par->codec_id != AV_CODEC_ID_HEVC)
>>           sti->need_parsing = AVSTREAM_PARSE_HEADERS;
>>
>> @@ -3136,10 +3158,21 @@ static int matroska_parse_tracks(AVFormatContext *s)
>>                       track->default_duration = default_duration;
>>                   }
>>               }
>> +            track->video.cropped_width  = track->video.pixel_width;
>> +            track->video.cropped_height = track->video.pixel_height;
>> +            if (track->video.display_unit == 0) {
>> +                if (track->video.pixel_cropl >= INT_MAX - track->video.pixel_cropr ||
>> +                    track->video.pixel_cropt >= INT_MAX - track->video.pixel_cropb ||
>> +                    (track->video.pixel_cropl + track->video.pixel_cropr) >= track->video.pixel_width ||
>> +                    (track->video.pixel_cropt + track->video.pixel_cropb) >= track->video.pixel_height)
>> +                    return AVERROR_INVALIDDATA;
>> +                track->video.cropped_width  -= track->video.pixel_cropl + track->video.pixel_cropr;
>> +                track->video.cropped_height -= track->video.pixel_cropt + track->video.pixel_cropb;
>> +            }
>>               if (track->video.display_width == -1)
>> -                track->video.display_width = track->video.pixel_width;
>> +                track->video.display_width = track->video.cropped_width;
>>               if (track->video.display_height == -1)
>> -                track->video.display_height = track->video.pixel_height;
>> +                track->video.display_height = track->video.cropped_height;
>>           } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
>>               if (!track->audio.out_samplerate)
>>                   track->audio.out_samplerate = track->audio.samplerate;
>> --
>> 2.45.1
> 
> Have you seen the discussion over the MKVToolNix issue tracker?
> https://gitlab.com/mbunkus/mkvtoolnix/-/issues/2389
> 
> I think it is the reason it wasn't implemented in ffmpeg sooner. The
> issue summarizes the problem and current status quo, quite well I
> think. I think the main problem is that vast majority of Matroska
> files doesn't are not following the standard with regards to
> DisplayWidth/Height.

The only thing i can think to do about this is adding an option to 
ignore display dimension fields and have the demuxer derive them as if 
they were not coded in. There's no amount of heuristics one could do to 
figure out if the values in display dimension fields are correct and 
intended (taking into account any cropping value that may be present, 
and/or trying to achieve a certain DAR), or if they are bogus.
diff mbox series

Patch

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 2f07e11d87..a30bac786b 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -213,7 +213,13 @@  typedef struct MatroskaTrackVideo {
     uint64_t display_height;
     uint64_t pixel_width;
     uint64_t pixel_height;
+    uint64_t cropped_width;
+    uint64_t cropped_height;
     EbmlBin  color_space;
+    uint64_t pixel_cropt;
+    uint64_t pixel_cropl;
+    uint64_t pixel_cropb;
+    uint64_t pixel_cropr;
     uint64_t display_unit;
     uint64_t interlaced;
     uint64_t field_order;
@@ -527,10 +533,10 @@  static EbmlSyntax matroska_track_video[] = {
     { MATROSKA_ID_VIDEOALPHAMODE,      EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } },
     { MATROSKA_ID_VIDEOCOLOR,          EBML_NEST,  0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } },
     { MATROSKA_ID_VIDEOPROJECTION,     EBML_NEST,  0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } },
-    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_NONE },
-    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_NONE },
+    { MATROSKA_ID_VIDEOPIXELCROPB,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPT,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPL,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } },
+    { MATROSKA_ID_VIDEOPIXELCROPR,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } },
     { MATROSKA_ID_VIDEODISPLAYUNIT,    EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } },
     { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, interlaced),  { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } },
     { MATROSKA_ID_VIDEOFIELDORDER,     EBML_UINT,  0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } },
@@ -2963,14 +2969,30 @@  static int mkv_parse_video(MatroskaTrack *track, AVStream *st,
 
     if (track->video.display_unit < MATROSKA_VIDEO_DISPLAYUNIT_UNKNOWN) {
         if (track->video.display_width && track->video.display_height &&
-            par->height  < INT64_MAX / track->video.display_width  / display_width_mul &&
-            par->width   < INT64_MAX / track->video.display_height / display_height_mul)
+            track->video.cropped_height < INT64_MAX / track->video.display_width  / display_width_mul &&
+            track->video.cropped_width  < INT64_MAX / track->video.display_height / display_height_mul)
             av_reduce(&st->sample_aspect_ratio.num,
                       &st->sample_aspect_ratio.den,
-                      par->height * track->video.display_width  * display_width_mul,
-                      par->width  * track->video.display_height * display_height_mul,
+                      track->video.cropped_height * track->video.display_width  * display_width_mul,
+                      track->video.cropped_width  * track->video.display_height * display_height_mul,
                       INT_MAX);
     }
+    if (track->video.cropped_width  != track->video.pixel_width ||
+        track->video.cropped_height != track->video.pixel_height) {
+        uint8_t *cropping;
+        AVPacketSideData *sd = av_packet_side_data_new(&st->codecpar->coded_side_data,
+                                                       &st->codecpar->nb_coded_side_data,
+                                                       AV_PKT_DATA_FRAME_CROPPING,
+                                                       sizeof(uint32_t) * 4, 0);
+        if (!sd)
+            return AVERROR(ENOMEM);
+
+        cropping = sd->data;
+        bytestream_put_le32(&cropping, track->video.pixel_cropt);
+        bytestream_put_le32(&cropping, track->video.pixel_cropb);
+        bytestream_put_le32(&cropping, track->video.pixel_cropl);
+        bytestream_put_le32(&cropping, track->video.pixel_cropr);
+    }
     if (par->codec_id != AV_CODEC_ID_HEVC)
         sti->need_parsing = AVSTREAM_PARSE_HEADERS;
 
@@ -3136,10 +3158,21 @@  static int matroska_parse_tracks(AVFormatContext *s)
                     track->default_duration = default_duration;
                 }
             }
+            track->video.cropped_width  = track->video.pixel_width;
+            track->video.cropped_height = track->video.pixel_height;
+            if (track->video.display_unit == 0) {
+                if (track->video.pixel_cropl >= INT_MAX - track->video.pixel_cropr ||
+                    track->video.pixel_cropt >= INT_MAX - track->video.pixel_cropb ||
+                    (track->video.pixel_cropl + track->video.pixel_cropr) >= track->video.pixel_width ||
+                    (track->video.pixel_cropt + track->video.pixel_cropb) >= track->video.pixel_height)
+                    return AVERROR_INVALIDDATA;
+                track->video.cropped_width  -= track->video.pixel_cropl + track->video.pixel_cropr;
+                track->video.cropped_height -= track->video.pixel_cropt + track->video.pixel_cropb;
+            }
             if (track->video.display_width == -1)
-                track->video.display_width = track->video.pixel_width;
+                track->video.display_width = track->video.cropped_width;
             if (track->video.display_height == -1)
-                track->video.display_height = track->video.pixel_height;
+                track->video.display_height = track->video.cropped_height;
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             if (!track->audio.out_samplerate)
                 track->audio.out_samplerate = track->audio.samplerate;