Message ID | 20220921112905.1753-1-ovchinnikov.dmitrii@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] libavformat\matroskadec.c: crop support for matroska demuxer. | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
OvchinnikovDmitrii: > In webm specification, it supports cropping information. (https://www.webmproject.org/docs/container/) So does Matroska; actually, the only reason WebM supports this type of cropping is because it is part of the subset of Matroska that WebM kept. > In ffmpeg, the implementation of webm is a subset of matroska. In matroskadec.c, those cropping related four fields are forced to 0. > > for the sample file with crop (crop_bottom =8, crop_top=crop_left=crop_right=0.) > ffmpeg.exe -i test_with_container_crop.webm -pix_fmt yuv420p -y output.yuv > > original ffmpeg code - the output.yuv resolution is 1920x1088 > changed code - the output.yuv resolution is 1920x1080" > --- > libavcodec/avcodec.h | 8 ++++++++ > libavcodec/codec_par.c | 8 ++++++++ > libavcodec/codec_par.h | 8 ++++++++ > libavcodec/decode.c | 9 +++++++++ > libavformat/matroskadec.c | 17 +++++++++++++---- > 5 files changed, 46 insertions(+), 4 deletions(-) > > diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h > index 7db5d1b1c5..4420cfa0c9 100644 > --- a/libavcodec/avcodec.h > +++ b/libavcodec/avcodec.h > @@ -585,6 +585,14 @@ typedef struct AVCodecContext { > */ > int coded_width, coded_height; > > + /** > + * The dimensions of the crop, usually from container. > + */ > + int crop_top; > + int crop_left; > + int crop_bottom; > + int crop_right; > + > /** > * the number of pictures in a group of pictures, or 0 for intra_only > * - encoding: Set by user. > diff --git a/libavcodec/codec_par.c b/libavcodec/codec_par.c > index abda649aa8..f74964a817 100644 > --- a/libavcodec/codec_par.c > +++ b/libavcodec/codec_par.c > @@ -118,6 +118,10 @@ int avcodec_parameters_from_context(AVCodecParameters *par, > par->format = codec->pix_fmt; > par->width = codec->width; > par->height = codec->height; > + par->crop_top = codec->crop_top; > + par->crop_left = codec->crop_left; > + par->crop_bottom = codec->crop_bottom; > + par->crop_right = codec->crop_right; > par->field_order = codec->field_order; > par->color_range = codec->color_range; > par->color_primaries = codec->color_primaries; > @@ -199,6 +203,10 @@ int avcodec_parameters_to_context(AVCodecContext *codec, > codec->pix_fmt = par->format; > codec->width = par->width; > codec->height = par->height; > + codec->crop_top = par->crop_top; > + codec->crop_left = par->crop_left; > + codec->crop_bottom = par->crop_bottom; > + codec->crop_right = par->crop_right; > codec->field_order = par->field_order; > codec->color_range = par->color_range; > codec->color_primaries = par->color_primaries; > diff --git a/libavcodec/codec_par.h b/libavcodec/codec_par.h > index 7660791a12..6671f1810b 100644 > --- a/libavcodec/codec_par.h > +++ b/libavcodec/codec_par.h > @@ -127,6 +127,14 @@ typedef struct AVCodecParameters { > int width; > int height; > > + /** > + * The dimensions of the crop, usually from container. > + */ > + int crop_top; > + int crop_left; > + int crop_bottom; > + int crop_right; > + > /** > * Video only. The aspect ratio (width / height) which a single pixel > * should have when displayed. > diff --git a/libavcodec/decode.c b/libavcodec/decode.c > index 2961705c9d..905eb8401e 100644 > --- a/libavcodec/decode.c > +++ b/libavcodec/decode.c > @@ -324,6 +324,15 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame, > emms_c(); > actual_got_frame = got_frame; > > + if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { > + if (avctx->crop_top != 0 || avctx->crop_left != 0 || avctx->crop_right != 0 || avctx->crop_bottom != 0){ > + frame->crop_top = avctx->crop_top; > + frame->crop_left = avctx->crop_left; > + frame->crop_right = avctx->crop_right; > + frame->crop_bottom = avctx->crop_bottom; > + } > + } > + > if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { > if (frame->flags & AV_FRAME_FLAG_DISCARD) > got_frame = 0; > diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c > index 16a3e93611..6e05b36e58 100644 > --- a/libavformat/matroskadec.c > +++ b/libavformat/matroskadec.c > @@ -209,6 +209,10 @@ typedef struct MatroskaTrackVideo { > uint64_t pixel_width; > uint64_t pixel_height; > EbmlBin color_space; > + uint64_t pixel_cropt; > + uint64_t pixel_cropl; > + uint64_t pixel_cropb; > + uint64_t pixel_cropr; > uint64_t display_unit; > uint64_t interlaced; > uint64_t field_order; > @@ -516,10 +520,10 @@ static EbmlSyntax matroska_track_video[] = { > { MATROSKA_ID_VIDEOALPHAMODE, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } }, > { MATROSKA_ID_VIDEOCOLOR, EBML_NEST, 0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } }, > { MATROSKA_ID_VIDEOPROJECTION, EBML_NEST, 0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } }, > - { MATROSKA_ID_VIDEOPIXELCROPB, EBML_NONE }, > - { MATROSKA_ID_VIDEOPIXELCROPT, EBML_NONE }, > - { MATROSKA_ID_VIDEOPIXELCROPL, EBML_NONE }, > - { MATROSKA_ID_VIDEOPIXELCROPR, EBML_NONE }, > + { MATROSKA_ID_VIDEOPIXELCROPT, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } }, > + { MATROSKA_ID_VIDEOPIXELCROPL, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } }, > + { MATROSKA_ID_VIDEOPIXELCROPB, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } }, > + { MATROSKA_ID_VIDEOPIXELCROPR, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } }, > { MATROSKA_ID_VIDEODISPLAYUNIT, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } }, > { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, interlaced), { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } }, > { MATROSKA_ID_VIDEOFIELDORDER, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } }, > @@ -2878,6 +2882,11 @@ static int matroska_parse_tracks(AVFormatContext *s) > st->codecpar->width = track->video.pixel_width; > st->codecpar->height = track->video.pixel_height; > > + st->codecpar->crop_top = track->video.pixel_cropt; > + st->codecpar->crop_left = track->video.pixel_cropl; > + st->codecpar->crop_bottom= track->video.pixel_cropb; > + st->codecpar->crop_right = track->video.pixel_cropr; > + > if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_INTERLACED) > st->codecpar->field_order = mkv_field_order(matroska, track->video.field_order); > else if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_PROGRESSIVE) 1. You can't add fields in the middle of AVCodecParameters, as the offset of all these fields is part of the public ABI. New fields need to be added at the end. 2. Patches that add fields to these public structs should be separate from the patches to e.g. the Matroska demuxer. 3. You are simply overwriting the frame's cropping info with the one from the container. Yet there is a problem here: In codecs like H.264, the bitstream can signal cropping of its own (e.g. 1920x1080p is actually 1920x1088 internally). The Matroska pixel_width and pixel_height fields correspond to the dimensions after this decoder-internal cropping has already been applied, so they need to be applied additionally to the bitstream cropping. Yet you are overwriting the cropping parameters with the ones from the container. 4. You are only applying the cropping for decoders that use the simple API, not for decoders implementing the receive_frame API. 5. mov has something similar to these cropping parameters, namely "clean aperture". It also allows to crop by subpixels (which will probably never be supported by our decoders). If one wants to make the mov->mov remuxing work without information loss, one will have to use the more general clean aperture information and translate the Matroska crop information to that. 6. There was an attempt more than two years ago to do that; see https://ffmpeg.org/pipermail/ffmpeg-devel/2020-April/261465.html, https://ffmpeg.org/pipermail/ffmpeg-devel/2020-May/262153.html and https://ffmpeg.org/pipermail/ffmpeg-devel/2020-June/264146.html 7. And the state of these crop values for Matroska in general is unfortunately very bad. Let me quote myself from https://ffmpeg.org/pipermail/ffmpeg-devel/2020-May/262270.html: "And let me also add other points about the sorry state of the PixelCrop* elements: a) The display aspect ratio usually changes when using cropping; the pixel aspect ratio doesn't change, yet unfortunately (and inexplicably) Matroska does not support that. This means that if one sets the display aspect ratio explicitly when using cropping, a player that ignores the cropping will not only display the video with the area that ought to be cropped away; it will also display it with a wrong pixel aspect ratio. Given that almost all players don't support the cropping flags simply saying that these players are broken and need to be fixed is not an option. But there is a method to create videos that will be played with the correct pixel aspect ratio by both types of players. It makes use of the fact that (when using pixels as DisplayUnit (it's the default value anyway)) the default value of DisplayWidth and DisplayHeight are given by the dimensions of the cropped frame. It works especially well with nonanamorphic videos. Consider the case of a 1280x720 nonanamorphic video with 40 pixels on top and bottom cropping on top and bottom (each) and 240 pixels on the left and right (each). If one leaves the DisplayWidth/Height away, a player that ignores the crop values will infer DisplayWidth to be 1280 and DisplayHeight to be 720, whereas a player that supports the crop values will infer DisplayHeight to be 640 and DisplayWidth to be 800. Both players will use quadratic pixels. It does not work so well with anamorphic videos. If one only wants to crop in one dimension (say, the vertical one), then one can still produce videos that work well with both players: Leave out the DisplayHeight and set DisplayWidth to the common value suitable for both. (This already has a downside: An absolutely precise display aspect ratio is no longer attainable with this method in general.) If one has anamorphic video and wants to crop in both dimensions, then there is no way of making both types of players display the video with the same pixel aspect ratio (unless the cropping happens to be so that the ratio of the uncropped frame and the ratio of the cropped frame coincide). We already don't write the display dimensions when nonanamorphic and your (revised) patch needs to preserve that (should be trivial). Given that the method outlined above in case cropping is restricted to one dimension might have side-effects I don't think it should be implemented (would also be too niche); but one (needn't be you) should add a warning for users in case players not supporting the crop elements might play the output with the wrong pixel aspect ratio. (Said warning should preferably be added in a separate commit.) b) MKVToolNix behaviour wrt cropping is unfortunately buggy: mkvmerge reads and propagates the cropping parameters upon remuxing, but it does not take them into account when inferring the display dimensions (which it explicitly sets); therefore it changes the pixel aspect ratio upon remuxing (with the usual exception of the ratios of cropped and uncropped frames agreeing). And the GUIs help text does not mention that one should also modify the display dimensions when editing the cropping parameters. E.g. both videos from ticket 4489 (and also the original in VLC ticket 13982) are anamorphic when the specs are followed. This was certainly not intended. Certainly a large part of the files in existence that have crop values set have them set in the wrong way. Adding a workaround would unfortunately incur the possibility of misparsing valid files. My current approach is to wait and see how many users complain before deciding on whether to add a workaround." - Andreas
Hi Andreas, Thanks for your quick and direct feedback about the patch. Your feedback about the displayWidth, displayHeight and the display aspect ratio shows that you have deep thinking in this area. I have several questions and comments about your feedback. 1. Yes, i will get this change, and put the added fields at the rear part of AVCodecParameters 2. I will split these changes into different commits. Should I also split the edits in the AVCodecContext and codec_pack.c files into different commits? 3. Yes, I will apply the container cropping info additionally to the bitstream cropping. 4. Yes, I will apply the cropping for receive_frame API as well. 5. I'm also working on “clean aperture” for MP4 container. mov is a part of the subset of MP4. I will submit that patch to ffmpeg after i finished it. Maybe one month later. 6. Thanks for a good reference. 7. Yes, there are many problems in the MKVToolNix. MKVToolNix may result in some inconsistent of display aspect ratio when editing crop information. There are two part of work(for support of Matroska cropping), one part is matroska encoder, another is matroska decoder. This time i submit the matroska decoder patch. In the matroska encoder, i have already taken the inconsistent into consideration, and did some work to make sure it keeps consistent. Do you want me to put the matroska encoder and matroska decode together in the same patch to submit ffmpeg, or do you want I put them in two separate patch and submit seperately?
I fixed issues1-4 and sent an updated version: https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=7668
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 7db5d1b1c5..4420cfa0c9 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -585,6 +585,14 @@ typedef struct AVCodecContext { */ int coded_width, coded_height; + /** + * The dimensions of the crop, usually from container. + */ + int crop_top; + int crop_left; + int crop_bottom; + int crop_right; + /** * the number of pictures in a group of pictures, or 0 for intra_only * - encoding: Set by user. diff --git a/libavcodec/codec_par.c b/libavcodec/codec_par.c index abda649aa8..f74964a817 100644 --- a/libavcodec/codec_par.c +++ b/libavcodec/codec_par.c @@ -118,6 +118,10 @@ int avcodec_parameters_from_context(AVCodecParameters *par, par->format = codec->pix_fmt; par->width = codec->width; par->height = codec->height; + par->crop_top = codec->crop_top; + par->crop_left = codec->crop_left; + par->crop_bottom = codec->crop_bottom; + par->crop_right = codec->crop_right; par->field_order = codec->field_order; par->color_range = codec->color_range; par->color_primaries = codec->color_primaries; @@ -199,6 +203,10 @@ int avcodec_parameters_to_context(AVCodecContext *codec, codec->pix_fmt = par->format; codec->width = par->width; codec->height = par->height; + codec->crop_top = par->crop_top; + codec->crop_left = par->crop_left; + codec->crop_bottom = par->crop_bottom; + codec->crop_right = par->crop_right; codec->field_order = par->field_order; codec->color_range = par->color_range; codec->color_primaries = par->color_primaries; diff --git a/libavcodec/codec_par.h b/libavcodec/codec_par.h index 7660791a12..6671f1810b 100644 --- a/libavcodec/codec_par.h +++ b/libavcodec/codec_par.h @@ -127,6 +127,14 @@ typedef struct AVCodecParameters { int width; int height; + /** + * The dimensions of the crop, usually from container. + */ + int crop_top; + int crop_left; + int crop_bottom; + int crop_right; + /** * Video only. The aspect ratio (width / height) which a single pixel * should have when displayed. diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 2961705c9d..905eb8401e 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -324,6 +324,15 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame, emms_c(); actual_got_frame = got_frame; + if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { + if (avctx->crop_top != 0 || avctx->crop_left != 0 || avctx->crop_right != 0 || avctx->crop_bottom != 0){ + frame->crop_top = avctx->crop_top; + frame->crop_left = avctx->crop_left; + frame->crop_right = avctx->crop_right; + frame->crop_bottom = avctx->crop_bottom; + } + } + if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) { if (frame->flags & AV_FRAME_FLAG_DISCARD) got_frame = 0; diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index 16a3e93611..6e05b36e58 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -209,6 +209,10 @@ typedef struct MatroskaTrackVideo { uint64_t pixel_width; uint64_t pixel_height; EbmlBin color_space; + uint64_t pixel_cropt; + uint64_t pixel_cropl; + uint64_t pixel_cropb; + uint64_t pixel_cropr; uint64_t display_unit; uint64_t interlaced; uint64_t field_order; @@ -516,10 +520,10 @@ static EbmlSyntax matroska_track_video[] = { { MATROSKA_ID_VIDEOALPHAMODE, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, alpha_mode), { .u = 0 } }, { MATROSKA_ID_VIDEOCOLOR, EBML_NEST, 0, sizeof(MatroskaTrackVideoColor), offsetof(MatroskaTrackVideo, color), { .n = matroska_track_video_color } }, { MATROSKA_ID_VIDEOPROJECTION, EBML_NEST, 0, 0, offsetof(MatroskaTrackVideo, projection), { .n = matroska_track_video_projection } }, - { MATROSKA_ID_VIDEOPIXELCROPB, EBML_NONE }, - { MATROSKA_ID_VIDEOPIXELCROPT, EBML_NONE }, - { MATROSKA_ID_VIDEOPIXELCROPL, EBML_NONE }, - { MATROSKA_ID_VIDEOPIXELCROPR, EBML_NONE }, + { MATROSKA_ID_VIDEOPIXELCROPT, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropt), {.u = 0 } }, + { MATROSKA_ID_VIDEOPIXELCROPL, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropl), {.u = 0 } }, + { MATROSKA_ID_VIDEOPIXELCROPB, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropb), {.u = 0 } }, + { MATROSKA_ID_VIDEOPIXELCROPR, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, pixel_cropr), {.u = 0 } }, { MATROSKA_ID_VIDEODISPLAYUNIT, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, display_unit), { .u= MATROSKA_VIDEO_DISPLAYUNIT_PIXELS } }, { MATROSKA_ID_VIDEOFLAGINTERLACED, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, interlaced), { .u = MATROSKA_VIDEO_INTERLACE_FLAG_UNDETERMINED } }, { MATROSKA_ID_VIDEOFIELDORDER, EBML_UINT, 0, 0, offsetof(MatroskaTrackVideo, field_order), { .u = MATROSKA_VIDEO_FIELDORDER_UNDETERMINED } }, @@ -2878,6 +2882,11 @@ static int matroska_parse_tracks(AVFormatContext *s) st->codecpar->width = track->video.pixel_width; st->codecpar->height = track->video.pixel_height; + st->codecpar->crop_top = track->video.pixel_cropt; + st->codecpar->crop_left = track->video.pixel_cropl; + st->codecpar->crop_bottom= track->video.pixel_cropb; + st->codecpar->crop_right = track->video.pixel_cropr; + if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_INTERLACED) st->codecpar->field_order = mkv_field_order(matroska, track->video.field_order); else if (track->video.interlaced == MATROSKA_VIDEO_INTERLACE_FLAG_PROGRESSIVE)