Message ID | 20210830111407.5655-3-jeebjp@gmail.com |
---|---|
State | New |
Headers | show |
Series | Support for stream dispositions in MP4 | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
> On Aug 30, 2021, at 7:14 PM, Jan Ekström <jeebjp@gmail.com> wrote: > > From: Jan Ekström <jan.ekstrom@24i.com> > > Unfortunately the current production versions of this software > do not 100% adhere to the CMAF specification, and have decided > to utilize the HTML5 media track identifier for audio descriptions. > > This way the default mode of operation is according to the CMAF > specification, but it is also possible to output streams with which > this piece of software is capable of interoperating with. > > Signed-off-by: Jan Ekström <jan.ekstrom@24i.com> > --- > libavformat/isom.c | 23 ++++-- > libavformat/isom.h | 6 ++ > libavformat/movenc.c | 12 ++- > libavformat/movenc.h | 2 + > tests/fate/mov.mak | 8 ++ > ...p4-disposition-unified-origin-mpegts-remux | 81 +++++++++++++++++++ > 6 files changed, 124 insertions(+), 8 deletions(-) > create mode 100644 tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux > > diff --git a/libavformat/isom.c b/libavformat/isom.c > index 300ba927c2..fb8ad3d824 100644 > --- a/libavformat/isom.c > +++ b/libavformat/isom.c > @@ -433,19 +433,32 @@ void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout) > > static const struct MP4TrackKindValueMapping dash_role_map[] = { > { AV_DISPOSITION_HEARING_IMPAIRED|AV_DISPOSITION_CAPTIONS, > - "caption" }, > + "caption", > + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, > { AV_DISPOSITION_COMMENT, > - "commentary" }, > + "commentary", > + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, > { AV_DISPOSITION_VISUAL_IMPAIRED|AV_DISPOSITION_DESCRIPTIONS, > - "description" }, > + "description", > + KindWritingModeCMAF }, > { AV_DISPOSITION_DUB, > - "dub" }, > + "dub", > + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, > { AV_DISPOSITION_FORCED, > - "forced-subtitle" }, > + "forced-subtitle", > + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, > + { 0, NULL } > +}; The patch set LGTM. 
Nit-picking: it’s hard to see that `KindWritingModeCMAF|KindWritingModeUnifiedOrigin` is a bitwise OR. > + > +static const struct MP4TrackKindValueMapping html_kind_map[] = { > + { AV_DISPOSITION_VISUAL_IMPAIRED|AV_DISPOSITION_DESCRIPTIONS, > + "main-desc", > + KindWritingModeUnifiedOrigin }, > { 0, NULL } > }; > > const struct MP4TrackKindMapping ff_mov_track_kind_table[] = { > { "urn:mpeg:dash:role:2011", dash_role_map }, > + { "about:html-kind", html_kind_map }, > { 0, NULL } > }; > diff --git a/libavformat/isom.h b/libavformat/isom.h > index c62fcf2bfe..1252fc6603 100644 > --- a/libavformat/isom.h > +++ b/libavformat/isom.h > @@ -390,9 +390,15 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags) > #define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p') > #define MOV_MP4_TTML_TAG MKTAG('s', 't', 'p', 'p') > > +enum MP4TrackKindWritingMode { > + KindWritingModeCMAF = (1 << 0), > + KindWritingModeUnifiedOrigin = (1 << 1), > +}; > + > struct MP4TrackKindValueMapping { > int disposition; > const char *value; > + uint32_t writing_modes; > }; > > struct MP4TrackKindMapping { > diff --git a/libavformat/movenc.c b/libavformat/movenc.c > index 4070fc9ef7..baaae7d3ad 100644 > --- a/libavformat/movenc.c > +++ b/libavformat/movenc.c > @@ -111,6 +111,9 @@ static const AVOption options[] = { > { "pts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_PTS}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, "prft"}, > { "empty_hdlr_name", "write zero-length name string in hdlr atoms within mdia and minf atoms", offsetof(MOVMuxContext, empty_hdlr_name), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM}, > { "movie_timescale", "set movie timescale", offsetof(MOVMuxContext, movie_timescale), AV_OPT_TYPE_INT, {.i64 = MOV_TIMESCALE}, 1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM}, > + { "kind_writing_mode", "set kind box writing mode", offsetof(MOVMuxContext, kind_writing_mode), AV_OPT_TYPE_INT, {.i64 = KindWritingModeCMAF}, KindWritingModeCMAF, KindWritingModeUnifiedOrigin, 
AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, > + { "cmaf", "CMAF writing mode", 0, AV_OPT_TYPE_CONST, {.i64 = KindWritingModeCMAF}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, > + { "unified_origin", "Compatibility mode for Unified Origin (all DASH except for audio description)", 0, AV_OPT_TYPE_CONST, {.i64 = KindWritingModeUnifiedOrigin}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, > { NULL }, > }; > > @@ -3355,7 +3358,8 @@ static int mov_write_track_kind(AVIOContext *pb, const char *scheme_uri, > return update_size(pb, pos); > } > > -static int mov_write_track_kinds(AVIOContext *pb, AVStream *st) > +static int mov_write_track_kinds(AVIOContext *pb, AVStream *st, > + enum MP4TrackKindWritingMode mode) > { > int ret = AVERROR_BUG; > > @@ -3364,7 +3368,8 @@ static int mov_write_track_kinds(AVIOContext *pb, AVStream *st) > > for (int j = 0; map.value_maps[j].disposition; j++) { > const struct MP4TrackKindValueMapping value_map = map.value_maps[j]; > - if (!(st->disposition & value_map.disposition)) > + if (!(st->disposition & value_map.disposition) || > + !(value_map.writing_modes & mode)) > continue; > > if ((ret = mov_write_track_kind(pb, map.scheme_uri, value_map.value)) < 0) > @@ -3393,7 +3398,8 @@ static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov, > mov_write_track_metadata(pb_buf, st, "name", "title"); > > if (mov->mode & MODE_MP4) { > - if ((ret = mov_write_track_kinds(pb_buf, st)) < 0) > + if ((ret = mov_write_track_kinds(pb_buf, st, > + mov->kind_writing_mode)) < 0) > return ret; > } > > diff --git a/libavformat/movenc.h b/libavformat/movenc.h > index 40077b1afe..e02a086b1f 100644 > --- a/libavformat/movenc.h > +++ b/libavformat/movenc.h > @@ -25,6 +25,7 @@ > #define AVFORMAT_MOVENC_H > > #include "avformat.h" > +#include "isom.h" > #include "movenccenc.h" > #include "libavcodec/packet_internal.h" > > @@ -242,6 +243,7 @@ typedef struct MOVMuxContext { > MOVPrftBox write_prft; > int 
empty_hdlr_name; > int movie_timescale; > + enum MP4TrackKindWritingMode kind_writing_mode; > } MOVMuxContext; > > #define FF_MOV_FLAG_RTP_HINT (1 << 0) > diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak > index 5ca992e181..20f085803b 100644 > --- a/tests/fate/mov.mak > +++ b/tests/fate/mov.mak > @@ -145,6 +145,14 @@ FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \ > += fate-mov-mp4-disposition-mpegts-remux > fate-mov-mp4-disposition-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index" > > +# Same as the previous test, but the audio disposition should now be tagged > +# with the HTML5 media track identifier as opposed to the DASH identifier. > +FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \ > + MPEGTS_DEMUXER MOV_DEMUXER AC3_DECODER \ > + MP4_MUXER FRAMECRC_MUXER ) \ > + += fate-mov-mp4-disposition-unified-origin-mpegts-remux > +fate-mov-mp4-disposition-unified-origin-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired -kind_writing_mode unified_origin" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index" > + > FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_MOV_FFMPEG_FFPROBE-yes) > > fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE) $(FATE_MOV_FASTSTART) $(FATE_MOV_FFMPEG_FFPROBE-yes) > diff --git a/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux b/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux > new file mode 100644 > index 0000000000..0242cffb9b > --- /dev/null > +++ b/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux > @@ -0,0 +1,81 @@ > +99f1d34f8028c674cb10d2604a7f0117 *tests/data/fate/mov-mp4-disposition-unified-origin-mpegts-remux.mp4 > +5695 tests/data/fate/mov-mp4-disposition-unified-origin-mpegts-remux.mp4 > 
+#tb 0: 1/48000 > +#media_type 0: audio > +#codec_id 0: ac3 > +#sample_rate 0: 48000 > +#channel_layout 0: 3 > +#channel_layout_name 0: stereo > +#tb 1: 1/48000 > +#media_type 1: audio > +#codec_id 1: ac3 > +#sample_rate 1: 48000 > +#channel_layout 1: 3 > +#channel_layout_name 1: stereo > +1, 0, 0, 1536, 768, 0xa63778d4, S=1, 4 > +1, 1536, 1536, 1536, 768, 0x7d577f3f > +0, 3072, 3072, 1536, 768, 0xc2867884, S=1, 4 > +1, 3072, 3072, 1536, 768, 0xd86b7c8f > +0, 4608, 4608, 1536, 690, 0xa2714bf3 > +1, 4608, 4608, 1536, 626, 0x09f4382f > +{ > + "programs": [ > + > + ], > + "streams": [ > + { > + "index": 0, > + "disposition": { > + "default": 1, > + "dub": 0, > + "original": 0, > + "comment": 0, > + "lyrics": 0, > + "karaoke": 0, > + "forced": 0, > + "hearing_impaired": 1, > + "visual_impaired": 0, > + "clean_effects": 0, > + "attached_pic": 0, > + "timed_thumbnails": 0, > + "captions": 1, > + "descriptions": 0, > + "metadata": 0, > + "dependent": 0, > + "still_image": 0 > + }, > + "side_data_list": [ > + { > + > + } > + ] > + }, > + { > + "index": 1, > + "disposition": { > + "default": 0, > + "dub": 0, > + "original": 0, > + "comment": 0, > + "lyrics": 0, > + "karaoke": 0, > + "forced": 0, > + "hearing_impaired": 0, > + "visual_impaired": 1, > + "clean_effects": 0, > + "attached_pic": 0, > + "timed_thumbnails": 0, > + "captions": 0, > + "descriptions": 1, > + "metadata": 0, > + "dependent": 0, > + "still_image": 0 > + }, > + "side_data_list": [ > + { > + > + } > + ] > + } > + ] > +} > -- > 2.31.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff --git a/libavformat/isom.c b/libavformat/isom.c index 300ba927c2..fb8ad3d824 100644 --- a/libavformat/isom.c +++ b/libavformat/isom.c @@ -433,19 +433,32 @@ void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout) static const struct MP4TrackKindValueMapping dash_role_map[] = { { AV_DISPOSITION_HEARING_IMPAIRED|AV_DISPOSITION_CAPTIONS, - "caption" }, + "caption", + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, { AV_DISPOSITION_COMMENT, - "commentary" }, + "commentary", + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, { AV_DISPOSITION_VISUAL_IMPAIRED|AV_DISPOSITION_DESCRIPTIONS, - "description" }, + "description", + KindWritingModeCMAF }, { AV_DISPOSITION_DUB, - "dub" }, + "dub", + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, { AV_DISPOSITION_FORCED, - "forced-subtitle" }, + "forced-subtitle", + KindWritingModeCMAF|KindWritingModeUnifiedOrigin }, + { 0, NULL } +}; + +static const struct MP4TrackKindValueMapping html_kind_map[] = { + { AV_DISPOSITION_VISUAL_IMPAIRED|AV_DISPOSITION_DESCRIPTIONS, + "main-desc", + KindWritingModeUnifiedOrigin }, { 0, NULL } }; const struct MP4TrackKindMapping ff_mov_track_kind_table[] = { { "urn:mpeg:dash:role:2011", dash_role_map }, + { "about:html-kind", html_kind_map }, { 0, NULL } }; diff --git a/libavformat/isom.h b/libavformat/isom.h index c62fcf2bfe..1252fc6603 100644 --- a/libavformat/isom.h +++ b/libavformat/isom.h @@ -390,9 +390,15 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags) #define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p') #define MOV_MP4_TTML_TAG MKTAG('s', 't', 'p', 'p') +enum MP4TrackKindWritingMode { + KindWritingModeCMAF = (1 << 0), + KindWritingModeUnifiedOrigin = (1 << 1), +}; + struct MP4TrackKindValueMapping { int disposition; const char *value; + uint32_t writing_modes; }; struct MP4TrackKindMapping { diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 4070fc9ef7..baaae7d3ad 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ 
-111,6 +111,9 @@ static const AVOption options[] = { { "pts", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MOV_PRFT_SRC_PTS}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM, "prft"}, { "empty_hdlr_name", "write zero-length name string in hdlr atoms within mdia and minf atoms", offsetof(MOVMuxContext, empty_hdlr_name), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM}, { "movie_timescale", "set movie timescale", offsetof(MOVMuxContext, movie_timescale), AV_OPT_TYPE_INT, {.i64 = MOV_TIMESCALE}, 1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM}, + { "kind_writing_mode", "set kind box writing mode", offsetof(MOVMuxContext, kind_writing_mode), AV_OPT_TYPE_INT, {.i64 = KindWritingModeCMAF}, KindWritingModeCMAF, KindWritingModeUnifiedOrigin, AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, + { "cmaf", "CMAF writing mode", 0, AV_OPT_TYPE_CONST, {.i64 = KindWritingModeCMAF}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, + { "unified_origin", "Compatibility mode for Unified Origin (all DASH except for audio description)", 0, AV_OPT_TYPE_CONST, {.i64 = KindWritingModeUnifiedOrigin}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "kind_writing_mode"}, { NULL }, }; @@ -3355,7 +3358,8 @@ static int mov_write_track_kind(AVIOContext *pb, const char *scheme_uri, return update_size(pb, pos); } -static int mov_write_track_kinds(AVIOContext *pb, AVStream *st) +static int mov_write_track_kinds(AVIOContext *pb, AVStream *st, + enum MP4TrackKindWritingMode mode) { int ret = AVERROR_BUG; @@ -3364,7 +3368,8 @@ static int mov_write_track_kinds(AVIOContext *pb, AVStream *st) for (int j = 0; map.value_maps[j].disposition; j++) { const struct MP4TrackKindValueMapping value_map = map.value_maps[j]; - if (!(st->disposition & value_map.disposition)) + if (!(st->disposition & value_map.disposition) || + !(value_map.writing_modes & mode)) continue; if ((ret = mov_write_track_kind(pb, map.scheme_uri, value_map.value)) < 0) @@ -3393,7 +3398,8 @@ static int mov_write_track_udta_tag(AVIOContext *pb, 
MOVMuxContext *mov, mov_write_track_metadata(pb_buf, st, "name", "title"); if (mov->mode & MODE_MP4) { - if ((ret = mov_write_track_kinds(pb_buf, st)) < 0) + if ((ret = mov_write_track_kinds(pb_buf, st, + mov->kind_writing_mode)) < 0) return ret; } diff --git a/libavformat/movenc.h b/libavformat/movenc.h index 40077b1afe..e02a086b1f 100644 --- a/libavformat/movenc.h +++ b/libavformat/movenc.h @@ -25,6 +25,7 @@ #define AVFORMAT_MOVENC_H #include "avformat.h" +#include "isom.h" #include "movenccenc.h" #include "libavcodec/packet_internal.h" @@ -242,6 +243,7 @@ typedef struct MOVMuxContext { MOVPrftBox write_prft; int empty_hdlr_name; int movie_timescale; + enum MP4TrackKindWritingMode kind_writing_mode; } MOVMuxContext; #define FF_MOV_FLAG_RTP_HINT (1 << 0) diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak index 5ca992e181..20f085803b 100644 --- a/tests/fate/mov.mak +++ b/tests/fate/mov.mak @@ -145,6 +145,14 @@ FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \ += fate-mov-mp4-disposition-mpegts-remux fate-mov-mp4-disposition-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index" +# Same as the previous test, but the audio disposition should now be tagged +# with the HTML5 media track identifier as opposed to the DASH identifier. 
+FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \ + MPEGTS_DEMUXER MOV_DEMUXER AC3_DECODER \ + MP4_MUXER FRAMECRC_MUXER ) \ + += fate-mov-mp4-disposition-unified-origin-mpegts-remux +fate-mov-mp4-disposition-unified-origin-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired -kind_writing_mode unified_origin" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index" + FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_MOV_FFMPEG_FFPROBE-yes) fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE) $(FATE_MOV_FASTSTART) $(FATE_MOV_FFMPEG_FFPROBE-yes) diff --git a/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux b/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux new file mode 100644 index 0000000000..0242cffb9b --- /dev/null +++ b/tests/ref/fate/mov-mp4-disposition-unified-origin-mpegts-remux @@ -0,0 +1,81 @@ +99f1d34f8028c674cb10d2604a7f0117 *tests/data/fate/mov-mp4-disposition-unified-origin-mpegts-remux.mp4 +5695 tests/data/fate/mov-mp4-disposition-unified-origin-mpegts-remux.mp4 +#tb 0: 1/48000 +#media_type 0: audio +#codec_id 0: ac3 +#sample_rate 0: 48000 +#channel_layout 0: 3 +#channel_layout_name 0: stereo +#tb 1: 1/48000 +#media_type 1: audio +#codec_id 1: ac3 +#sample_rate 1: 48000 +#channel_layout 1: 3 +#channel_layout_name 1: stereo +1, 0, 0, 1536, 768, 0xa63778d4, S=1, 4 +1, 1536, 1536, 1536, 768, 0x7d577f3f +0, 3072, 3072, 1536, 768, 0xc2867884, S=1, 4 +1, 3072, 3072, 1536, 768, 0xd86b7c8f +0, 4608, 4608, 1536, 690, 0xa2714bf3 +1, 4608, 4608, 1536, 626, 0x09f4382f +{ + "programs": [ + + ], + "streams": [ + { + "index": 0, + "disposition": { + "default": 1, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 1, + "visual_impaired": 0, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 1, + "descriptions": 0, + "metadata": 0, + "dependent": 
0, + "still_image": 0 + }, + "side_data_list": [ + { + + } + ] + }, + { + "index": 1, + "disposition": { + "default": 0, + "dub": 0, + "original": 0, + "comment": 0, + "lyrics": 0, + "karaoke": 0, + "forced": 0, + "hearing_impaired": 0, + "visual_impaired": 1, + "clean_effects": 0, + "attached_pic": 0, + "timed_thumbnails": 0, + "captions": 0, + "descriptions": 1, + "metadata": 0, + "dependent": 0, + "still_image": 0 + }, + "side_data_list": [ + { + + } + ] + } + ] +}