diff mbox series

[FFmpeg-devel,v3,4/5] avformat/{isom, mov, movenc}: add support for CMAF DASH roles

Message ID 20210920150048.8790-5-jeebjp@gmail.com
State New
Headers show
Series Support for stream dispositions in MP4
Related show

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Jan Ekström Sept. 20, 2021, 3 p.m. UTC
From: Jan Ekström <jan.ekstrom@24i.com>

This information is coded in a standard MP4 KindBox and utilizes the
scheme and values as per the DASH role scheme defined in MPEG-DASH.
Other schemes are technically allowed, but where multiple schemes
define the same concepts, the DASH scheme should be utilized.

Such flagging is additionally utilized by the DASH-IF CMAF ingest
specification, enabling an encoder to inform the following component
of the roles of the incoming media streams.

A test is added for this functionality in a similar manner to the
matroska test.

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
---
 libavformat/isom.c                            | 19 ++++
 libavformat/isom.h                            | 12 +++
 libavformat/mov.c                             | 91 +++++++++++++++++++
 libavformat/movenc.c                          | 51 +++++++++++
 tests/fate/mov.mak                            |  9 ++
 .../ref/fate/mov-mp4-disposition-mpegts-remux | 81 +++++++++++++++++
 6 files changed, 263 insertions(+)
 create mode 100644 tests/ref/fate/mov-mp4-disposition-mpegts-remux
diff mbox series

Patch

diff --git a/libavformat/isom.c b/libavformat/isom.c
index 4df5440023..300ba927c2 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -430,3 +430,30 @@  void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout)
     }
     avio_wb32(pb, 0);              // mNumberChannelDescriptions
 }
+
+/**
+ * DASH role scheme values and the stream disposition flag(s) each one maps to.
+ * Terminated by an entry whose disposition is 0.
+ */
+static const struct MP4TrackKindValueMapping dash_role_map[] = {
+    { AV_DISPOSITION_HEARING_IMPAIRED|AV_DISPOSITION_CAPTIONS,
+        "caption" },
+    { AV_DISPOSITION_COMMENT,
+        "commentary" },
+    { AV_DISPOSITION_VISUAL_IMPAIRED|AV_DISPOSITION_DESCRIPTIONS,
+        "description" },
+    { AV_DISPOSITION_DUB,
+        "dub" },
+    { AV_DISPOSITION_FORCED,
+        "forced-subtitle" },
+    { 0, NULL }
+};
+
+/**
+ * Known KindBox schemes, each paired with its value table.
+ * Terminated by an entry whose scheme_uri is NULL.
+ */
+const struct MP4TrackKindMapping ff_mov_track_kind_table[] = {
+    { "urn:mpeg:dash:role:2011", dash_role_map },
+    { NULL, NULL }
+};
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 34a58c79b7..c62fcf2bfe 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -390,4 +390,16 @@  static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
 #define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
 #define MOV_MP4_TTML_TAG  MKTAG('s', 't', 'p', 'p')
 
+struct MP4TrackKindValueMapping { /* one scheme value <-> disposition pair */
+    int         disposition; /* AV_DISPOSITION_* flag(s); 0 ends a table */
+    const char *value;       /* KindBox value string for those flags */
+};
+
+struct MP4TrackKindMapping { /* one KindBox scheme and its values */
+    const char   *scheme_uri; /* KindBox scheme URI; NULL ends the table */
+    const struct  MP4TrackKindValueMapping *value_maps; /* its value table */
+};
+
+extern const struct MP4TrackKindMapping ff_mov_track_kind_table[];
+
 #endif /* AVFORMAT_ISOM_H */
diff --git a/libavformat/mov.c b/libavformat/mov.c
index ca14646a38..81e8d3bac9 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -28,6 +28,7 @@ 
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/bprint.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
@@ -6851,6 +6852,113 @@  static int mov_read_dvcc_dvvc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+/**
+ * Parse a 'kind' box (KindBox) and translate it into stream disposition flags.
+ *
+ * The payload is a FullBox header (version and flags, both required to be 0)
+ * followed by two null-terminated strings: a scheme URI and a value. Every
+ * entry of ff_mov_track_kind_table matching both strings has its disposition
+ * flags OR-ed into the last stream of the format context.
+ *
+ * @param c    demuxer context; the last stream in c->fc is the one updated
+ * @param pb   I/O context to read the box payload from
+ * @param atom box being parsed; atom.size is the payload size in bytes
+ * @return 0 on success or when no stream exists yet, AVERROR_INVALIDDATA for
+ *         a malformed or unsupported box, or the negative error code returned
+ *         while reading either string
+ */
+static int mov_read_kind(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVFormatContext *ctx = c->fc;
+    AVStream *st = NULL;
+    AVBPrint scheme_buf, value_buf;
+    int64_t scheme_str_len = 0, value_str_len = 0;
+    int version, flags, ret = AVERROR_BUG;
+    int64_t size = atom.size;
+
+    if (atom.size < 6)
+        // 4 bytes for version + flags, 2x 1 byte for null
+        return AVERROR_INVALIDDATA;
+
+    if (ctx->nb_streams < 1)
+        return 0;
+    st = ctx->streams[ctx->nb_streams - 1];
+
+    version = avio_r8(pb);
+    flags   = avio_rb24(pb);
+    size   -= 4;
+
+    if (version != 0 || flags != 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Unsupported 'kind' box with version %d, flags: %x\n",
+               version, flags);
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_bprint_init(&scheme_buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&value_buf,  0, AV_BPRINT_SIZE_UNLIMITED);
+
+    if ((scheme_str_len = ff_read_string_to_bprint_overwrite(pb, &scheme_buf,
+                                                             size)) < 0) {
+        ret = scheme_str_len;
+        goto cleanup;
+    }
+
+    if (scheme_str_len + 1 >= size) {
+        // A second string must follow, even if it is empty (a lone null).
+        // The + 1 accounts for the scheme's terminating null, which is
+        // expected to have been consumed whenever size was not hit.
+        ret = AVERROR_INVALIDDATA;
+        goto cleanup;
+    }
+
+    size -= scheme_str_len + 1;
+
+    if ((value_str_len = ff_read_string_to_bprint_overwrite(pb, &value_buf,
+                                                            size)) < 0) {
+        ret = value_str_len;
+        goto cleanup;
+    }
+
+    if (value_str_len == size) {
+        // in case of no trailing null, box is not valid.
+        ret = AVERROR_INVALIDDATA;
+        goto cleanup;
+    }
+
+    av_log(ctx, AV_LOG_TRACE,
+           "%s stream %d KindBox(scheme: %s, value: %s)\n",
+           av_get_media_type_string(st->codecpar->codec_type),
+           st->index,
+           scheme_buf.str, value_buf.str);
+
+    // NOTE(review): av_strstart() is a prefix test, so a scheme or value that
+    // merely begins with a known string also matches - confirm this is
+    // intended rather than an exact comparison.
+    for (int i = 0; ff_mov_track_kind_table[i].scheme_uri; i++) {
+        const struct MP4TrackKindMapping map = ff_mov_track_kind_table[i];
+        if (!av_strstart(scheme_buf.str, map.scheme_uri, NULL))
+            continue;
+
+        for (int j = 0; map.value_maps[j].disposition; j++) {
+            const struct MP4TrackKindValueMapping value_map = map.value_maps[j];
+            if (!av_strstart(value_buf.str, value_map.value, NULL))
+                continue;
+
+            st->disposition |= value_map.disposition;
+        }
+    }
+
+    ret = 0;
+
+cleanup:
+
+    av_bprint_finalize(&scheme_buf, NULL);
+    av_bprint_finalize(&value_buf,  NULL);
+
+    return ret;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@@ -6948,6 +7038,7 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('c','l','l','i'), mov_read_clli },
 { MKTAG('d','v','c','C'), mov_read_dvcc_dvvc },
 { MKTAG('d','v','v','C'), mov_read_dvcc_dvvc },
+{ MKTAG('k','i','n','d'), mov_read_kind },
 { 0, NULL }
 };
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index cfb5a5c725..0c4a24c313 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -3329,6 +3329,64 @@  static int mov_write_track_metadata(AVIOContext *pb, AVStream *st,
     return update_size(pb, pos);
 }
 
+/**
+ * Write a single 'kind' box (KindBox).
+ *
+ * @param pb         output I/O context
+ * @param scheme_uri scheme URI, written as a null-terminated string
+ * @param value      value string; NULL or "" produces just the terminator
+ * @return the result of update_size() for the written box
+ */
+static int mov_write_track_kind(AVIOContext *pb, const char *scheme_uri,
+                                const char *value)
+{
+    int64_t pos = avio_tell(pb);
+
+    /* Box|FullBox basics */
+    avio_wb32(pb, 0); /* size placeholder */
+    ffio_wfourcc(pb, (const unsigned char *)"kind");
+    avio_w8(pb, 0);   /* version = 0 */
+    avio_wb24(pb, 0); /* flags = 0 */
+
+    /* Required scheme URI followed by the optional value; avio_put_str()
+     * appends the terminating null and emits a lone null for NULL. */
+    avio_put_str(pb, scheme_uri);
+    avio_put_str(pb, value);
+
+    return update_size(pb, pos);
+}
+
+/**
+ * Write one 'kind' box for every known scheme value whose disposition flags
+ * overlap the stream's disposition.
+ *
+ * A table entry is emitted as soon as any of its flags is set on the stream,
+ * not only when all of them are.
+ *
+ * @param pb output I/O context
+ * @param st stream whose disposition is examined
+ * @return 0 on success, or the negative error code of the failed box write
+ */
+static int mov_write_track_kinds(AVIOContext *pb, AVStream *st)
+{
+    for (const struct MP4TrackKindMapping *map = ff_mov_track_kind_table;
+         map->scheme_uri; map++) {
+        for (const struct MP4TrackKindValueMapping *val = map->value_maps;
+             val->disposition; val++) {
+            int ret;
+
+            if (!(st->disposition & val->disposition))
+                continue;
+
+            ret = mov_write_track_kind(pb, map->scheme_uri, val->value);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
 static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
                                     AVStream *st)
 {
@@ -3346,6 +3392,11 @@  static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
     if (mov->mode & (MODE_MP4|MODE_MOV))
         mov_write_track_metadata(pb_buf, st, "name", "title");
 
+    if (mov->mode & MODE_MP4) {
+        if ((ret = mov_write_track_kinds(pb_buf, st)) < 0)
+            return ret;
+    }
+
     if ((size = avio_get_dyn_buf(pb_buf, &buf)) > 0) {
         avio_wb32(pb, size + 8);
         ffio_wfourcc(pb, "udta");
diff --git a/tests/fate/mov.mak b/tests/fate/mov.mak
index 0c9177aa81..5ca992e181 100644
--- a/tests/fate/mov.mak
+++ b/tests/fate/mov.mak
@@ -136,6 +136,15 @@  FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL SRT_DEMUXER MOV_DEMUXER SUB
 fate-mov-mp4-ttml-stpp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
 fate-mov-mp4-ttml-dfxp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000 -tag:s dfxp -strict unofficial" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
 
+# Resulting remux should have:
+# 1. first audio stream with AV_DISPOSITION_HEARING_IMPAIRED | CAPTIONS
+# 2. second audio stream with AV_DISPOSITION_VISUAL_IMPAIRED | DESCRIPTIONS
+FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \
+                                       MPEGTS_DEMUXER MOV_DEMUXER AC3_DECODER \
+                                       MP4_MUXER FRAMECRC_MUXER ) \
+                          += fate-mov-mp4-disposition-mpegts-remux
+fate-mov-mp4-disposition-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index"
+
 FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_MOV_FFMPEG_FFPROBE-yes)
 
 fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE) $(FATE_MOV_FASTSTART) $(FATE_MOV_FFMPEG_FFPROBE-yes)
diff --git a/tests/ref/fate/mov-mp4-disposition-mpegts-remux b/tests/ref/fate/mov-mp4-disposition-mpegts-remux
new file mode 100644
index 0000000000..2df48a0b0d
--- /dev/null
+++ b/tests/ref/fate/mov-mp4-disposition-mpegts-remux
@@ -0,0 +1,81 @@ 
+e9a12470bc7645cebca985a9f3762c6c *tests/data/fate/mov-mp4-disposition-mpegts-remux.mp4
+5705 tests/data/fate/mov-mp4-disposition-mpegts-remux.mp4
+#tb 0: 1/48000
+#media_type 0: audio
+#codec_id 0: ac3
+#sample_rate 0: 48000
+#channel_layout 0: 3
+#channel_layout_name 0: stereo
+#tb 1: 1/48000
+#media_type 1: audio
+#codec_id 1: ac3
+#sample_rate 1: 48000
+#channel_layout 1: 3
+#channel_layout_name 1: stereo
+1,          0,          0,     1536,      768, 0xa63778d4, S=1,        4
+1,       1536,       1536,     1536,      768, 0x7d577f3f
+0,       3072,       3072,     1536,      768, 0xc2867884, S=1,        4
+1,       3072,       3072,     1536,      768, 0xd86b7c8f
+0,       4608,       4608,     1536,      690, 0xa2714bf3
+1,       4608,       4608,     1536,      626, 0x09f4382f
+{
+    "programs": [
+
+    ],
+    "streams": [
+        {
+            "index": 0,
+            "disposition": {
+                "default": 1,
+                "dub": 0,
+                "original": 0,
+                "comment": 0,
+                "lyrics": 0,
+                "karaoke": 0,
+                "forced": 0,
+                "hearing_impaired": 1,
+                "visual_impaired": 0,
+                "clean_effects": 0,
+                "attached_pic": 0,
+                "timed_thumbnails": 0,
+                "captions": 1,
+                "descriptions": 0,
+                "metadata": 0,
+                "dependent": 0,
+                "still_image": 0
+            },
+            "side_data_list": [
+                {
+
+                }
+            ]
+        },
+        {
+            "index": 1,
+            "disposition": {
+                "default": 0,
+                "dub": 0,
+                "original": 0,
+                "comment": 0,
+                "lyrics": 0,
+                "karaoke": 0,
+                "forced": 0,
+                "hearing_impaired": 0,
+                "visual_impaired": 1,
+                "clean_effects": 0,
+                "attached_pic": 0,
+                "timed_thumbnails": 0,
+                "captions": 0,
+                "descriptions": 1,
+                "metadata": 0,
+                "dependent": 0,
+                "still_image": 0
+            },
+            "side_data_list": [
+                {
+
+                }
+            ]
+        }
+    ]
+}