diff mbox series

[FFmpeg-devel] avformat: add MMTP parser and MMT/TLV demuxer

Message ID 20230428173128.21074-1-admin@superfashi.com
State New
Headers show
Series [FFmpeg-devel] avformat: add MMTP parser and MMT/TLV demuxer | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

SuperFashi April 28, 2023, 5:31 p.m. UTC
This patch adds an MPEG Media Transport Protocol (MMTP) parser, as defined in ISO/IEC 23008-1, and an MMT protocol over TLV packets (MMT/TLV) demuxer, as defined in ARIB STD-B32. Currently, it supports hevc, aac (loas), and arib-ttml demuxing.

Since MMTP is designed to be transmitted over IP, there is no size information within each MMTP packet, and no file format is defined alongside the protocol. One solution is a simple container format using type–length–value packets, which is defined in ARIB STD-B32.

Another known container format for MMTP is the packet capture (pcap) file, which records network packets. This patch does not include a demuxer for that container format.

Signed-off-by: SuperFashi <admin@superfashi.com>
---
 Changelog                |    1 +
 doc/demuxers.texi        |    4 +
 libavformat/Makefile     |    1 +
 libavformat/allformats.c |    1 +
 libavformat/mmtp.c       | 1169 ++++++++++++++++++++++++++++++++++++++
 libavformat/mmtp.h       |   61 ++
 libavformat/mmttlv.c     |  324 +++++++++++
 libavformat/version.h    |    2 +-
 8 files changed, 1562 insertions(+), 1 deletion(-)
 create mode 100644 libavformat/mmtp.c
 create mode 100644 libavformat/mmtp.h
 create mode 100644 libavformat/mmttlv.c

Comments

Michael Niedermayer April 28, 2023, 8:44 p.m. UTC | #1
On Sat, Apr 29, 2023 at 02:31:28AM +0900, SuperFashi wrote:
> This patch adds an MPEG Media Transport Protocol (MMTP) parser, as defined in ISO/IEC 23008-1, and an MMT protocol over TLV packets (MMT/TLV) demuxer, as defined in ARIB STD-B32. Currently, it supports hevc, aac (loas), and arib-ttml demuxing.
> 
> Since MMTP is designed to transmit over IP, there is no size information within each MMTP packet, and there is no filesystem format defined alongside the protocol. One of the solution is a simple container format using type–length–value packets, which is defined in ARIB STD-B32.
> 
> Another known container format for MMTP is using packet capture (pcap) files which records network packets. This patch does not include the demuxer for this container format.
> 
> Signed-off-by: SuperFashi <admin@superfashi.com>


fails to build

libavformat/mmtp.c: In function ‘parse_video_component_descriptor’:
libavformat/mmtp.c:216:36: error: ‘VIDEO_COMPONENT_DESCRIPTOR_ID’ undeclared (first use in this function); did you mean ‘VIDEO_COMPONENT_DESCRIPTOR’?
         av_assert1(AV_RB16(buf) == VIDEO_COMPONENT_DESCRIPTOR_ID);
                                    ^
libavformat/mmtp.c:33:5: note: in definition of macro ‘LIMIT_READ’
     block; \
     ^~~~~
[...]


> +#include <stdbool.h>
> +

> +#define LIMIT_READ(consume, block) \
> +    if (size < (consume)) return AVERROR_INVALIDDATA; \
> +    block; \
> +    buf += (consume); \
> +    size -= (consume);

This makes the code hard to debug and read


> +
> +#define MUST_CONSUME(consume) \
> +    av_assert1((consume) <= size); \
> +    buf += (consume); \
> +    size -= (consume);

this could maybe use some bytestream reader
either way a function should be cleaner than a macro

thx

[...]
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index b6f6682904..2483fdd547 100644
--- a/Changelog
+++ b/Changelog
@@ -6,6 +6,7 @@  version <next>:
 - Playdate video decoder and demuxer
 - Extend VAAPI support for libva-win32 on Windows
 - afireqsrc audio source filter
+- MMTP parser and MMT/TLV demuxer
 
 version 6.0:
 - Radiance HDR image support
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index 2d33b47a56..56aab251b2 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -689,6 +689,10 @@  Set the sample rate for libopenmpt to output.
 Range is from 1000 to INT_MAX. The value default is 48000.
 @end table
 
+@section mmttlv
+
+Demuxer for MMT protocol over TLV packets (MMT/TLV), as defined in ARIB STD-B32.
+
 @section mov/mp4/3gp
 
 Demuxer for Quicktime File Format & ISO/IEC Base Media File Format (ISO/IEC 14496-12 or MPEG-4 Part 12, ISO/IEC 15444-12 or JPEG 2000 Part 12).
diff --git a/libavformat/Makefile b/libavformat/Makefile
index f8ad7c6a11..e32d6e71a3 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -354,6 +354,7 @@  OBJS-$(CONFIG_MLV_DEMUXER)               += mlvdec.o riffdec.o
 OBJS-$(CONFIG_MM_DEMUXER)                += mm.o
 OBJS-$(CONFIG_MMF_DEMUXER)               += mmf.o
 OBJS-$(CONFIG_MMF_MUXER)                 += mmf.o rawenc.o
+OBJS-$(CONFIG_MMTTLV_DEMUXER)            += mmtp.o mmttlv.o
 OBJS-$(CONFIG_MODS_DEMUXER)              += mods.o
 OBJS-$(CONFIG_MOFLEX_DEMUXER)            += moflex.o
 OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o \
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index efdb34e29d..d5f4f5680e 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -270,6 +270,7 @@  extern const AVInputFormat  ff_mlv_demuxer;
 extern const AVInputFormat  ff_mm_demuxer;
 extern const AVInputFormat  ff_mmf_demuxer;
 extern const FFOutputFormat ff_mmf_muxer;
+extern const AVInputFormat  ff_mmttlv_demuxer;
 extern const AVInputFormat  ff_mods_demuxer;
 extern const AVInputFormat  ff_moflex_demuxer;
 extern const AVInputFormat  ff_mov_demuxer;
diff --git a/libavformat/mmtp.c b/libavformat/mmtp.c
new file mode 100644
index 0000000000..cb40b822fb
--- /dev/null
+++ b/libavformat/mmtp.c
@@ -0,0 +1,1169 @@ 
+/*
+ * MPEG Media Transport Protocol (MMTP) parser, as defined in ISO/IEC 23008-1.
+ * Copyright (c) 2023 SuperFashi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "network.h"
+#include "mmtp.h"
+#include "internal.h"
+#include "demux.h"
+
+#include <stdbool.h>
+
+/*
+ * Bounds-checked read step used by the parsers in this file.
+ *
+ * Requires variables named `buf` and `size` in the calling scope. When fewer
+ * than `consume` bytes remain it returns AVERROR_INVALIDDATA from the
+ * *calling function*; otherwise it runs `block` and then advances `buf` and
+ * shrinks `size` by `consume`.
+ *
+ * NOTE(review): the expansion is several bare statements (no do/while(0)
+ * wrapper) and `consume` is evaluated three times, so the macro is only safe
+ * as a statement inside braces and with a side-effect-free `consume`.
+ */
+#define LIMIT_READ(consume, block) \
+    if (size < (consume)) return AVERROR_INVALIDDATA; \
+    block; \
+    buf += (consume); \
+    size -= (consume);
+
+/*
+ * Advance `buf` and shrink `size` (both taken from the calling scope) by
+ * `consume` bytes that the caller has already validated.
+ *
+ * The only guard is av_assert1(), so callers must guarantee
+ * `consume <= size` themselves. Like LIMIT_READ, this expands to several
+ * bare statements and evaluates `consume` more than once.
+ */
+#define MUST_CONSUME(consume) \
+    av_assert1((consume) <= size); \
+    buf += (consume); \
+    size -= (consume);
+
+/*
+ * Decoded form of one general location info field, as filled in by
+ * parse_mmt_general_location_info(). `location_type` selects which member of
+ * the anonymous union is valid (0x00 -> type0 ... 0x05 -> type5).
+ */
+struct MMTGeneralLocationInfo {
+    uint8_t location_type;
+    union {
+        // location_type 0x00: identified by a packet id only
+        struct {
+            uint16_t packet_id;
+        } type0;
+        // location_type 0x01: IPv4 source/destination, port and packet id
+        struct {
+            struct in_addr ipv4_src_addr;
+            struct in_addr ipv4_dst_addr;
+            in_port_t      dst_port;
+            uint16_t       packet_id;
+        } type1;
+        // location_type 0x02: IPv6 source/destination, port and packet id
+        struct {
+            struct in6_addr ipv6_src_addr;
+            struct in6_addr ipv6_dst_addr;
+            in_port_t       dst_port;
+            uint16_t        packet_id;
+        } type2;
+        // location_type 0x03: network id, transport stream id and 13-bit PID
+        struct {
+            uint16_t network_id;
+            uint16_t MPEG_2_transport_stream_id;
+            uint16_t MPEG_2_PID: 13;
+        } type3;
+        // location_type 0x04: IPv6 source/destination, port and 13-bit PID
+        struct {
+            struct in6_addr ipv6_src_addr;
+            struct in6_addr ipv6_dst_addr;
+            in_port_t       dst_port;
+            uint16_t        MPEG_2_PID: 13;
+        } type4;
+        // location_type 0x05: NUL-terminated URL (length byte allows up to 255 chars)
+        struct {
+            char URL[0x100 + 1];
+        } type5;
+    };
+};
+
+/**
+ * Parse one general location info field into `info`.
+ *
+ * @param info receives location_type and the matching typeN union member
+ * @param buf  start of the field (first byte is location_type)
+ * @param size number of bytes available at buf
+ * @return number of bytes the field occupies, or AVERROR_INVALIDDATA when the
+ *         field is truncated or uses a location_type not handled here
+ */
+static inline ssize_t
+parse_mmt_general_location_info(struct MMTGeneralLocationInfo *info, const uint8_t *buf, uint32_t size) {
+    // encoded sizes in bytes, including the leading location_type byte
+    enum {
+        TYPE0_SIZE = 1 + 16 / 8,
+        TYPE1_SIZE = 1 + (32 + 32 + 16 + 16) / 8,
+        TYPE2_SIZE = 1 + (128 + 128 + 16 + 16) / 8,
+        TYPE3_SIZE = 1 + (16 + 16 + 3 + 13) / 8,
+        TYPE4_SIZE = 1 + (128 + 128 + 16 + 3 + 13) / 8,
+        PID_MASK   = 0x1FFF, // low 13 bits; hex instead of a non-ISO 0b literal
+    };
+    uint8_t url_length;
+
+    if (size < 1) return AVERROR_INVALIDDATA;
+    info->location_type = buf[0];
+
+    switch (info->location_type) {
+    case 0x00:
+        if (size < TYPE0_SIZE) return AVERROR_INVALIDDATA;
+        info->type0.packet_id = AV_RB16(buf + 1);
+        return TYPE0_SIZE;
+    case 0x01:
+        if (size < TYPE1_SIZE) return AVERROR_INVALIDDATA;
+        memcpy(&info->type1.ipv4_src_addr, buf + 1, 4);
+        memcpy(&info->type1.ipv4_dst_addr, buf + 1 + 4, 4);
+        info->type1.dst_port  = AV_RB16(buf + 1 + 4 + 4);
+        info->type1.packet_id = AV_RB16(buf + 1 + 4 + 4 + 2);
+        return TYPE1_SIZE;
+    case 0x02:
+        if (size < TYPE2_SIZE) return AVERROR_INVALIDDATA;
+        memcpy(&info->type2.ipv6_src_addr, buf + 1, 16);
+        memcpy(&info->type2.ipv6_dst_addr, buf + 1 + 16, 16);
+        info->type2.dst_port  = AV_RB16(buf + 1 + 16 + 16);
+        info->type2.packet_id = AV_RB16(buf + 1 + 16 + 16 + 2);
+        return TYPE2_SIZE;
+    case 0x03:
+        if (size < TYPE3_SIZE) return AVERROR_INVALIDDATA;
+        info->type3.network_id                 = AV_RB16(buf + 1);
+        info->type3.MPEG_2_transport_stream_id = AV_RB16(buf + 1 + 2);
+        info->type3.MPEG_2_PID                 = AV_RB16(buf + 1 + 2 + 2) & PID_MASK;
+        return TYPE3_SIZE;
+    case 0x04:
+        if (size < TYPE4_SIZE) return AVERROR_INVALIDDATA;
+        memcpy(&info->type4.ipv6_src_addr, buf + 1, 16);
+        memcpy(&info->type4.ipv6_dst_addr, buf + 1 + 16, 16);
+        info->type4.dst_port   = AV_RB16(buf + 1 + 16 + 16);
+        info->type4.MPEG_2_PID = AV_RB16(buf + 1 + 16 + 16 + 2) & PID_MASK;
+        return TYPE4_SIZE;
+    case 0x05:
+        // one length byte followed by that many URL bytes
+        if (size < 2) return AVERROR_INVALIDDATA;
+        url_length = buf[1];
+        if (size < 1 + 1 + url_length) return AVERROR_INVALIDDATA;
+        memcpy(info->type5.URL, buf + 2, url_length);
+        info->type5.URL[url_length] = '\0';
+        return 1 + 1 + url_length;
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+}
+
+
+/*
+ * Per-stream demuxing state, kept as a singly linked list hanging off
+ * MMTPContext.streams. Nodes are looked up by the id of `stream`.
+ */
+struct Streams {
+    AVStream *stream;
+
+    // array filled by parse_mpu_timestamp_descriptor(); presentation_time is
+    // the NTP timestamp converted by ff_parse_ntp_time() minus NTP_OFFSET_US
+    int num_timestamp_descriptors;
+    struct MPUTimestampDescriptor {
+        uint32_t sequence_number;
+        int64_t  presentation_time;
+    } *timestamp_descriptor;
+
+    // array filled by parse_mpu_extended_timestamp_descriptor(); only the
+    // first num_of_au entries of au[] are written for a given descriptor
+    int num_ext_timestamp_descriptors;
+    struct MPUExtendedTimestampDescriptor {
+        uint32_t sequence_number;
+        uint16_t decoding_time_offset;
+        uint8_t  num_of_au;
+        struct {
+            uint16_t dts_pts_offset;
+            uint16_t pts_offset;
+        } au[0x100];
+    } *ext_timestamp_descriptor;
+
+    // descriptors with a sequence number below this are ignored or recycled
+    // by the timestamp descriptor parsers
+    uint32_t    last_sequence_number;
+    // NOTE(review): the fields below are used outside the visible part of
+    // this file; offset is initialised to -1 in find_or_allocate_stream()
+    uint16_t    au_count;
+    AVBufferRef *pending_buffer;
+    int64_t     offset;
+    int         flags;
+
+    struct Streams *next;
+};
+
+/*
+ * Parser state shared across packets: the per-pid fragment assemblers, the
+ * per-stream state list and the program that new streams are attached to.
+ */
+struct MMTPContext {
+    struct FragmentAssembler *assembler;
+    struct Streams           *streams;
+    AVProgram                *program;
+    // struct MMTGeneralLocationInfo mpt_location; TODO
+
+    // below are temporary fields available for the scope of a single packet
+    AVFormatContext *s;
+    AVPacket        *pkt;
+    uint16_t        current_pid;
+    bool is_rap;
+};
+
+/**
+ * Look up the stream state whose AVStream id equals ctx->current_pid.
+ *
+ * @return the matching list node, or NULL when no stream has that id
+ */
+static inline struct Streams *find_current_stream(struct MMTPContext *ctx) {
+    struct Streams *node = ctx->streams;
+
+    while (node != NULL && node->stream->id != ctx->current_pid)
+        node = node->next;
+
+    return node;
+}
+
+/**
+ * Return the stream state for packet id `pid`, creating the AVStream and its
+ * list node on first use.
+ *
+ * An existing stream is flagged with need_context_update before it is
+ * returned. A new stream is added to ctx->program and pushed at the head of
+ * ctx->streams with offset set to -1.
+ *
+ * @return the list node, or NULL when an allocation fails
+ */
+static inline struct Streams *find_or_allocate_stream(struct MMTPContext *ctx, uint16_t pid) {
+    AVStream       *stream;
+    struct Streams *streams;
+
+    for (streams = ctx->streams; streams != NULL; streams = streams->next)
+        if (streams->stream->id == pid) {
+            ffstream(streams->stream)->need_context_update = 1;
+            return streams;
+        }
+
+    // allocate the node first: if it failed after avformat_new_stream(), the
+    // new AVStream would stay in the context without any parser state
+    streams = av_mallocz(sizeof(*streams));
+    if (streams == NULL) return NULL;
+
+    stream = avformat_new_stream(ctx->s, NULL);
+    if (stream == NULL) {
+        av_free(streams);
+        return NULL;
+    }
+    stream->id = pid;
+    av_program_add_stream_index(ctx->s, ctx->program->id, stream->index);
+
+    streams->stream = stream;
+    streams->next   = ctx->streams;
+    streams->offset = -1;
+    ctx->streams    = streams;
+    return streams;
+}
+
+// table ids, matched against the first byte of a table in parse_table()
+enum {
+    MMT_PACKAGE_TABLE_ID  = 0x20,
+    PACKAGE_LIST_TABLE_ID = 0x80,
+};
+
+// 16-bit descriptor tags, matched against the first two bytes of a
+// descriptor in parse_descriptor()
+enum {
+    MPU_TIMESTAMP_DESCRIPTOR          = 0x0001,
+    VIDEO_COMPONENT_DESCRIPTOR        = 0x8010,
+    MH_STREAM_IDENTIFIER_DESCRIPTOR   = 0x8011,
+    MH_AUDIO_COMPONENT_DESCRIPTOR     = 0x8014,
+    MH_DATA_COMPONENT_DESCRIPTOR      = 0x8020,
+    MPU_EXTENDED_TIMESTAMP_DESCRIPTOR = 0x8026,
+};
+
+/**
+ * Parse a video component descriptor and store its language code in the
+ * stream metadata under "language".
+ *
+ * @param stream stream whose metadata is updated
+ * @param buf    start of the descriptor (16-bit tag, 8-bit length, payload)
+ * @param size   number of bytes available at buf
+ * @return total descriptor size (length byte + 3), AVERROR_INVALIDDATA when
+ *         the descriptor is shorter than its fixed fields or does not fit in
+ *         `size`, or the error returned by av_dict_set()
+ */
+static inline ssize_t parse_video_component_descriptor(AVStream *stream, const uint8_t *buf, uint32_t size) {
+    // fixed part of the descriptor, in bytes
+    const uint32_t header_size = (16 + 8 + 4 + 4 + 1 + 2 + 5 + 16 + 4 + 4 + 24) / 8;
+    uint32_t       read_size;
+    char           language_code[4];
+    int            err;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(AV_RB16(buf) == VIDEO_COMPONENT_DESCRIPTOR);
+
+    // the declared length must cover the fixed fields and stay inside buf
+    read_size = buf[2] + 3;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+
+    memcpy(language_code, buf + 8, 3);
+    language_code[3] = '\0';
+
+    if ((err = av_dict_set(&stream->metadata, "language", language_code, 0)) < 0)
+        return err;
+
+    return read_size;
+}
+
+/**
+ * Parse an MH audio component descriptor: choose the audio codec id from
+ * stream_content/stream_type and store the language code in the stream
+ * metadata under "language".
+ *
+ * Codec mapping: stream_content 0x3 with stream_type 0x11 selects AAC LATM
+ * and 0x1c selects AAC; stream_content 0x4 selects MPEG-4 ALS. Other
+ * combinations leave codec_id untouched.
+ *
+ * @param stream stream whose codec parameters and metadata are updated
+ * @param buf    start of the descriptor (16-bit tag, 8-bit length, payload)
+ * @param size   number of bytes available at buf
+ * @return total descriptor size (length byte + 3), AVERROR_INVALIDDATA when
+ *         the descriptor is too short for its fields or does not fit in
+ *         `size`, or the error returned by av_dict_set()
+ */
+static inline ssize_t parse_mh_audio_component_descriptor(AVStream *stream, const uint8_t *buf, uint32_t size) {
+    // fixed part of the descriptor, in bytes
+    const uint32_t header_size = (16 + 8 + 4 + 4 + 8 + 16 + 8 + 8 + 1 + 1 + 2 + 3 + 1 + 24) / 8;
+    uint32_t       read_size;
+    uint8_t        stream_content;
+    uint8_t        stream_type;
+    bool           ES_multi_lingual_flag;
+    char           language_code[4];
+    int            err;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(AV_RB16(buf) == MH_AUDIO_COMPONENT_DESCRIPTOR);
+
+    // the declared length must cover the fixed fields and stay inside buf
+    read_size = buf[2] + 3;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+
+    stream_content        = buf[3] & 0x0F;
+    stream_type           = buf[7];
+    ES_multi_lingual_flag = buf[9] >> 7;
+    memcpy(language_code, buf + 10, 3);
+    language_code[3] = '\0';
+
+    // a multi-lingual stream carries a second 3-byte language code
+    if (ES_multi_lingual_flag && read_size - header_size < 3)
+        return AVERROR_INVALIDDATA;
+
+    switch (stream_content) {
+    case 0x3:
+        switch (stream_type) {
+        case 0x11:
+            stream->codecpar->codec_id = AV_CODEC_ID_AAC_LATM;
+            break;
+        case 0x1c:
+            stream->codecpar->codec_id = AV_CODEC_ID_AAC;
+            break;
+        }
+        break;
+    case 0x4:
+        stream->codecpar->codec_id = AV_CODEC_ID_MP4ALS;
+        break;
+    }
+
+    if ((err = av_dict_set(&stream->metadata, "language", language_code, 0)) < 0)
+        return err;
+
+    return read_size;
+}
+
+// upper bound on the number of entries kept in each per-stream timestamp
+// descriptor array; once reached, an existing entry is overwritten instead
+#define MAX_NUM_TIMESTAMP_DESCRIPTOR 32
+// absolute difference of two values; evaluates each argument twice, so the
+// arguments must be free of side effects
+#define DIFF(a, b) ((a) > (b) ? ((a) - (b)) : ((b) - (a)))
+
+/**
+ * Parse an MPU timestamp descriptor and record each (sequence number,
+ * presentation time) pair in streams->timestamp_descriptor.
+ *
+ * Entries older than streams->last_sequence_number are ignored. For the
+ * others a slot is chosen in this order: the slot already holding that
+ * sequence number, a slot holding an outdated sequence number, the slot
+ * farthest from the new sequence number when the array is full, or a newly
+ * appended slot.
+ *
+ * @param streams per-stream state to update
+ * @param buf     start of the descriptor (16-bit tag, 8-bit length, payload)
+ * @param size    number of bytes available at buf
+ * @return total descriptor size (length byte + 3), AVERROR_INVALIDDATA on a
+ *         truncated or oversized descriptor, AVERROR(ENOMEM) on allocation
+ *         failure
+ */
+static inline ssize_t parse_mpu_timestamp_descriptor(struct Streams *streams, const uint8_t *buf, uint32_t size) {
+    const uint32_t header_size = (16 + 8) / 8;
+    const uint32_t entry_size  = (32 + 64) / 8;
+    uint32_t       read_size;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(AV_RB16(buf) == MPU_TIMESTAMP_DESCRIPTOR);
+
+    // never trust the declared length beyond what the caller handed us
+    read_size = buf[2] + 3;
+    if (read_size > size) return AVERROR_INVALIDDATA;
+    buf  += header_size;
+    size  = read_size - header_size;
+
+    while (size > 0) {
+        struct MPUTimestampDescriptor *desc = NULL;
+        uint32_t                      mpu_seq_num;
+        int64_t                       mpu_presentation_time;
+        int                           i;
+
+        if (size < entry_size) return AVERROR_INVALIDDATA;
+        mpu_seq_num           = AV_RB32(buf);
+        mpu_presentation_time = ff_parse_ntp_time(AV_RB64(buf + 4)) - NTP_OFFSET_US;
+        buf  += entry_size;
+        size -= entry_size;
+
+        if (mpu_seq_num < streams->last_sequence_number) continue;
+
+        // 1. a slot that already describes this sequence number
+        for (i = 0; i < streams->num_timestamp_descriptors; ++i)
+            if (streams->timestamp_descriptor[i].sequence_number == mpu_seq_num) {
+                desc = streams->timestamp_descriptor + i;
+                break;
+            }
+
+        // 2. a slot whose sequence number is already outdated
+        if (desc == NULL)
+            for (i = 0; i < streams->num_timestamp_descriptors; ++i)
+                if (streams->timestamp_descriptor[i].sequence_number < streams->last_sequence_number) {
+                    desc = streams->timestamp_descriptor + i;
+                    break;
+                }
+
+        // 3. array full: evict the entry farthest from the new sequence number
+        if (desc == NULL && streams->num_timestamp_descriptors + 1 > MAX_NUM_TIMESTAMP_DESCRIPTOR) {
+            uint32_t max_dist = 0;
+            for (i = 0; i < streams->num_timestamp_descriptors; ++i) {
+                const uint32_t dist = DIFF(streams->timestamp_descriptor[i].sequence_number, mpu_seq_num);
+                if (dist > max_dist) {
+                    desc     = streams->timestamp_descriptor + i;
+                    max_dist = dist;
+                }
+            }
+            av_assert1(desc != NULL); // no exact match above, so every distance is > 0
+        }
+
+        // 4. otherwise append a new slot
+        if (desc == NULL) {
+            desc = av_dynarray2_add(
+                (void **) &streams->timestamp_descriptor, &streams->num_timestamp_descriptors,
+                sizeof(struct MPUTimestampDescriptor), NULL);
+            if (desc == NULL) return AVERROR(ENOMEM);
+        }
+
+        desc->sequence_number   = mpu_seq_num;
+        desc->presentation_time = mpu_presentation_time;
+    }
+
+    return read_size;
+}
+
+/**
+ * Parse an MPU extended timestamp descriptor and record the per-access-unit
+ * timing offsets in streams->ext_timestamp_descriptor.
+ *
+ * When the timescale flag is set, the stream time base becomes 1/timescale.
+ * Slots are chosen as in parse_mpu_timestamp_descriptor(); entries older
+ * than streams->last_sequence_number are skipped, but their bytes are still
+ * consumed so that following entries stay aligned.
+ *
+ * @param streams per-stream state to update
+ * @param buf     start of the descriptor (16-bit tag, 8-bit length, payload)
+ * @param size    number of bytes available at buf
+ * @return total descriptor size (length byte + 3), AVERROR_INVALIDDATA on
+ *         malformed data, AVERROR_PATCHWELCOME when entries are present with
+ *         pts_offset_type 0, AVERROR(ENOMEM) on allocation failure
+ */
+static inline ssize_t
+parse_mpu_extended_timestamp_descriptor(struct Streams *streams, const uint8_t *buf, uint32_t size) {
+    const uint32_t header_size = (16 + 8 + 5 + 2 + 1) / 8;
+    const uint32_t entry_size  = (32 + 2 + 6 + 16 + 8) / 8;
+    uint32_t       read_size;
+
+    uint8_t  pts_offset_type;
+    bool     timescale_flag;
+    uint16_t default_pts_offset = 0;
+
+    AVStream *stream = streams->stream;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(AV_RB16(buf) == MPU_EXTENDED_TIMESTAMP_DESCRIPTOR);
+
+    // never trust the declared length beyond what the caller handed us
+    read_size = buf[2] + 3;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+    pts_offset_type = (buf[3] >> 1) & 0x3;
+    timescale_flag  = buf[3] & 1;
+    buf  += header_size;
+    size  = read_size - header_size;
+
+    if (timescale_flag) {
+        uint32_t timescale;
+        if (size < 4) return AVERROR_INVALIDDATA;
+        timescale = AV_RB32(buf);
+        // time_base.den is an int and must be a usable, non-zero denominator
+        if (timescale == 0 || timescale > INT32_MAX) return AVERROR_INVALIDDATA;
+        stream->time_base.num = 1;
+        stream->time_base.den = timescale;
+        buf  += 4;
+        size -= 4;
+    }
+
+    if (pts_offset_type == 1) {
+        if (size < 2) return AVERROR_INVALIDDATA;
+        default_pts_offset = AV_RB16(buf);
+        buf  += 2;
+        size -= 2;
+    }
+
+    while (size > 0) {
+        struct MPUExtendedTimestampDescriptor *desc = NULL;
+        uint32_t                              mpu_seq_num;
+        uint16_t                              decoding_time_offset;
+        uint8_t                               num_of_au;
+        int                                   i;
+
+        if (pts_offset_type == 0)
+            return AVERROR_PATCHWELCOME;
+
+        if (size < entry_size) return AVERROR_INVALIDDATA;
+        mpu_seq_num          = AV_RB32(buf);
+        decoding_time_offset = AV_RB16(buf + 5);
+        num_of_au            = buf[7];
+        buf  += entry_size;
+        size -= entry_size;
+
+        // outdated entries keep desc == NULL: parsed for length, not stored
+        if (mpu_seq_num >= streams->last_sequence_number) {
+            // 1. a slot that already describes this sequence number
+            for (i = 0; i < streams->num_ext_timestamp_descriptors; ++i)
+                if (streams->ext_timestamp_descriptor[i].sequence_number == mpu_seq_num) {
+                    desc = streams->ext_timestamp_descriptor + i;
+                    break;
+                }
+
+            // 2. a slot whose sequence number is already outdated
+            if (desc == NULL)
+                for (i = 0; i < streams->num_ext_timestamp_descriptors; ++i)
+                    if (streams->ext_timestamp_descriptor[i].sequence_number < streams->last_sequence_number) {
+                        desc = streams->ext_timestamp_descriptor + i;
+                        break;
+                    }
+
+            // 3. array full: evict the entry farthest from the new sequence number
+            if (desc == NULL && streams->num_ext_timestamp_descriptors + 1 > MAX_NUM_TIMESTAMP_DESCRIPTOR) {
+                uint32_t max_diff = 0;
+                for (i = 0; i < streams->num_ext_timestamp_descriptors; ++i) {
+                    const uint32_t diff = DIFF(streams->ext_timestamp_descriptor[i].sequence_number, mpu_seq_num);
+                    if (diff > max_diff) {
+                        desc     = streams->ext_timestamp_descriptor + i;
+                        max_diff = diff;
+                    }
+                }
+                av_assert1(desc != NULL); // no exact match above, so every distance is > 0
+            }
+
+            // 4. otherwise append a new slot
+            if (desc == NULL) {
+                desc = av_dynarray2_add(
+                    (void **) &streams->ext_timestamp_descriptor, &streams->num_ext_timestamp_descriptors,
+                    sizeof(struct MPUExtendedTimestampDescriptor), NULL);
+                if (desc == NULL)
+                    return AVERROR(ENOMEM);
+            }
+
+            desc->sequence_number      = mpu_seq_num;
+            desc->decoding_time_offset = decoding_time_offset;
+            desc->num_of_au            = num_of_au;
+        }
+
+        for (i = 0; i < num_of_au; ++i) {
+            if (size < 2) return AVERROR_INVALIDDATA;
+            if (desc != NULL) desc->au[i].dts_pts_offset = AV_RB16(buf);
+            buf  += 2;
+            size -= 2;
+
+            if (pts_offset_type == 2) {
+                if (size < 2) return AVERROR_INVALIDDATA;
+                if (desc != NULL) desc->au[i].pts_offset = AV_RB16(buf);
+                buf  += 2;
+                size -= 2;
+            } else if (desc != NULL) {
+                desc->au[i].pts_offset = default_pts_offset;
+            }
+        }
+    }
+
+    return read_size;
+}
+
+/**
+ * Parse the additional ARIB subtitle info carried in an MH data component
+ * descriptor: select the subtitle codec from subtitle_format and store the
+ * language code in the stream metadata under "language".
+ *
+ * Only subtitle_format 0 (mapped to TTML) is recognised; other values leave
+ * codec_id untouched.
+ *
+ * @param stream stream whose codec parameters and metadata are updated
+ * @param buf    start of the subtitle info
+ * @param size   number of bytes available at buf
+ * @return 0 on success, AVERROR_INVALIDDATA when the data is truncated
+ */
+static int parse_additional_arib_subtitle_info(AVStream *stream, const uint8_t *buf, uint32_t size) {
+    // fixed part of the structure, in bytes
+    const uint32_t header_size = (8 + 4 + 1 + 3 + 24 + 2 + 4 + 2 + 4 + 4 + 4 + 4) / 8;
+    bool           start_mpu_sequence_number_flag;
+    char           language_code[4];
+    uint8_t        subtitle_format;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+
+    // byte 1 is a 4-bit field, the 1-bit flag, then 3 more bits: mask the flag
+    // so the 4-bit field above it cannot make it read as set
+    start_mpu_sequence_number_flag = (buf[1] >> 3) & 1;
+    memcpy(language_code, buf + 2, 3);
+    language_code[3] = '\0';
+    subtitle_format  = (buf[5] >> 2) & 0x0F;
+    size -= header_size;
+
+    // the optional 32-bit start sequence number is validated but not used
+    if (start_mpu_sequence_number_flag && size < 32 / 8)
+        return AVERROR_INVALIDDATA;
+
+    switch (subtitle_format) {
+    case 0x0:
+        stream->codecpar->codec_id = AV_CODEC_ID_TTML;
+        break;
+    }
+
+    av_dict_set(&stream->metadata, "language", language_code, 0);
+
+    return 0;
+}
+
+/**
+ * Parse an MH data component descriptor and hand its payload to the parser
+ * matching data_component_id. Only id 0x0020 (additional ARIB subtitle info)
+ * is handled; other ids are skipped.
+ *
+ * @param stream stream updated by the payload parser
+ * @param buf    start of the descriptor (16-bit tag, 8-bit length, payload)
+ * @param size   number of bytes available at buf
+ * @return total descriptor size (length byte + 3), AVERROR_INVALIDDATA when
+ *         the descriptor is too short or does not fit in `size`, or the error
+ *         returned by the payload parser
+ */
+static ssize_t parse_mh_data_component_descriptor(AVStream *stream, const uint8_t *buf, uint32_t size) {
+    const uint32_t header_size = (16 + 8 + 16) / 8;
+    uint32_t       read_size;
+    int            err;
+    uint16_t       data_component_id;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(AV_RB16(buf) == MH_DATA_COMPONENT_DESCRIPTOR);
+
+    // the declared length must cover the fixed fields and stay inside buf
+    read_size = buf[2] + 3;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+    data_component_id = AV_RB16(buf + 3);
+    buf  += header_size;
+    size  = read_size - header_size;
+
+    switch (data_component_id) {
+    case 0x0020: // additional ARIB subtitle info (Table 7-74, ARIB STD-B60, Version 1.14-E1)
+        err = parse_additional_arib_subtitle_info(stream, buf, size);
+        if (err < 0) return err;
+        break;
+    }
+
+    return read_size;
+}
+
+/**
+ * Dispatch one descriptor to its parser based on the 16-bit tag at buf.
+ *
+ * @return the number of bytes the descriptor occupies, a negative error from
+ *         the selected parser, AVERROR_INVALIDDATA when fewer than 3 bytes
+ *         are available, or AVERROR_PATCHWELCOME for an unhandled tag
+ */
+static ssize_t parse_descriptor(struct Streams *streams, const uint8_t *buf, uint32_t size) {
+    uint16_t tag;
+
+    if (size < 3) return AVERROR_INVALIDDATA;
+
+    tag = AV_RB16(buf);
+    if (tag == MPU_TIMESTAMP_DESCRIPTOR)
+        return parse_mpu_timestamp_descriptor(streams, buf, size);
+    if (tag == VIDEO_COMPONENT_DESCRIPTOR)
+        return parse_video_component_descriptor(streams->stream, buf, size);
+    if (tag == MH_STREAM_IDENTIFIER_DESCRIPTOR)
+        return buf[2] + 3; // skipped: only its length is used
+    if (tag == MH_AUDIO_COMPONENT_DESCRIPTOR)
+        return parse_mh_audio_component_descriptor(streams->stream, buf, size);
+    if (tag == MH_DATA_COMPONENT_DESCRIPTOR)
+        return parse_mh_data_component_descriptor(streams->stream, buf, size);
+    if (tag == MPU_EXTENDED_TIMESTAMP_DESCRIPTOR)
+        return parse_mpu_extended_timestamp_descriptor(streams, buf, size);
+
+    return AVERROR_PATCHWELCOME;
+}
+
+/**
+ * Parse an MMT package table: create or update a stream for every asset of a
+ * supported type and feed the asset descriptors to parse_descriptor().
+ *
+ * Supported asset types are 'hev1' (HEVC video), 'mp4a' (audio) and 'stpp'
+ * (subtitle); all of them need a location of type 0x00 (packet id). Assets of
+ * other types are skipped together with their descriptors.
+ *
+ * @param ctx  parser context that owns the streams
+ * @param buf  start of the table (first byte is the table id)
+ * @param size number of bytes available at buf
+ * @return total table size (16-bit length + 4), AVERROR_INVALIDDATA on
+ *         malformed data, AVERROR_PATCHWELCOME for unsupported locations or
+ *         descriptors, AVERROR(ENOMEM) on allocation failure
+ */
+static inline ssize_t parse_mmt_package_table(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    const uint32_t header_size = (8 + 8 + 16 + 8 + 8) / 8;
+    uint8_t        package_id_length;
+    uint16_t       descriptors_length;
+    uint8_t        number_of_assets;
+
+    size_t   i, j;
+    ssize_t  read;
+    uint32_t read_size;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(buf[0] == MMT_PACKAGE_TABLE_ID);
+
+    // the declared table length must cover the header and stay inside buf
+    read_size = AV_RB16(buf + 2) + 4;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+    package_id_length = buf[5];
+    buf  += header_size;
+    size  = read_size - header_size;
+
+    LIMIT_READ(package_id_length,)
+    LIMIT_READ(2, descriptors_length = AV_RB16(buf))
+    LIMIT_READ(descriptors_length,)
+    LIMIT_READ(1, number_of_assets = buf[0])
+
+    for (i = 0; i < number_of_assets; ++i) {
+        uint8_t                       asset_id_length;
+        uint8_t                       location_count;
+        uint16_t                      asset_descriptors_length;
+        uint32_t                      asset_type;
+        struct Streams                *stream = NULL;
+        struct MMTGeneralLocationInfo info;
+
+        // an asset without any location keeps this marker, which is not a
+        // packet-id location, instead of leaving `info` uninitialised
+        info.location_type = 0xFF;
+
+        LIMIT_READ((8 + 32 + 8) / 8, asset_id_length = buf[5])
+        LIMIT_READ(asset_id_length,)
+
+        LIMIT_READ(4, asset_type = AV_RL32(buf))
+
+        // skip reserved, asset_clock_relation_flag
+        LIMIT_READ((7 + 1 + 8) / 8, location_count = buf[1])
+
+        // when several locations are present, the last one wins
+        for (j = 0; j < location_count; ++j) {
+            if ((read = parse_mmt_general_location_info(&info, buf, size)) < 0)
+                return read;
+            buf  += read;
+            size -= read;
+        }
+
+        switch (asset_type) {
+        case MKTAG('h', 'e', 'v', '1'):
+            if (info.location_type != 0x00) return AVERROR_PATCHWELCOME;
+            stream = find_or_allocate_stream(ctx, info.type0.packet_id);
+            if (stream == NULL) return AVERROR(ENOMEM);
+            stream->stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+            stream->stream->codecpar->codec_id   = AV_CODEC_ID_HEVC;
+            stream->stream->codecpar->codec_tag  = asset_type;
+            break;
+        case MKTAG('m', 'p', '4', 'a'):
+            if (info.location_type != 0x00) return AVERROR_PATCHWELCOME;
+            stream = find_or_allocate_stream(ctx, info.type0.packet_id);
+            if (stream == NULL) return AVERROR(ENOMEM);
+            stream->stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+            stream->stream->codecpar->codec_tag  = asset_type;
+            break;
+        case MKTAG('s', 't', 'p', 'p'):
+            if (info.location_type == 0x00) {
+                stream = find_or_allocate_stream(ctx, info.type0.packet_id);
+                if (stream == NULL) return AVERROR(ENOMEM);
+                stream->stream->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+                stream->stream->codecpar->codec_tag  = asset_type;
+            }
+            break;
+        case MKTAG('a', 'a', 'p', 'p'):
+        case MKTAG('a', 's', 'g', 'd'):
+        case MKTAG('a', 'a', 'g', 'd'):
+            break; // TODO
+        }
+
+        LIMIT_READ(2, asset_descriptors_length = AV_RB16(buf))
+        if (size < asset_descriptors_length) return AVERROR_INVALIDDATA;
+        if (stream != NULL) {
+            // each descriptor only sees the bytes of this asset's descriptor area
+            for (j = 0; j < asset_descriptors_length; j += read)
+                if ((read = parse_descriptor(stream, buf + j, asset_descriptors_length - j)) < 0)
+                    return read;
+            if (j != asset_descriptors_length)
+                return AVERROR_INVALIDDATA;
+        }
+        buf  += asset_descriptors_length;
+        size -= asset_descriptors_length;
+    }
+
+    if (size != 0) return AVERROR_INVALIDDATA;
+    return read_size;
+}
+
+/**
+ * Parse a package list table. The package entries are only validated and
+ * skipped; nothing is stored from them yet.
+ *
+ * @param ctx  parser context (currently unused)
+ * @param buf  start of the table (first byte is the table id)
+ * @param size number of bytes available at buf
+ * @return total table size (16-bit length + 4), AVERROR_INVALIDDATA on
+ *         malformed data, AVERROR_PATCHWELCOME when IP delivery entries are
+ *         present
+ */
+static inline ssize_t parse_package_list_table(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    const uint32_t header_size = (8 + 8 + 16 + 8) / 8;
+    uint8_t        num_of_package;
+    uint8_t        num_of_ip_delivery;
+
+    size_t   i;
+    ssize_t  read;
+    uint32_t read_size;
+
+    if (size < header_size) return AVERROR_INVALIDDATA;
+    av_assert1(buf[0] == PACKAGE_LIST_TABLE_ID);
+
+    // the declared table length must cover the header and stay inside buf
+    read_size = AV_RB16(buf + 2) + 4;
+    if (read_size < header_size || read_size > size) return AVERROR_INVALIDDATA;
+    num_of_package = buf[4];
+    buf  += header_size;
+    size  = read_size - header_size;
+
+    for (i = 0; i < num_of_package; ++i) {
+        uint8_t                       package_id_length;
+        struct MMTGeneralLocationInfo info;
+
+        LIMIT_READ(1, package_id_length = buf[0])
+        LIMIT_READ(package_id_length,)
+
+        if ((read = parse_mmt_general_location_info(&info, buf, size)) < 0)
+            return read;
+        buf  += read;
+        size -= read;
+    }
+
+    LIMIT_READ(1, num_of_ip_delivery = buf[0])
+    if (num_of_ip_delivery > 0)
+        return AVERROR_PATCHWELCOME;
+
+    return read_size;
+}
+
+/**
+ * Dispatch one signalling table to its parser based on the table id in the
+ * first byte.
+ *
+ * @return the number of bytes consumed or a negative error from the selected
+ *         parser; tables with an unhandled id consume all of `size`
+ */
+static ssize_t parse_table(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    uint8_t table_id;
+
+    if (size < 2) return AVERROR_INVALIDDATA;
+
+    table_id = buf[0];
+    if (table_id == MMT_PACKAGE_TABLE_ID)
+        return parse_mmt_package_table(ctx, buf, size);
+    if (table_id == PACKAGE_LIST_TABLE_ID)
+        return parse_package_list_table(ctx, buf, size);
+
+    return size; // TODO
+}
+
+// 16-bit signalling message ids, matched against the first two bytes of a
+// message in parse_signalling_message()
+enum {
+    PA_MESSAGE_ID = 0x0000,
+};
+
+/**
+ * Parse a PA message: check its length field, skip the per-table index that
+ * follows the header and parse every table in the remaining bytes.
+ *
+ * @param ctx  parser context passed on to the table parsers
+ * @param buf  start of the message (16-bit message id first)
+ * @param size number of bytes available at buf
+ * @return 0 on success, AVERROR_INVALIDDATA on malformed data, or the error
+ *         returned by a table parser
+ */
+static inline int parse_pa_message(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    uint8_t number_of_tables;
+    ssize_t read;
+    size_t  i;
+
+    LIMIT_READ((16 + 8 + 32 + 8) / 8, {
+        av_assert1(AV_RB16(buf) == PA_MESSAGE_ID);
+        // the 32-bit length counts everything after the length field itself
+        if (AV_RB32(buf + 3) != size - 7) return AVERROR_INVALIDDATA;
+        number_of_tables = buf[7];
+    })
+
+    // skip the 4-byte index entry of every table
+    for (i = 0; i < number_of_tables; ++i) {
+        LIMIT_READ((8 + 8 + 16) / 8,)
+    }
+
+    while (size > 0) {
+        if ((read = parse_table(ctx, buf, size)) < 0) return (int) read;
+        // check explicitly instead of asserting: a table must make progress
+        // and must not claim more bytes than the message holds
+        if (read == 0 || (size_t) read > size) return AVERROR_INVALIDDATA;
+        buf  += read;
+        size -= read;
+    }
+
+    return 0;
+}
+
+/**
+ * Dispatch one signalling message based on its 16-bit message id. Only PA
+ * messages are parsed; any other message is accepted and ignored.
+ *
+ * @return 0 for ignored messages, the result of the message parser otherwise,
+ *         or AVERROR_INVALIDDATA when fewer than 3 bytes are available
+ */
+static int parse_signalling_message(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    if (size < 3) return AVERROR_INVALIDDATA;
+
+    if (AV_RB16(buf) == PA_MESSAGE_ID)
+        return parse_pa_message(ctx, buf, size);
+
+    return 0;
+}
+
+// 2-bit fragmentation indicator taken from the payload header: tells whether
+// a payload is complete or which part of a fragmented unit it carries
+enum FragmentationIndicator {
+    NOT_FRAGMENTED  = 0x0,
+    FIRST_FRAGMENT  = 0x1,
+    MIDDLE_FRAGMENT = 0x2,
+    LAST_FRAGMENT   = 0x3,
+};
+
+/*
+ * Reassembly buffer for fragmented payloads of one packet id. Assemblers
+ * form a singly linked list hanging off MMTPContext.assembler.
+ */
+struct FragmentAssembler {
+    uint16_t                 pid;
+    struct FragmentAssembler *next;
+
+    // growable buffer: `size` bytes used out of `cap` allocated
+    uint8_t *data;
+    size_t  size, cap;
+
+    // sequence number of the previous packet, used to detect gaps
+    uint32_t last_seq;
+
+    enum {
+        INIT = 0,     // no packet seen yet (state after av_mallocz)
+        NOT_STARTED,  // no fragmented unit in progress
+        IN_FRAGMENT,  // first fragment seen, more expected
+        SKIP,         // dropping middle/last fragments until a new unit starts
+    } state;
+};
+
+/**
+ * Append `size` bytes from `data` to the assembler buffer, growing it
+ * geometrically when needed.
+ *
+ * @return 0 on success, AVERROR(EOVERFLOW) when the buffer would exceed
+ *         UINT32_MAX bytes, AVERROR(ENOMEM) when the reallocation fails (the
+ *         existing buffer is left untouched in that case)
+ */
+inline static int append_data(struct FragmentAssembler *ctx, const uint8_t *data, uint32_t size) {
+    size_t needed;
+
+    // phrased as a subtraction so the check cannot wrap with a 32-bit size_t
+    if (ctx->size > UINT32_MAX || size > UINT32_MAX - ctx->size)
+        return AVERROR(EOVERFLOW);
+    needed = ctx->size + size;
+
+    if (ctx->cap < needed) {
+        void   *new_data;
+        size_t new_cap = ctx->cap == 0 ? 1024 : ctx->cap;
+
+        while (new_cap < needed) {
+            // doubling would wrap: fall back to the exact size
+            if (new_cap > SIZE_MAX / 2) {
+                new_cap = needed;
+                break;
+            }
+            new_cap *= 2;
+        }
+
+        new_data = av_realloc(ctx->data, new_cap);
+        // errno is not a reliable error source here and may be 0, which
+        // would be reported as success
+        if (new_data == NULL) return AVERROR(ENOMEM);
+        ctx->data = new_data;
+        ctx->cap  = new_cap;
+    }
+
+    memcpy(ctx->data + ctx->size, data, size);
+    ctx->size += size;
+    return 0;
+}
+
+/**
+ * Update the assembler for a newly received packet sequence number.
+ *
+ * The very first packet, and any packet that does not directly follow the
+ * previous one, puts the assembler into SKIP; on a gap, buffered fragment
+ * bytes are discarded and a warning is logged.
+ *
+ * @return always 0
+ */
+static inline int check_state(MMTPContext *ctx, struct FragmentAssembler *ass, uint32_t seq_num) {
+    const bool first_packet = ass->state == INIT;
+    const bool seq_jump     = !first_packet && seq_num != ass->last_seq + 1;
+
+    if (seq_jump) {
+        if (ass->size == 0) {
+            av_log(ctx->s, AV_LOG_WARNING, "Packet sequence number jump: %u + 1 != %u\n",
+                   ass->last_seq, seq_num);
+        } else {
+            av_log(ctx->s, AV_LOG_WARNING,
+                   "Packet sequence number jump: %u + 1 != %u, drop %zu bytes\n",
+                   ass->last_seq, seq_num, ass->size);
+            ass->size = 0;
+        }
+    }
+
+    if (first_packet || seq_jump)
+        ass->state = SKIP;
+
+    ass->last_seq = seq_num;
+    return 0;
+}
+
+/**
+ * Feed one payload into the fragment assembler.
+ *
+ * Unfragmented payloads go straight to `parser`. Fragments are buffered, and
+ * `parser` is called on the whole buffer once the last fragment arrives.
+ * While the assembler is in SKIP, middle and last fragments are dropped.
+ *
+ * @param ctx       assembler for the current packet id
+ * @param seq_num   packet sequence number, used only for logging
+ * @param indicator fragmentation indicator of this payload
+ * @param data      payload bytes
+ * @param size      number of payload bytes
+ * @param parser    callback that receives a complete unit
+ * @param opaque    context passed to `parser` and used for logging
+ * @return the parser result, 0 when data was buffered or dropped,
+ *         AVERROR_INVALIDDATA on an unexpected fragment order, or the error
+ *         returned by append_data()
+ */
+static int assemble_fragment(
+    struct FragmentAssembler *ctx, uint32_t seq_num, enum FragmentationIndicator indicator,
+    const uint8_t *data, uint32_t size, int (*parser)(MMTPContext *, const uint8_t *, uint32_t),
+    MMTPContext *opaque) {
+    int err;
+
+    switch (indicator) {
+    case NOT_FRAGMENTED:
+        if (ctx->state == IN_FRAGMENT) return AVERROR_INVALIDDATA;
+        ctx->state = NOT_STARTED;
+        return parser(opaque, data, size);
+
+    case FIRST_FRAGMENT:
+        if (ctx->state == IN_FRAGMENT) return AVERROR_INVALIDDATA;
+        ctx->state = IN_FRAGMENT;
+        return append_data(ctx, data, size);
+
+    case MIDDLE_FRAGMENT:
+    case LAST_FRAGMENT:
+        if (ctx->state == SKIP) {
+            av_log(opaque->s, AV_LOG_VERBOSE, "Drop packet %u\n", seq_num);
+            return 0;
+        }
+        if (ctx->state != IN_FRAGMENT) return AVERROR_INVALIDDATA;
+
+        err = append_data(ctx, data, size);
+        if (indicator == MIDDLE_FRAGMENT) return err;
+        if (err < 0) return err;
+
+        // last fragment: hand over the complete unit and reset the buffer
+        err        = parser(opaque, ctx->data, ctx->size);
+        ctx->size  = 0;
+        ctx->state = NOT_STARTED;
+        return err;
+    }
+
+    return AVERROR_OPTION_NOT_FOUND;
+}
+
+/**
+ * Look up the fragment assembler for a packet id, creating a zero-initialized
+ * one at the head of the context's list when none exists yet.
+ *
+ * @return the assembler, or NULL if allocation failed
+ */
+static inline struct FragmentAssembler *find_or_allocate_assembler(MMTPContext *ctx, uint16_t pid) {
+    struct FragmentAssembler *node = ctx->assembler;
+
+    while (node != NULL && node->pid != pid)
+        node = node->next;
+    if (node != NULL)
+        return node;
+
+    node = av_mallocz(sizeof(*node));
+    if (node == NULL)
+        return NULL;
+
+    node->pid      = pid;
+    node->next     = ctx->assembler;
+    ctx->assembler = node;
+    return node;
+}
+
+/**
+ * Parse the payload of a signalling-message MMTP packet.
+ *
+ * The first header byte carries the fragmentation indicator (top two bits),
+ * the length extension flag (bit 1) and the aggregation flag (bit 0). A
+ * non-aggregated payload goes through the fragment assembler. An aggregated
+ * payload is a sequence of length-prefixed messages (16-bit length, or 32-bit
+ * when the length extension flag is set) that are parsed one by one.
+ *
+ * @param ctx     parser context; ctx->current_pid selects the assembler
+ * @param seq_num packet sequence number, used for continuity tracking
+ * @param buf     payload bytes
+ * @param size    payload length in bytes
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static inline int parse_signalling_messages(
+    MMTPContext *ctx, uint32_t seq_num, const uint8_t *buf, uint16_t size) {
+    int                         err;
+    enum FragmentationIndicator fragmentation_indicator;
+    bool                        length_extension_flag;
+    bool                        aggregation_flag;
+
+    struct FragmentAssembler *assembler = find_or_allocate_assembler(ctx, ctx->current_pid);
+    // av_mallocz() does not promise to set errno, so report the failure explicitly
+    if (assembler == NULL) return AVERROR(ENOMEM);
+
+    LIMIT_READ(2, {
+        fragmentation_indicator = buf[0] >> 6;
+        length_extension_flag   = (buf[0] >> 1) & 1;
+        aggregation_flag        = buf[0] & 1;
+    })
+
+    if ((err = check_state(ctx, assembler, seq_num)) < 0)
+        return err;
+
+    if (!aggregation_flag)
+        return assemble_fragment(assembler, seq_num, fragmentation_indicator, buf, size, parse_signalling_message, ctx);
+
+    if (fragmentation_indicator != NOT_FRAGMENTED)
+        return AVERROR_INVALIDDATA; // cannot be both fragmented and aggregated
+
+    while (size > 0) {
+        uint32_t length;
+
+        if (length_extension_flag) {
+            LIMIT_READ(4, length = AV_RB32(buf))
+        } else {
+            LIMIT_READ(2, length = AV_RB16(buf))
+        }
+        LIMIT_READ(length, if ((err = parse_signalling_message(ctx, buf, length)) < 0) return err)
+    }
+
+    return 0;
+}
+
+/**
+ * Compute dts/pts of the pending packet for the next access unit of a stream.
+ *
+ * Both the MPU timestamp descriptor and the MPU extended timestamp descriptor
+ * matching the stream's current MPU sequence number are required. The dts is
+ * the rescaled presentation time minus the decoding time offset plus the
+ * pts_offset of every access unit already emitted from this MPU; the pts adds
+ * the dts_pts_offset of the current access unit.
+ *
+ * @return 0 on success (au_count is advanced), FFERROR_REDO when a descriptor
+ *         is still missing, AVERROR_INVALIDDATA when the MPU has no further
+ *         access unit entry
+ */
+static inline int fill_pts_dts(MMTPContext *ctx, struct Streams *s) {
+    struct MPUTimestampDescriptor         *ts_desc = NULL;
+    struct MPUExtendedTimestampDescriptor *ext     = NULL;
+    int64_t                               base;
+    int64_t                               dts;
+    size_t                                k;
+
+    for (k = 0; k < s->num_timestamp_descriptors && ts_desc == NULL; ++k)
+        if (s->timestamp_descriptor[k].sequence_number == s->last_sequence_number)
+            ts_desc = &s->timestamp_descriptor[k];
+
+    for (k = 0; k < s->num_ext_timestamp_descriptors && ext == NULL; ++k)
+        if (s->ext_timestamp_descriptor[k].sequence_number == s->last_sequence_number)
+            ext = &s->ext_timestamp_descriptor[k];
+
+    if (ts_desc == NULL || ext == NULL)
+        return FFERROR_REDO;
+
+    base = av_rescale(ts_desc->presentation_time, s->stream->time_base.den, 1000000ll * s->stream->time_base.num);
+
+    if (s->au_count >= ext->num_of_au)
+        return AVERROR_INVALIDDATA;
+
+    dts = base - ext->decoding_time_offset;
+    for (k = 0; k < s->au_count; ++k)
+        dts += ext->au[k].pts_offset;
+
+    ctx->pkt->dts = dts;
+    ctx->pkt->pts = ctx->pkt->dts + ext->au[s->au_count].dts_pts_offset;
+
+    ++s->au_count;
+    return 0;
+}
+
+/**
+ * Emit a closed caption (TTML) MFU as a packet.
+ *
+ * A five byte header is consumed first; subsample_number,
+ * last_subsample_number and the data_type / length_ext_flag /
+ * subsample_info_list_flag bits are taken from its bytes 2 to 4. It is
+ * followed by the data size (32-bit when length_ext_flag is set, 16-bit
+ * otherwise) and, for the first subsample with the list flag set, a subsample
+ * information list that is skipped. Only data_type 0 is supported.
+ *
+ * On success the payload is copied into ctx->pkt, ctx->pkt is set to NULL to
+ * mark it as consumed, and the stream's pending flags/offset are reset.
+ *
+ * @return 0 on success, AVERROR_PATCHWELCOME for unsupported data types, or
+ *         another negative AVERROR code
+ */
+static inline int emit_closed_caption_mfu(MMTPContext *ctx, struct Streams *st, const uint8_t *buf, uint32_t size) {
+    uint8_t  data_type, subsample_number, last_subsample_number;
+    uint32_t data_size;
+    size_t   i;
+    int      err;
+    bool     length_ext_flag, subsample_info_list_flag;
+
+    av_assert0(ctx->pkt != NULL);
+
+    LIMIT_READ((8 + 8 + 8 + 8 + 4 + 1 + 1 + 2) / 8, {
+        subsample_number         = buf[2];
+        last_subsample_number    = buf[3];
+        data_type                = buf[4] >> 4;
+        length_ext_flag          = (buf[4] >> 3) & 1;
+        subsample_info_list_flag = (buf[4] >> 2) & 1;
+    });
+
+    // plain constant instead of a 0b literal: binary literals are not part of C11
+    if (data_type != 0) return AVERROR_PATCHWELCOME;
+
+    if (length_ext_flag) {
+        LIMIT_READ(4, data_size = AV_RB32(buf));
+    } else {
+        LIMIT_READ(2, data_size = AV_RB16(buf));
+    }
+
+    // skip the subsample information list, it is not needed to emit the payload
+    if (subsample_number == 0 && last_subsample_number > 0 && subsample_info_list_flag) {
+        for (i = 0; i < last_subsample_number; ++i) {
+            LIMIT_READ((4 + 4) / 8,);
+            if (length_ext_flag) {
+                LIMIT_READ(4,);
+            } else {
+                LIMIT_READ(2,);
+            }
+        }
+    }
+
+    if (size < data_size) return AVERROR_INVALIDDATA;
+    if ((err = av_new_packet(ctx->pkt, data_size)) < 0) return err;
+    memcpy(ctx->pkt->data, buf, data_size);
+    ctx->pkt->stream_index = st->stream->index;
+    ctx->pkt->flags       |= st->flags;
+    ctx->pkt->pos          = st->offset;
+    ctx->pkt               = NULL; // packet has been handed out
+
+    st->flags  = 0;
+    st->offset = -1;
+    return 0;
+}
+
+/**
+ * Hand a finished, padded buffer out as the pending packet.
+ *
+ * Takes ownership of buf: it is either attached to the packet or released
+ * when no timestamp can be computed. On success ctx->pkt is set to NULL to
+ * mark the packet as consumed and the stream's pending flags/offset are reset.
+ *
+ * @return 0 on success, otherwise the error from fill_pts_dts()
+ */
+static int emit_packet(MMTPContext *ctx, struct Streams *st, AVBufferRef *buf) {
+    AVPacket *out;
+    int      ret;
+
+    av_assert0(ctx->pkt != NULL);
+    out = ctx->pkt;
+    av_packet_unref(out);
+
+    ret = fill_pts_dts(ctx, st);
+    if (ret < 0) {
+        av_buffer_unref(&buf);
+        return ret;
+    }
+
+    out->buf          = buf;
+    out->data         = buf->data;
+    out->size         = buf->size - AV_INPUT_BUFFER_PADDING_SIZE;
+    out->stream_index = st->stream->index;
+    out->flags       |= st->flags;
+    out->pos          = st->offset;
+
+    ctx->pkt   = NULL;
+    st->flags  = 0;
+    st->offset = -1;
+    return 0;
+}
+
+/**
+ * Consume one complete MFU payload of the current stream.
+ *
+ * - HEVC: the payload is a 32-bit length followed by one NAL unit. The length
+ *   is replaced by an Annex B start code and the unit is appended to the
+ *   stream's pending buffer; the buffer is emitted once a VCL NAL unit
+ *   (nal_unit_type < 0x20) arrives.
+ * - AAC LATM: a three byte header (0x56, 0xe0 | size >> 8, size & 0xff) is
+ *   prepended and the packet is emitted right away.
+ * - TTML: forwarded to emit_closed_caption_mfu().
+ *
+ * @return 0 on success (with or without an emitted packet), a negative
+ *         AVERROR code otherwise
+ */
+static int consume_mfu(MMTPContext *ctx, const uint8_t *buf, uint32_t size) {
+    int            err;
+    AVBufferRef    *buf_ref;
+    size_t         old_size;
+    struct Streams *st = find_current_stream(ctx);
+    av_assert0(st != NULL);
+
+    switch (st->stream->codecpar->codec_id) {
+    case AV_CODEC_ID_HEVC:
+        LIMIT_READ(4, if (AV_RB32(buf) != size - 4) return AVERROR_INVALIDDATA)
+        if (size < 1) return AVERROR_INVALIDDATA; // we expect to extract NAL unit header type below
+        if ((buf[0] >> 7) != 0) return AVERROR_INVALIDDATA; // forbidden_zero_bit
+
+        old_size = st->pending_buffer == NULL ? 0 : (st->pending_buffer->size - AV_INPUT_BUFFER_PADDING_SIZE);
+        if ((err = av_buffer_realloc(&st->pending_buffer, old_size + size + 4 + AV_INPUT_BUFFER_PADDING_SIZE)) < 0)
+            return err;
+        // fix start code (00 00 00 01)
+        AV_WB32(st->pending_buffer->data + old_size, 1);
+        memcpy(st->pending_buffer->data + old_size + 4, buf, size);
+        // nal_unit_type is the 6-bit field after the forbidden_zero_bit; the
+        // mask is written in hex because 0b literals are not part of C11
+        if (((buf[0] >> 1) & 0x3f) < 0x20) { // a VCL NAL unit
+            // Because we can't emit a packet without a valid PTS, we need to
+            // aggregate the non-VCL NAL units with VCL ones. Although we didn't
+            // technically identify an access unit here, this works for all samples
+            // we have.
+            buf_ref = st->pending_buffer;
+            st->pending_buffer = NULL;
+
+            memset(buf_ref->data + old_size + size + 4, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            return emit_packet(ctx, st, buf_ref);
+        }
+        break;
+    case AV_CODEC_ID_AAC_LATM:
+        if (size >> 13) return AVERROR(EOVERFLOW); // the length must fit in 13 bits
+        if ((buf_ref = av_buffer_alloc(size + 3 + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
+            return AVERROR(ENOMEM);
+        buf_ref->data[0] = 0x56;
+        buf_ref->data[1] = 0xe0 | (size >> 8);
+        buf_ref->data[2] = size & 0xff;
+        memcpy(buf_ref->data + 3, buf, size);
+        memset(buf_ref->data + 3 + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        return emit_packet(ctx, st, buf_ref);
+    case AV_CODEC_ID_TTML:
+        return emit_closed_caption_mfu(ctx, st, buf, size);
+    default:
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
+static inline int parse_mfu_timed_data( // Pass a timed MFU on to the fragment assembler, with consume_mfu as the parser.
+    MMTPContext *ctx, struct FragmentAssembler *assembler,
+    uint32_t seq_num, enum FragmentationIndicator indicator,
+    const uint8_t *buf, uint16_t size) {
+    LIMIT_READ((32 + 32 + 32 + 8 + 8) / 8,) // 14 header bytes, none of them used; presumably LIMIT_READ (defined earlier in the file) skips them - confirm
+    return assemble_fragment(assembler, seq_num, indicator, buf, size, consume_mfu, ctx);
+}
+
+static inline int parse_mfu_non_timed_data( // Pass a non-timed MFU on to the fragment assembler, with consume_mfu as the parser.
+    MMTPContext *ctx, struct FragmentAssembler *assembler,
+    uint32_t seq_num, enum FragmentationIndicator indicator,
+    const uint8_t *buf, uint16_t size) {
+    LIMIT_READ(32 / 8,) // 4 header bytes, none of them used; presumably LIMIT_READ (defined earlier in the file) skips them - confirm
+    return assemble_fragment(assembler, seq_num, indicator, buf, size, consume_mfu, ctx);
+}
+
+/**
+ * Parse the payload of an MPU MMTP packet for the current packet id.
+ *
+ * Packets of unknown or fully discarded streams are ignored. The eight byte
+ * MPU header is validated and decoded; only fragment type 2 is processed.
+ * Until the first random access point is seen on an assembler, packets are
+ * skipped with FFERROR_REDO. The MPU sequence number must stay the same or
+ * advance by one; an advance resets the stream's access unit counter.
+ * Aggregated payloads are split into 16-bit length prefixed MFUs, everything
+ * else goes through the fragment assembler.
+ *
+ * @param ctx     parser context; ctx->pkt must not be NULL
+ * @param seq_num packet sequence number, used for continuity tracking
+ * @param buf     payload bytes
+ * @param size    payload length in bytes
+ * @return 0 on success or when the packet is ignored, FFERROR_REDO while
+ *         waiting for the first RAP, another negative AVERROR code on error
+ */
+static inline int parse_mpu(MMTPContext *ctx, uint32_t seq_num, const uint8_t *buf, uint16_t size) {
+    int                         err;
+    uint8_t                     fragment_type;
+    bool                        timed_flag;
+    enum FragmentationIndicator fragmentation_indicator;
+    bool                        aggregation_flag;
+    uint16_t                    length;
+    uint32_t                    mpu_sequence_number;
+    struct FragmentAssembler    *assembler;
+    struct Streams              *streams;
+
+    streams = find_current_stream(ctx);
+    if (streams == NULL) return 0;
+    if (streams->stream->discard >= AVDISCARD_ALL)
+        return 0;
+
+    assembler = find_or_allocate_assembler(ctx, ctx->current_pid);
+    // av_mallocz() does not promise to set errno, so report the failure explicitly
+    if (assembler == NULL) return AVERROR(ENOMEM);
+
+    LIMIT_READ((16 + 4 + 1 + 2 + 1 + 8 + 32) / 8, {
+        // the leading 16-bit length must cover exactly the rest of the payload
+        if (AV_RB16(buf) != size - 2)
+            return AVERROR_INVALIDDATA;
+
+        fragment_type           = buf[2] >> 4;
+        timed_flag              = (buf[2] >> 3) & 1;
+        // two-bit field; hex mask because 0b literals are not part of C11
+        fragmentation_indicator = (buf[2] >> 1) & 0x3;
+        aggregation_flag        = buf[2] & 1;
+
+        mpu_sequence_number = AV_RB32(buf + 4);
+    })
+
+    if (aggregation_flag && fragmentation_indicator != NOT_FRAGMENTED)
+        return AVERROR_INVALIDDATA; // cannot be both fragmented and aggregated
+
+    if (fragment_type != 2) return 0;
+
+    if (assembler->state == INIT && !ctx->is_rap) {
+        // wait for the first RAP
+        return FFERROR_REDO;
+    }
+
+    if (assembler->state == INIT) {
+        streams->last_sequence_number = mpu_sequence_number;
+    } else if (mpu_sequence_number == streams->last_sequence_number + 1) {
+        streams->last_sequence_number = mpu_sequence_number;
+        streams->au_count             = 0;
+    } else if (mpu_sequence_number != streams->last_sequence_number) {
+        av_log(streams->stream, AV_LOG_ERROR, "MPU sequence number jump: %u + 1 != %u\n",
+               streams->last_sequence_number, mpu_sequence_number);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((err = check_state(ctx, assembler, seq_num)) < 0)
+        return err;
+
+    // remember where the unit started so the emitted packet reports that position
+    if (fragmentation_indicator == NOT_FRAGMENTED || fragmentation_indicator == FIRST_FRAGMENT)
+        streams->offset = ctx->pkt->pos;
+
+    if (ctx->is_rap)
+        streams->flags |= AV_PKT_FLAG_KEY;
+
+    if (timed_flag) {
+        if (aggregation_flag) {
+            while (size > 0) {
+                LIMIT_READ(2, length = AV_RB16(buf))
+                LIMIT_READ(length, {
+                    err = parse_mfu_timed_data(ctx, assembler, seq_num, NOT_FRAGMENTED, buf, length);
+                    if (err < 0) return err;
+                })
+            }
+        } else {
+            return parse_mfu_timed_data(ctx, assembler, seq_num, fragmentation_indicator, buf, size);
+        }
+    } else {
+        if (aggregation_flag) {
+            while (size > 0) {
+                LIMIT_READ(2, length = AV_RB16(buf))
+                LIMIT_READ(length, {
+                    err = parse_mfu_non_timed_data(ctx, assembler, seq_num, NOT_FRAGMENTED, buf, length);
+                    if (err < 0) return err;
+                })
+            }
+        } else {
+            return parse_mfu_non_timed_data(ctx, assembler, seq_num, fragmentation_indicator, buf, size);
+        }
+    }
+
+    return 0;
+}
+
+/* Allocate a zero-initialized parser context bound to the given program. */
+MMTPContext *avpriv_mmtp_parse_open(AVProgram *program) {
+    MMTPContext *mmtp = av_mallocz(sizeof(*mmtp));
+
+    if (mmtp != NULL)
+        mmtp->program = program;
+    return mmtp;
+}
+
+/*
+ * Parse one MMTP packet.
+ *
+ * The fixed twelve byte header supplies the packet counter flag, the
+ * extension header flag, the RAP flag, the payload type, the packet id and
+ * the packet sequence number. The optional packet counter and extension
+ * header are skipped. MPU payloads (type 0x00) are only parsed when a packet
+ * was supplied; signalling messages (type 0x02) are always parsed; any other
+ * payload type is ignored.
+ *
+ * Returns 0 when ctx->pkt is NULL afterwards (a packet was emitted, or none
+ * was supplied), FFERROR_REDO when the supplied packet is still unused, or
+ * another negative AVERROR code on error.
+ */
+int avpriv_mmtp_parse_packet(MMTPContext *ctx, AVFormatContext *s, AVPacket *pkt, const uint8_t *buf, uint16_t size) {
+    bool     packet_counter_flag;
+    bool     extension_header_flag;
+    uint8_t  payload_type;
+    uint32_t packet_sequence_number;
+    int      err = 0;
+
+    ctx->s   = s;
+    ctx->pkt = pkt;
+
+    LIMIT_READ((2 + 1 + 2 + 1 + 1 + 1 + 2 + 6 + 16 + 32 + 32) / 8, {
+        packet_counter_flag    = (buf[0] >> 5) & 1;
+        extension_header_flag  = (buf[0] >> 1) & 1;
+        ctx->is_rap            = buf[0] & 1;
+        // six-bit field; hex mask because 0b literals are not part of C11
+        payload_type           = buf[1] & 0x3f;
+        ctx->current_pid       = AV_RB16(buf + 2);
+        packet_sequence_number = AV_RB32(buf + 8);
+    })
+
+    if (packet_counter_flag) {
+        LIMIT_READ(4,)
+    }
+
+    if (extension_header_flag) {
+        uint16_t extension_header_length;
+        LIMIT_READ(4, extension_header_length = AV_RB16(buf + 2))
+        LIMIT_READ(extension_header_length,)
+    }
+
+    switch (payload_type) {
+    case 0x00: // MPU
+        if (pkt != NULL)
+            err = parse_mpu(ctx, packet_sequence_number, buf, size);
+        break;
+    case 0x02: // signalling messages
+        err = parse_signalling_messages(ctx, packet_sequence_number, buf, size);
+        break;
+    default: // other payload types are deliberately ignored
+        break;
+    }
+    if (err < 0) return err;
+    return ctx->pkt == NULL ? 0 : FFERROR_REDO;
+}
+
+/*
+ * Forget all in-flight parsing state: every assembler goes back to INIT with
+ * an empty buffer, and every stream drops its sequence tracking, pending
+ * flags/offset and pending buffer.
+ */
+void avpriv_mmtp_reset_state(MMTPContext *ctx) {
+    struct FragmentAssembler *ass = ctx->assembler;
+    struct Streams           *st  = ctx->streams;
+
+    while (ass != NULL) {
+        ass->state = INIT;
+        ass->size  = 0;
+        ass        = ass->next;
+    }
+
+    while (st != NULL) {
+        st->last_sequence_number = 0;
+        st->au_count             = 0;
+        st->flags                = 0;
+        st->offset               = -1;
+        av_buffer_unref(&st->pending_buffer);
+        st = st->next;
+    }
+}
+
+/*
+ * Free a parser context together with its assemblers and stream records.
+ * Passing NULL is allowed and does nothing, so callers may close a context
+ * whose allocation failed.
+ */
+void avpriv_mmtp_parse_close(MMTPContext *ctx) {
+    struct FragmentAssembler *ass;
+    struct Streams           *streams;
+
+    if (ctx == NULL)
+        return;
+
+    for (ass = ctx->assembler; ass != NULL;) {
+        struct FragmentAssembler *next = ass->next;
+        av_free(ass->data); // av_free(NULL) is a no-op
+        av_free(ass);
+        ass = next;
+    }
+
+    for (streams = ctx->streams; streams != NULL;) {
+        struct Streams *next = streams->next;
+        av_free(streams->timestamp_descriptor);
+        av_free(streams->ext_timestamp_descriptor);
+        av_buffer_unref(&streams->pending_buffer);
+        av_free(streams);
+        streams = next;
+    }
+
+    av_free(ctx);
+}
diff --git a/libavformat/mmtp.h b/libavformat/mmtp.h
new file mode 100644
index 0000000000..ff00115796
--- /dev/null
+++ b/libavformat/mmtp.h
@@ -0,0 +1,61 @@ 
+/*
+ * MPEG Media Transport Protocol (MMTP) parser, as defined in ISO/IEC 23008-1.
+ * Copyright (c) 2023 SuperFashi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFORMAT_MMTP_H
+#define AVFORMAT_MMTP_H
+
+#include "avformat.h"
+
+typedef struct MMTPContext MMTPContext;
+
+/**
+ * Open an MMT protocol parser context.
+ * @param program The AVProgram this context is associated with.
+ * @return A new MMTPContext, or NULL on allocation error.
+ */
+MMTPContext *avpriv_mmtp_parse_open(AVProgram *program);
+
+/**
+ * Parse an MMT protocol packet.
+ *
+ * @param ctx The MMT protocol parser context.
+ * @param s The AVFormatContext.
+ * @param pkt The AVPacket to fill.
+ * @param buf The bytes of one MMTP packet, starting at the MMTP packet header.
+ * @param size The length of buf in bytes.
+ * @return 0 if a new AVPacket is emitted, FFERROR_REDO if the next packet is needed, or another negative value on error.
+ */
+int avpriv_mmtp_parse_packet(MMTPContext *ctx, AVFormatContext *s, AVPacket *pkt, const uint8_t *buf, uint16_t size);
+
+/**
+ * Reset the state of the MMTP parser. Useful when seeking.
+ *
+ * @param ctx The MMT protocol parser context.
+ */
+void avpriv_mmtp_reset_state(MMTPContext *ctx);
+
+/**
+ * Close an MMT protocol parser context, frees all associated resources.
+ *
+ * @param ctx The MMT protocol parser context.
+ */
+void avpriv_mmtp_parse_close(MMTPContext *ctx);
+
+#endif /* AVFORMAT_MMTP_H */
diff --git a/libavformat/mmttlv.c b/libavformat/mmttlv.c
new file mode 100644
index 0000000000..c0b25df7af
--- /dev/null
+++ b/libavformat/mmttlv.c
@@ -0,0 +1,324 @@ 
+/*
+ * MMT protocol over TLV packets (MMT/TLV) demuxer, as defined in ARIB STD-B32.
+ * Copyright (c) 2023 SuperFashi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
+#include "libavutil/internal.h"
+#include "avio_internal.h"
+#include "avformat.h"
+#include "mmtp.h"
+#include "demux.h"
+#include "internal.h"
+
+#define HEADER_BYTE 0x7F // first byte of every TLV packet; hex because 0b literals are not part of C11
+
+enum { // values of the packet type byte that follows HEADER_BYTE in a TLV header
+    UNDEFINED_PACKET            = 0x00,
+    IPV4_PACKET                 = 0x01,
+    IPV6_PACKET                 = 0x02,
+    HEADER_COMPRESSED_IP_PACKET = 0x03, // the only type whose payload this demuxer parses
+    TRANSMISSION_CONTROL_PACKET = 0xFE,
+    NULL_PACKET                 = 0xFF, // only used by the probe, which expects an all-0xFF payload
+};
+
+enum { // values of the third payload byte of a header compressed IP packet
+    CONTEXT_IDENTIFICATION_PARTIAL_IPV4_AND_PARTIAL_UDP_HEADER = 0x20,
+    CONTEXT_IDENTIFICATION_IPV4_HEADER                         = 0x21,
+    CONTEXT_IDENTIFICATION_PARTIAL_IPV6_AND_PARTIAL_UDP_HEADER = 0x60,
+    CONTEXT_IDENTIFICATION_NO_COMPRESSED_HEADER                = 0x61,
+};
+
+static int mmttlv_probe(const AVProbeData *p) { // Score the probe buffer by walking it as a chain of TLV packets.
+    size_t   i, j;
+    uint8_t  packet_type;
+    uint16_t data_length;
+
+    int processed  = 0; // TLV headers walked so far
+    int recognized = 0; // walked packets that matched a known pattern
+
+    for (i = 0; i + 4 < p->buf_size && processed < 100; ++processed) { // look at no more than 100 packets
+        if (p->buf[i] != HEADER_BYTE) return 0; // a packet that does not start with the header byte rules the format out
+
+        packet_type = p->buf[i + 1];
+        data_length = AV_RB16(p->buf + i + 2); // 16-bit big-endian payload length
+        i += 4; // i now points at the payload
+
+        if (packet_type == HEADER_COMPRESSED_IP_PACKET) {
+            if (data_length < 3 || i + 2 >= p->buf_size) goto skip; // the third payload byte must exist and be inside the buffer
+            switch (p->buf[i + 2]) {
+            case CONTEXT_IDENTIFICATION_PARTIAL_IPV4_AND_PARTIAL_UDP_HEADER:
+            case CONTEXT_IDENTIFICATION_IPV4_HEADER:
+            case CONTEXT_IDENTIFICATION_PARTIAL_IPV6_AND_PARTIAL_UDP_HEADER:
+            case CONTEXT_IDENTIFICATION_NO_COMPRESSED_HEADER:
+                ++recognized;
+            }
+        } else if (packet_type == NULL_PACKET) {
+            // null packets should contain all 0xFFs
+            for (j = i; j < i + data_length && j < p->buf_size; ++j) {
+                if (p->buf[j] != 0xFF) goto skip;
+            }
+            ++recognized;
+        }
+
+        skip:
+        i += data_length; // step over the payload to the next TLV header
+    }
+
+    return recognized * AVPROBE_SCORE_MAX / FFMAX(processed, 10); // share of recognized packets; the divisor is never below 10
+}
+
+struct MMTTLVContext { // demuxer private data
+    struct Program { // singly linked list with one MMTP parser per context id
+        uint32_t       cid;   // context id taken from the compressed IP packet header
+        MMTPContext    *mmtp; // parser fed with the packets of this context id
+        struct Program *next;
+    } *programs;
+
+    int64_t last_pos;    // stream position expected at the next read; a mismatch triggers a resync
+    size_t  resync_size; // bound used by the resync search loop
+
+    size_t  cap;  // capacity of buf in bytes
+    uint8_t *buf; // reusable buffer holding the payload of the current TLV packet
+};
+
+/**
+ * Parse the payload of a header compressed IP packet and feed the contained
+ * MMTP packet to the parser that belongs to its context id.
+ *
+ * The payload starts with a 16-bit word whose upper twelve bits are the
+ * context id, followed by a context identification header type byte. A parser
+ * (and an AVProgram with the context id as its id) is created the first time
+ * a context id is seen. Only the "partial IPv6 + partial UDP header" and
+ * "no compressed header" types are supported.
+ *
+ * @return the result of avpriv_mmtp_parse_packet(), AVERROR_PATCHWELCOME for
+ *         IPv4 header types, or another negative AVERROR code
+ */
+static int mmttlv_read_compressed_ip_packet(
+    struct MMTTLVContext *ctx, AVFormatContext *s, AVPacket *pkt, const uint8_t *buf, uint16_t size) {
+    // partial udp header are udp header without data length (16 bits) and checksum (16 bits)
+#define PARTIAL_UDP_HEADER_LENGTH (8 - 4)
+    // partial ipv6 header are ipv6 header without payload length (16 bits)
+#define PARTIAL_IPV6_HEADER_LENGTH (40 - 2)
+
+    uint32_t       context_id;
+    struct Program *program;
+
+    if (size < 3)
+        return AVERROR_INVALIDDATA;
+    context_id = AV_RB16(buf) >> 4;
+    buf += 3;
+    size -= 3;
+
+    for (program = ctx->programs; program != NULL; program = program->next)
+        if (program->cid == context_id)
+            break;
+
+    if (program == NULL) {
+        // the allocators below do not promise to set errno, so report ENOMEM explicitly
+        AVProgram *p = av_new_program(s, context_id);
+        if (p == NULL) return AVERROR(ENOMEM);
+
+        program = av_malloc(sizeof(*program));
+        if (program == NULL) return AVERROR(ENOMEM);
+
+        program->mmtp = avpriv_mmtp_parse_open(p);
+        if (program->mmtp == NULL) {
+            // never link an entry without a parser: it would be dereferenced later
+            av_free(program);
+            return AVERROR(ENOMEM);
+        }
+        program->next = ctx->programs;
+        ctx->programs = program;
+        program->cid  = context_id;
+    }
+
+    switch (buf[-1]) { // context identification header type byte
+    case CONTEXT_IDENTIFICATION_PARTIAL_IPV4_AND_PARTIAL_UDP_HEADER:
+    case CONTEXT_IDENTIFICATION_IPV4_HEADER:
+        return AVERROR_PATCHWELCOME;
+    case CONTEXT_IDENTIFICATION_PARTIAL_IPV6_AND_PARTIAL_UDP_HEADER:
+        if (size < PARTIAL_IPV6_HEADER_LENGTH + PARTIAL_UDP_HEADER_LENGTH)
+            return AVERROR_INVALIDDATA;
+        size -= PARTIAL_IPV6_HEADER_LENGTH + PARTIAL_UDP_HEADER_LENGTH;
+        buf += PARTIAL_IPV6_HEADER_LENGTH + PARTIAL_UDP_HEADER_LENGTH;
+        /* fall through */
+    case CONTEXT_IDENTIFICATION_NO_COMPRESSED_HEADER:
+        break;
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+
+    return avpriv_mmtp_parse_packet(program->mmtp, s, pkt, buf, size);
+}
+
+/**
+ * Read one TLV packet and pass header compressed IP packets on to the parser.
+ *
+ * When the stream position differs from the position recorded after the
+ * previous read, the function first resynchronizes: it searches, within
+ * resync_size bytes, for a header byte whose announced payload is directly
+ * followed by another header byte, and then resets every MMTP parser.
+ *
+ * @param s   demuxer context
+ * @param pkt packet to fill, may be NULL (mmttlv_read_header() does that)
+ * @return the result of mmttlv_read_compressed_ip_packet(), FFERROR_REDO for
+ *         skipped packet types, AVERROR_EOF at end of file, or another
+ *         negative AVERROR code
+ */
+static int mmttlv_read_packet(AVFormatContext *s, AVPacket *pkt) {
+    uint8_t              header[4];
+    uint16_t             size;
+    int                  err;
+    struct MMTTLVContext *ctx = s->priv_data;
+    int64_t              pos  = avio_tell(s->pb);
+
+    if (pos < 0) return (int) pos;
+    if (pos != ctx->last_pos) {
+        ctx->last_pos = pos;
+
+        while (pos - ctx->last_pos < ctx->resync_size) {
+            if ((err = ffio_ensure_seekback(s->pb, 4)) < 0)
+                return err;
+
+            // ffio_read_size() rejects short reads, so all four header bytes
+            // are known to be valid before they are inspected
+            if ((err = ffio_read_size(s->pb, header, 4)) < 0)
+                return avio_feof(s->pb) ? AVERROR_EOF : err;
+
+            if (header[0] != HEADER_BYTE) {
+                // not a header: retry one byte after the previous attempt
+                if ((pos = avio_seek(s->pb, -3, SEEK_CUR)) < 0)
+                    return (int) pos;
+                continue;
+            }
+
+            size = AV_RB16(header + 2);
+
+            if ((pos = avio_seek(s->pb, -4, SEEK_CUR)) < 0)
+                return (int) pos;
+
+            if ((err = ffio_ensure_seekback(s->pb, 4 + size + 1)) < 0)
+                return err;
+
+            if ((pos = avio_skip(s->pb, 4 + size)) < 0)
+                return (int) pos;
+
+            if ((err = avio_read(s->pb, header, 1)) < 0)
+                return avio_feof(s->pb) ? AVERROR_EOF : err;
+
+            if (header[0] == HEADER_BYTE) {
+                // found HEADER, [size], HEADER, should be good
+                if ((pos = avio_seek(s->pb, -size - 1 - 4, SEEK_CUR)) < 0)
+                    return (int) pos;
+                goto success;
+            }
+
+            // false positive: continue one byte after the candidate header
+            if ((pos = avio_seek(s->pb, -size - 1 - 3, SEEK_CUR)) < 0)
+                return (int) pos;
+        }
+        return AVERROR_INVALIDDATA;
+
+        success:
+        ctx->last_pos = pos;
+
+        for (struct Program *program = ctx->programs; program != NULL; program = program->next)
+            avpriv_mmtp_reset_state(program->mmtp);
+    }
+
+    if (pkt != NULL) pkt->pos = ctx->last_pos;
+    if ((err = ffio_read_size(s->pb, header, 4)) < 0)
+        return avio_feof(s->pb) ? AVERROR_EOF : err;
+    ctx->last_pos += 4;
+
+    if (header[0] != HEADER_BYTE)
+        return AVERROR_INVALIDDATA;
+
+    size = AV_RB16(header + 2);
+    if (header[1] != HEADER_COMPRESSED_IP_PACKET) {
+        if ((ctx->last_pos = avio_skip(s->pb, size)) < 0)
+            return (int) ctx->last_pos;
+        return FFERROR_REDO;
+    }
+
+    if (ctx->cap < size) {
+        // Keep cap and buf in sync: cap only grows after the allocation
+        // succeeded, so a failure can never leave cap > 0 with buf == NULL.
+        av_freep(&ctx->buf);
+        ctx->cap = 0;
+        if ((ctx->buf = av_malloc(size)) == NULL)
+            return AVERROR(ENOMEM);
+        ctx->cap = size;
+    }
+    if ((err = ffio_read_size(s->pb, ctx->buf, size)) < 0)
+        return avio_feof(s->pb) ? AVERROR_EOF : err;
+    ctx->last_pos += size;
+    return mmttlv_read_compressed_ip_packet(ctx, s, pkt, ctx->buf, size);
+}
+
+static int mmttlv_read_header(AVFormatContext *s) { // Set up the demuxer and, on seekable input, pre-read packets until a stream shows up.
+    int64_t              pos;
+    int64_t              allow = s->probesize; // byte budget for the pre-read loop below
+    struct MMTTLVContext *ctx  = s->priv_data;
+
+    ctx->last_pos = avio_tell(s->pb);
+    if (ctx->last_pos < 0)
+        return (int) ctx->last_pos;
+    ctx->last_pos -= 1; // force resync
+
+    ctx->resync_size = 4096;
+    s->ctx_flags |= AVFMTCTX_NOHEADER;
+
+    if (!s->pb->seekable) // without seeking the pre-read below could not be undone
+        return 0;
+
+    if ((pos = avio_tell(s->pb)) < 0) // remember where to return to after the pre-read
+        return (int) pos;
+
+    while (s->nb_streams <= 0 && allow > 0) {
+        const int64_t cur = ctx->last_pos;
+        const int     err = mmttlv_read_packet(s, NULL); // NULL packet: MPU payloads are not parsed, signalling messages are
+        if (err < 0 && err != FFERROR_REDO)
+            return err;
+        allow -= ctx->last_pos - cur; // charge the bytes this read advanced last_pos by
+    }
+
+    ctx->last_pos = avio_tell(s->pb);
+    if (ctx->last_pos < 0)
+        return (int) ctx->last_pos;
+
+    if ((pos = avio_seek(s->pb, pos, SEEK_SET)) < 0) // rewind; last_pos now differs from the position, so the next read resyncs
+        return (int) pos;
+
+    return 0;
+}
+
+/* Release every per-program parser, the program list and the packet buffer. */
+static int mmttlv_read_close(AVFormatContext *ctx) {
+    struct MMTTLVContext *priv = ctx->priv_data;
+    struct Program       *cur  = priv->programs;
+
+    while (cur != NULL) {
+        struct Program *following = cur->next;
+
+        avpriv_mmtp_parse_close(cur->mmtp);
+        av_free(cur);
+        cur = following;
+    }
+
+    av_free(priv->buf); // av_free(NULL) is a no-op
+    return 0;
+}
+
+/**
+ * Find the dts of the next packet of a stream at or after a byte position.
+ *
+ * Seeks to *pos and reads packets until one belonging to stream_index (any
+ * stream if negative) is produced. *pos is updated to the position of the
+ * returned packet.
+ *
+ * NOTE(review): pos_limit is consumed as a byte budget (it is decremented by
+ * the bytes read), not compared as an absolute position - confirm this is
+ * what the seek helpers calling this function expect.
+ *
+ * @return the packet's dts, or AV_NOPTS_VALUE if seeking or reading failed
+ *         or the budget ran out
+ */
+static int64_t mmttlv_read_timestamp(struct AVFormatContext *s, int stream_index, int64_t *pos, int64_t pos_limit) {
+    struct MMTTLVContext *ctx = s->priv_data;
+
+    // a failed seek must not leak its negative error code as a timestamp
+    if ((*pos = avio_seek(s->pb, *pos, SEEK_SET)) < 0)
+        return AV_NOPTS_VALUE;
+
+    while (pos_limit > 0) {
+        AVPacket      packet = {0};
+        const int     err    = mmttlv_read_packet(s, &packet);
+        // copy what is needed before the packet is released
+        const int64_t ts     = packet.dts;
+        const int64_t off    = packet.pos;
+        const int     sid    = packet.stream_index;
+        av_packet_unref(&packet);
+        if (err >= 0 && (stream_index < 0 || sid == stream_index)) {
+            *pos = off;
+            return ts;
+        }
+        pos_limit -= ctx->last_pos - *pos;
+        *pos = ctx->last_pos;
+        if (err < 0 && err != FFERROR_REDO)
+            return AV_NOPTS_VALUE;
+    }
+
+    return AV_NOPTS_VALUE;
+}
+
+const AVInputFormat ff_mmttlv_demuxer = { // demuxer description wiring up the callbacks defined in this file
+    .name           = "mmttlv",
+    .long_name      = NULL_IF_CONFIG_SMALL("MMT protocol over TLV packets (ARIB STD-B32)"),
+    .priv_data_size = sizeof(struct MMTTLVContext),
+    .flags_internal = FF_FMT_INIT_CLEANUP,
+    .read_probe     = mmttlv_probe,
+    .read_header    = mmttlv_read_header,
+    .read_packet    = mmttlv_read_packet,
+    .read_close     = mmttlv_read_close,
+    .read_timestamp = mmttlv_read_timestamp,
+    .flags          = AVFMT_SHOW_IDS,
+};
diff --git a/libavformat/version.h b/libavformat/version.h
index e2634b85ae..4bde82abb4 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@ 
 
 #include "version_major.h"
 
-#define LIBAVFORMAT_VERSION_MINOR   5
+#define LIBAVFORMAT_VERSION_MINOR   6
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \