diff mbox series

[FFmpeg-devel,3/2,WIP,RFC] avformat: Immersive Audio Model and Formats demuxer

Message ID 20231023172527.4460-2-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,v2] avformat: introduce AVStreamGroup | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

James Almer Oct. 23, 2023, 5:25 p.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
This is a demuxer implementation for the Immersive Audio Model and Formats,
defined in https://aomediacodec.github.io/iamf
It makes use of the new AVStreamGroup API to export both the Audio Elements
and the Mixing Parameters.

Each Audio Element references one or more streams in the file, and can have
more than one "layer" using an incrementing number of said streams, each with a
specific channel layout. The layers have no ID, only the Audio Elements do, so
I considered I should group things at the Audio Element level, more so
considering Mixing Parameters reference Audio Elements and not the underlying
layers.
In turn, Mixing Parameters, like I said, reference Audio Elements, so to avoid
doing "Group of groups", I just made them reference every stream across every
relevant Audio Element.

Three packet side data types are added as each Audio Frame can have one or more
of these parameters, and packet side data does not support more than one entry
per type.

 libavcodec/avpacket.c       |    3 +
 libavcodec/packet.h         |   24 +
 libavformat/Makefile        |    2 +
 libavformat/allformats.c    |    1 +
 libavformat/avformat.c      |   10 +-
 libavformat/avformat.h      |    8 +-
 libavformat/dump.c          |   73 +-
 libavformat/iamf.c          |  336 +++++++
 libavformat/iamf.h          |  228 +++++
 libavformat/iamf_internal.h |   86 ++
 libavformat/iamfdec.c       | 1646 +++++++++++++++++++++++++++++++++++
 libavformat/options.c       |   15 +-
 12 files changed, 2423 insertions(+), 9 deletions(-)
 create mode 100644 libavformat/iamf.c
 create mode 100644 libavformat/iamf.h
 create mode 100644 libavformat/iamf_internal.h
 create mode 100644 libavformat/iamfdec.c
diff mbox series

Patch

diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index e29725c2d2..0f8c9b77ae 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -301,6 +301,9 @@  const char *av_packet_side_data_name(enum AVPacketSideDataType type)
     case AV_PKT_DATA_DOVI_CONF:                  return "DOVI configuration record";
     case AV_PKT_DATA_S12M_TIMECODE:              return "SMPTE ST 12-1:2014 timecode";
     case AV_PKT_DATA_DYNAMIC_HDR10_PLUS:         return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
+    case AV_PKT_DATA_IAMF_MIX_GAIN_PARAM:        return "IAMF Mix Gain Parameter Data";
+    case AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM:   return "IAMF Demixing Info Parameter Data";
+    case AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM: return "IAMF Recon Gain Info Parameter Data";
     }
     return NULL;
 }
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index b19409b719..2c57d262c6 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -299,6 +299,30 @@  enum AVPacketSideDataType {
      */
     AV_PKT_DATA_DYNAMIC_HDR10_PLUS,
 
+    /**
+     * IAMF Mix Gain Parameter Data associated with the audio frame. This metadata
+     * is in the form of the AVIAMFParamDefinition struct and contains information
+     * defined in sections 3.6.1 and 3.8.1 of the Immersive Audio Model and
+     * Formats standard.
+     */
+    AV_PKT_DATA_IAMF_MIX_GAIN_PARAM,
+
+    /**
+     * IAMF Demixing Info Parameter Data associated with the audio frame. This
+     * metadata is in the form of the AVIAMFParamDefinition struct and contains
+     * information defined in sections 3.6.1 and 3.8.2 of the Immersive Audio Model
+     * and Formats standard.
+     */
+    AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM,
+
+    /**
+     * IAMF Recon Gain Info Parameter Data associated with the audio frame. This
+     * metadata is in the form of the AVIAMFParamDefinition struct and contains
+     * information defined in sections 3.6.1 and 3.8.3 of the Immersive Audio Model
+     * and Formats standard.
+     */
+    AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM,
+
     /**
      * The number of side data types.
      * This is not part of the public API/ABI in the sense that it may
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 329055ccfd..364bc417a3 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -3,6 +3,7 @@  DESC = FFmpeg container format library
 
 HEADERS = avformat.h                                                    \
           avio.h                                                        \
+          iamf.h                                                        \
           version.h                                                     \
           version_major.h                                               \
 
@@ -258,6 +259,7 @@  OBJS-$(CONFIG_EVC_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_HLS_DEMUXER)               += hls.o hls_sample_encryption.o
 OBJS-$(CONFIG_HLS_MUXER)                 += hlsenc.o hlsplaylist.o avc.o
 OBJS-$(CONFIG_HNM_DEMUXER)               += hnm.o
+OBJS-$(CONFIG_IAMF_DEMUXER)              += iamfdec.o iamf.o
 OBJS-$(CONFIG_ICO_DEMUXER)               += icodec.o
 OBJS-$(CONFIG_ICO_MUXER)                 += icoenc.o
 OBJS-$(CONFIG_IDCIN_DEMUXER)             += idcin.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index d4b505a5a3..63ca44bacd 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -212,6 +212,7 @@  extern const FFOutputFormat ff_hevc_muxer;
 extern const AVInputFormat  ff_hls_demuxer;
 extern const FFOutputFormat ff_hls_muxer;
 extern const AVInputFormat  ff_hnm_demuxer;
+extern const AVInputFormat  ff_iamf_demuxer;
 extern const AVInputFormat  ff_ico_demuxer;
 extern const FFOutputFormat ff_ico_muxer;
 extern const AVInputFormat  ff_idcin_demuxer;
diff --git a/libavformat/avformat.c b/libavformat/avformat.c
index 99cda56c2f..caba3308c2 100644
--- a/libavformat/avformat.c
+++ b/libavformat/avformat.c
@@ -37,6 +37,7 @@ 
 #include "avformat.h"
 #include "avio.h"
 #include "demux.h"
+#include "iamf.h"
 #include "mux.h"
 #include "internal.h"
 
@@ -90,7 +91,14 @@  void ff_free_stream_group(AVStreamGroup **pstg)
     av_freep(&stg->streams);
     av_freep(&stg->priv_data);
     switch (stg->type) {
-    // Structs in the union are freed here
+    case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: {
+        avformat_iamf_audio_element_free(&stg->params.iamf_audio_element);
+        break;
+    }
+    case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: {
+        avformat_iamf_mix_presentation_free(&stg->params.iamf_mix_presentation);
+        break;
+    }
     default:
         break;
     }
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index f045084c8d..455e07333e 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1020,8 +1020,13 @@  typedef struct AVStream {
 
 enum AVStreamGroupParamsType {
     AV_STREAM_GROUP_PARAMS_NONE,
+    AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT,
+    AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION,
 };
 
+struct AVIAMFAudioElement;
+struct AVIAMFMixPresentation;
+
 typedef struct AVStreamGroup {
     /**
      * A class for @ref avoptions. Set on group creation.
@@ -1055,7 +1060,8 @@  typedef struct AVStreamGroup {
      * Group-specific type parameters
      */
     union {
-        uintptr_t dummy; // Placeholder
+        struct AVIAMFAudioElement *iamf_audio_element;
+        struct AVIAMFMixPresentation *iamf_mix_presentation;
     } params;
 
     /**
diff --git a/libavformat/dump.c b/libavformat/dump.c
index c0868a1bb3..f48afdf679 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -38,6 +38,7 @@ 
 #include "libavcodec/avcodec.h"
 
 #include "avformat.h"
+#include "iamf.h"
 #include "internal.h"
 
 #define HEXDUMP_PRINT(...)                                                    \
@@ -509,7 +510,7 @@  static void dump_sidedata(void *ctx, const AVStream *st, const char *indent)
 
 /* "user interface" functions */
 static void dump_stream_format(const AVFormatContext *ic, int i,
-                               int index, int is_output)
+                               int group_index, int index, int is_output)
 {
     char buf[256];
     int flags = (is_output ? ic->oformat->flags : ic->iformat->flags);
@@ -517,6 +518,8 @@  static void dump_stream_format(const AVFormatContext *ic, int i,
     const FFStream *const sti = cffstream(st);
     const AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
     const char *separator = ic->dump_separator;
+    const char *group_indent = group_index >= 0 ? "    " : "";
+    const char *extra_indent = group_index >= 0 ? "        " : "      ";
     AVCodecContext *avctx;
     int ret;
 
@@ -543,7 +546,10 @@  static void dump_stream_format(const AVFormatContext *ic, int i,
     avcodec_string(buf, sizeof(buf), avctx, is_output);
     avcodec_free_context(&avctx);
 
-    av_log(NULL, AV_LOG_INFO, "  Stream #%d:%d", index, i);
+    av_log(NULL, AV_LOG_INFO, "%s  Stream #%d", group_indent, index);
+    if (group_index >= 0)
+        av_log(NULL, AV_LOG_INFO, ":%d", group_index);
+    av_log(NULL, AV_LOG_INFO, ":%d", i);
 
     /* the pid is an important information, so we display it */
     /* XXX: add a generic system */
@@ -621,9 +627,61 @@  static void dump_stream_format(const AVFormatContext *ic, int i,
         av_log(NULL, AV_LOG_INFO, " (non-diegetic)");
     av_log(NULL, AV_LOG_INFO, "\n");
 
-    dump_metadata(NULL, st->metadata, "    ");
+    dump_metadata(NULL, st->metadata, extra_indent);
 
-    dump_sidedata(NULL, st, "    ");
+    dump_sidedata(NULL, st, extra_indent);
+}
+
+static void dump_stream_group(const AVFormatContext *ic, uint8_t *printed,
+                              int i, int index, int is_output)
+{
+    const AVStreamGroup *stg = ic->stream_groups[i];
+    char buf[512];
+    int ret;
+    av_log(NULL, AV_LOG_INFO, "  Stream group #%d:%d:", index, i);
+    switch (stg->type) { /* each arm ends the header line started above */
+    case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT: {
+        AVIAMFAudioElement *iamf = stg->params.iamf_audio_element;
+        int substream_count = 0;
+        av_log(NULL, AV_LOG_INFO, " IAMF Audio Element\n");
+        for (int j = 0; j < iamf->num_layers; j++) {
+            AVIAMFLayer *layer = iamf->layers[j];
+            substream_count += layer->substream_count;
+            av_log(NULL, AV_LOG_INFO, "    Layer %d:", j);
+            ret = av_channel_layout_describe(&layer->ch_layout, buf, sizeof(buf));
+            if (ret >= 0)
+                av_log(NULL, AV_LOG_INFO, " %s", buf);
+            av_log(NULL, AV_LOG_INFO, "\n");
+            for (int k = 0; k < substream_count && k < stg->nb_streams; k++) {
+                dump_stream_format(ic, stg->streams[k]->index, i, index, is_output);
+                printed[stg->streams[k]->index] = 1;
+            }
+        }
+        break;
+    }
+    case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION: {
+        AVIAMFMixPresentation *mix_presentation = stg->params.iamf_mix_presentation;
+        av_log(NULL, AV_LOG_INFO, " IAMF Mix Presentation\n");
+        for (int j = 0; j < mix_presentation->num_sub_mixes; j++) {
+            AVIAMFSubmixPresentation *sub_mix = mix_presentation->sub_mixes[j];
+            av_log(NULL, AV_LOG_INFO, "    Submix %d:\n", j);
+            for (int k = 0; k < sub_mix->num_submix_layouts; k++) {
+                AVIAMFSubmixLayout *submix_layout = sub_mix->submix_layouts[k];
+                av_log(NULL, AV_LOG_INFO, "      Layout %d", k);
+                if (submix_layout->layout_type == 2) {
+                    ret = av_channel_layout_describe(&submix_layout->sound_system, buf, sizeof(buf));
+                    if (ret >= 0)
+                        av_log(NULL, AV_LOG_INFO, " %s", buf);
+                } else if (submix_layout->layout_type == 3)
+                    av_log(NULL, AV_LOG_INFO, " Binaural");
+                av_log(NULL, AV_LOG_INFO, "\n");
+            }
+        }
+        break;
+    }
+    default: /* no type-specific details to print: just end the header line */
+        av_log(NULL, AV_LOG_INFO, "\n");
+    }
+}
 
 void av_dump_format(AVFormatContext *ic, int index,
@@ -699,7 +757,7 @@  void av_dump_format(AVFormatContext *ic, int index,
             dump_metadata(NULL, program->metadata, "    ");
             for (k = 0; k < program->nb_stream_indexes; k++) {
                 dump_stream_format(ic, program->stream_index[k],
-                                   index, is_output);
+                                   -1, index, is_output);
                 printed[program->stream_index[k]] = 1;
             }
             total += program->nb_stream_indexes;
@@ -708,9 +766,12 @@  void av_dump_format(AVFormatContext *ic, int index,
             av_log(NULL, AV_LOG_INFO, "  No Program\n");
     }
 
+    for (i = 0; i < ic->nb_stream_groups; i++)
+         dump_stream_group(ic, printed, i, index, is_output);
+
     for (i = 0; i < ic->nb_streams; i++)
         if (!printed[i])
-            dump_stream_format(ic, i, index, is_output);
+            dump_stream_format(ic, i, -1, index, is_output);
 
     av_free(printed);
 }
diff --git a/libavformat/iamf.c b/libavformat/iamf.c
new file mode 100644
index 0000000000..2d6f84a073
--- /dev/null
+++ b/libavformat/iamf.c
@@ -0,0 +1,336 @@ 
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/error.h"
+#include "libavutil/mem.h"
+
+#include "iamf.h"
+#include "iamf_internal.h"
+
+const AVChannelLayout ff_iamf_scalable_ch_layouts[10] = {
+    AV_CHANNEL_LAYOUT_MONO,
+    AV_CHANNEL_LAYOUT_STEREO,
+    // "Loudspeaker configuration for Sound System B"
+    AV_CHANNEL_LAYOUT_5POINT1_BACK,
+    // "Loudspeaker configuration for Sound System C"
+    AV_CHANNEL_LAYOUT_7POINT1_TOP_BACK,
+    // "Loudspeaker configuration for Sound System D"
+    {
+        .nb_channels = 10,
+        .order       = AV_CHANNEL_ORDER_NATIVE,
+        .u.mask      = AV_CH_LAYOUT_7POINT1_TOP_BACK | AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT,
+    },
+    // "Loudspeaker configuration for Sound System I"
+    AV_CHANNEL_LAYOUT_7POINT1,
+    // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+    {
+        .nb_channels = 10,
+        .order       = AV_CHANNEL_ORDER_NATIVE,
+        .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT,
+    },
+    // "Loudspeaker configuration for Sound System J"
+    {
+        .nb_channels = 12,
+        .order       = AV_CHANNEL_ORDER_NATIVE,
+        .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT |
+                                              AV_CH_TOP_BACK_LEFT  | AV_CH_TOP_BACK_RIGHT,
+    },
+    // Front subset of "Loudspeaker configuration for Sound System J"
+    {
+        .nb_channels = 6,
+        .order       = AV_CHANNEL_ORDER_NATIVE,
+        .u.mask      = AV_CH_LAYOUT_3POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT,
+    },
+    // Binaural
+    AV_CHANNEL_LAYOUT_STEREO,
+};
+
+const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13] = {
+    { SOUND_SYSTEM_A_0_2_0, AV_CHANNEL_LAYOUT_STEREO },
+    { SOUND_SYSTEM_B_0_5_0, AV_CHANNEL_LAYOUT_5POINT1_BACK },
+    { SOUND_SYSTEM_C_2_5_0, AV_CHANNEL_LAYOUT_7POINT1_TOP_BACK },
+    { SOUND_SYSTEM_D_4_5_0,
+        {
+            .nb_channels = 10,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1_TOP_BACK | AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT,
+        },
+    },
+    { SOUND_SYSTEM_E_4_5_1,
+        {
+            .nb_channels = 11,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1_TOP_BACK | AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT | AV_CH_BOTTOM_FRONT_CENTER,
+        },
+    },
+    { SOUND_SYSTEM_F_3_7_0,
+        {
+            .nb_channels = 12,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT | AV_CH_TOP_BACK_CENTER | AV_CH_LOW_FREQUENCY_2,
+        },
+    },
+    { SOUND_SYSTEM_G_4_9_0,
+        {
+            .nb_channels = 14,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT | AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT |
+                                                  AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER,
+        },
+    },
+    { SOUND_SYSTEM_H_9_10_3, AV_CHANNEL_LAYOUT_22POINT2 },
+    { SOUND_SYSTEM_I_0_7_0, AV_CHANNEL_LAYOUT_7POINT1 },
+    { SOUND_SYSTEM_J_4_7_0,
+        {
+            .nb_channels = 12,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT |
+                                                  AV_CH_TOP_BACK_LEFT  | AV_CH_TOP_BACK_RIGHT,
+        },
+    },
+    { SOUND_SYSTEM_10_2_7_0,
+        {
+            .nb_channels = 10,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_7POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT,
+        },
+    },
+    { SOUND_SYSTEM_11_2_3_0,
+        {
+            .nb_channels = 6,
+            .order       = AV_CHANNEL_ORDER_NATIVE,
+            .u.mask      = AV_CH_LAYOUT_3POINT1 | AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT,
+        },
+    },
+    { SOUND_SYSTEM_12_0_1_0, AV_CHANNEL_LAYOUT_MONO },
+};
+
+AVIAMFAudioElement *avformat_iamf_audio_element_alloc(void)
+{
+    return av_mallocz(sizeof(AVIAMFAudioElement)); /* result of av_mallocz() is returned as-is */
+}
+
+int avformat_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element)
+{
+    const unsigned int count = audio_element->num_layers;
+    AVIAMFLayer **grown, *fresh;
+
+    /* Appends one layer from av_mallocz(); count + 1 below must not wrap. */
+    if (count == UINT_MAX)
+        return AVERROR(EINVAL);
+
+    grown = av_realloc_array(audio_element->layers, count + 1, sizeof(*grown));
+    if (!grown)
+        return AVERROR(ENOMEM);
+    /* The resized array is stored even if the layer allocation below fails. */
+    audio_element->layers = grown;
+    fresh = av_mallocz(sizeof(*fresh));
+    grown[count] = fresh;
+    if (!fresh)
+        return AVERROR(ENOMEM);
+    av_channel_layout_uninit(&fresh->ch_layout);
+    audio_element->num_layers = count + 1;
+    return 0;
+}
+
+void avformat_iamf_audio_element_free(AVIAMFAudioElement **paudio_element)
+{
+    AVIAMFAudioElement *audio_element = *paudio_element;
+    /* Nothing to release when *paudio_element is already NULL. */
+    if (!audio_element)
+        return;
+    /* The index must start at 0 so that every layer is visited exactly once. */
+    for (unsigned int i = 0; i < audio_element->num_layers; i++) {
+        AVIAMFLayer *layer = audio_element->layers[i];
+        av_channel_layout_uninit(&layer->ch_layout);
+        av_free(layer->demixing_matrix);
+        av_free(layer);
+    }
+    av_free(audio_element->layers);
+    /* Free the parameter definitions, then the element via av_freep(). */
+    av_free(audio_element->demixing_info);
+    av_free(audio_element->recon_gain_info);
+    av_freep(paudio_element);
+}
+
+AVIAMFMixPresentation *avformat_iamf_mix_presentation_alloc(void)
+{
+    return av_mallocz(sizeof(AVIAMFMixPresentation)); /* result of av_mallocz() is returned as-is */
+}
+
+int avformat_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation,
+                                              unsigned int num_submix_elements,
+                                              unsigned int num_submix_layouts)
+{
+    AVIAMFSubmixPresentation **sub_mixes, *sub_mix;
+    /* Appends one sub-mix with the requested number of elements and layouts. */
+    if (mix_presentation->num_sub_mixes == UINT_MAX)
+        return AVERROR(EINVAL);
+    /* Grow the pointer array by one slot; num_sub_mixes only changes on success. */
+    sub_mixes = av_realloc_array(mix_presentation->sub_mixes, mix_presentation->num_sub_mixes + 1,
+                                 sizeof(*mix_presentation->sub_mixes));
+    if (!sub_mixes)
+        return AVERROR(ENOMEM);
+
+    mix_presentation->sub_mixes = sub_mixes;
+
+    sub_mix = av_mallocz(sizeof(*sub_mix));
+    if (!sub_mix)
+        return AVERROR(ENOMEM);
+    /* Pointer arrays that will hold the sub-mix's elements and layouts. */
+    sub_mix->submix_elements = av_calloc(num_submix_elements, sizeof(*sub_mix->submix_elements));
+    if (!sub_mix->submix_elements)
+        goto fail;
+
+    sub_mix->submix_layouts = av_calloc(num_submix_layouts, sizeof(*sub_mix->submix_layouts));
+    if (!sub_mix->submix_layouts)
+        goto fail;
+    /* The per-sub-mix counters track how many entries the fail path must free. */
+    for (int i = 0; i < num_submix_elements; i++) {
+        AVIAMFSubmixElement *submix_element = av_mallocz(sizeof(*submix_element));
+        if (!submix_element)
+            goto fail;
+
+        sub_mix->submix_elements[sub_mix->num_submix_elements++] = submix_element;
+    }
+
+    for (int i = 0; i < num_submix_layouts; i++) {
+        AVIAMFSubmixLayout *submix_layout = av_mallocz(sizeof(*submix_layout));
+        if (!submix_layout)
+            goto fail;
+
+        av_channel_layout_uninit(&submix_layout->sound_system);
+        sub_mix->submix_layouts[sub_mix->num_submix_layouts++] = submix_layout;
+    }
+    /* Every allocation succeeded: store the sub-mix in the presentation. */
+    mix_presentation->sub_mixes[mix_presentation->num_sub_mixes++] = sub_mix;
+
+    return 0;
+fail:
+    for (int i = 0; i < sub_mix->num_submix_elements; i++) {
+        av_free(sub_mix->submix_elements[i]->element_mix_config);
+        av_free(sub_mix->submix_elements[i]);
+    }
+    for (int i = 0; i < sub_mix->num_submix_layouts; i++)
+        av_free(sub_mix->submix_layouts[i]);
+    av_free(sub_mix->submix_elements);
+    av_free(sub_mix->submix_layouts);
+    av_free(sub_mix);
+
+    return AVERROR(ENOMEM);
+}
+
+void avformat_iamf_mix_presentation_free(AVIAMFMixPresentation **pmix_presentation)
+{
+    AVIAMFMixPresentation *mix_presentation = *pmix_presentation;
+    /* Nothing to release when *pmix_presentation is already NULL. */
+    if (!mix_presentation)
+        return;
+    /* Every index below starts at 0 so that each owned entry is freed once. */
+    for (unsigned int i = 0; i < mix_presentation->num_sub_mixes; i++) {
+        AVIAMFSubmixPresentation *sub_mix = mix_presentation->sub_mixes[i];
+        for (unsigned int j = 0; j < sub_mix->num_submix_elements; j++) {
+            AVIAMFSubmixElement *submix_element = sub_mix->submix_elements[j];
+            for (unsigned int k = 0; k < mix_presentation->count_label; k++)
+                av_free(submix_element->mix_presentation_element_annotations[k]);
+            av_free(submix_element->mix_presentation_element_annotations);
+            av_free(submix_element->element_mix_config);
+            av_free(submix_element);
+        }
+        av_free(sub_mix->submix_elements);
+        for (unsigned int j = 0; j < sub_mix->num_submix_layouts; j++) {
+            AVIAMFSubmixLayout *submix_layout = sub_mix->submix_layouts[j];
+            av_channel_layout_uninit(&submix_layout->sound_system);
+            av_free(submix_layout);
+        }
+        av_free(sub_mix->submix_layouts);
+        av_free(sub_mix->output_mix_config);
+        av_free(sub_mix);
+    }
+    for (unsigned int i = 0; i < mix_presentation->count_label; i++) {
+        av_free(mix_presentation->language_label[i]);
+        av_free(mix_presentation->mix_presentation_annotations[i]);
+    }
+    av_free(mix_presentation->sub_mixes);
+    av_free(mix_presentation->language_label);
+    av_free(mix_presentation->mix_presentation_annotations);
+    /* Finally release the struct itself through the caller's pointer. */
+    av_freep(pmix_presentation);
+}
+
+AVIAMFParamDefinition *avformat_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType type,
+                                                            unsigned int num_subblocks, size_t *out_size)
+{
+    /* Local layouts used only to compute where the subblock array starts. */
+    struct MixGainStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFMixGainParameterData m;
+    };
+    struct DemixStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFDemixingInfoParameterData d;
+    };
+    struct ReconGainStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFReconGainParameterData r;
+    };
+    size_t subblocks_offset, subblock_size;
+    size_t size;
+    AVIAMFParamDefinition *par;
+    /* Pick the subblock struct matching the type; unknown types yield NULL. */
+    switch (type) {
+    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+        subblocks_offset = offsetof(struct MixGainStruct, m);
+        subblock_size = sizeof(AVIAMFMixGainParameterData);
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+        subblocks_offset = offsetof(struct DemixStruct, d);
+        subblock_size = sizeof(AVIAMFDemixingInfoParameterData);
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+        subblocks_offset = offsetof(struct ReconGainStruct, r);
+        subblock_size = sizeof(AVIAMFReconGainParameterData);
+        break;
+    default:
+        return NULL;
+    }
+    /* Total size = offset of the first subblock + all subblocks, overflow-checked. */
+    size = subblocks_offset;
+    if (num_subblocks > (SIZE_MAX - size) / subblock_size)
+        return NULL;
+    size += subblock_size * num_subblocks;
+
+    par = av_mallocz(size);
+    if (!par)
+        return NULL;
+    /* Record the layout in the struct so each subblock can be located from it. */
+    par->param_definition_type = type;
+    par->num_subblocks = num_subblocks;
+    par->subblock_size = subblock_size;
+    par->subblocks_offset = subblocks_offset;
+    if (out_size)
+        *out_size = size;
+
+    return par;
+}
diff --git a/libavformat/iamf.h b/libavformat/iamf.h
new file mode 100644
index 0000000000..2743fd0c07
--- /dev/null
+++ b/libavformat/iamf.h
@@ -0,0 +1,228 @@ 
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_H
+#define AVFORMAT_IAMF_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+
+struct AVStreamGroup;
+
+enum AVIAMFAudioElementType {
+    AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
+    AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
+};
+
+enum AVIAMFParamDefinitionType {
+    AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
+    AV_IAMF_PARAMETER_DEFINITION_DEMIXING,
+    AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN,
+};
+
+/**
+ * Parameters as defined in 3.6.1
+ */
+typedef struct AVIAMFParamDefinition {
+    size_t subblocks_offset;
+    size_t subblock_size;
+    unsigned int parameter_id;
+    enum AVIAMFParamDefinitionType param_definition_type;
+    unsigned int parameter_rate;
+    unsigned int param_definition_mode;
+    unsigned int duration;
+    unsigned int constant_subblock_duration;
+    unsigned int num_subblocks;
+} AVIAMFParamDefinition;
+
+AVIAMFParamDefinition *avformat_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
+                                                            unsigned int num_subblocks, size_t *size);
+
+/**
+ * Get the subblock at index @p idx, which must be lower than num_subblocks.
+ * The @ref AVIAMFParamDefinition.param_definition_type "param definition type" defines
+ * the struct type of the returned pointer.
+ */
+static av_always_inline void*
+avformat_iamf_param_definition_get_subblock(AVIAMFParamDefinition *par, unsigned int idx)
+{
+    av_assert0(idx < par->num_subblocks);
+    return (void *)((uint8_t *)par + par->subblocks_offset + idx * par->subblock_size);
+}
+
+enum AVIAMFAnimationType {
+    AV_IAMF_ANIMATION_TYPE_STEP,
+    AV_IAMF_ANIMATION_TYPE_LINEAR,
+    AV_IAMF_ANIMATION_TYPE_BEZIER,
+};
+
+/**
+ * Mix Gain Parameter Data as defined in 3.8.1
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
+ */
+typedef struct AVIAMFMixGainParameterData {
+    unsigned int subblock_duration;
+    enum AVIAMFAnimationType animation_type;
+    int start_point_value;
+    int end_point_value;
+    int control_point_value;
+    unsigned int control_point_relative_time;
+} AVIAMFMixGainParameterData;
+
+/**
+ * Demixing Info Parameter Data as defined in 3.8.2
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_DEMIXING.
+ */
+typedef struct AVIAMFDemixingInfoParameterData {
+    unsigned int subblock_duration;
+    unsigned int dmixp_mode;
+} AVIAMFDemixingInfoParameterData;
+
+/**
+ * Recon Gain Info Parameter Data as defined in 3.8.3
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN.
+ */
+typedef struct AVIAMFReconGainParameterData {
+    unsigned int subblock_duration;
+} AVIAMFReconGainParameterData;
+
+typedef struct AVIAMFLayer {
+    AVChannelLayout ch_layout;
+    unsigned int substream_count;
+
+    unsigned int recon_gain_is_present;
+    /**
+     * Output gain flags as defined in 3.6.2
+     *
+     * This field is defined only if audio_element_type is
+     * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
+     */
+    unsigned int output_gain_flags;
+    /**
+     * Output gain as defined in 3.6.2
+     *
+     * Must be 0 if @ref output_gain_flags is 0.
+     */
+    int output_gain;
+    /**
+     * Ambisonics mode as defined in 3.6.3
+     *
+     * This field is defined only if audio_element_type is
+     * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
+     *
+     * If 0, channel_mapping is defined implicitly (Ambisonic Order)
+     * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
+     * If 1, @ref demixing_matrix must be set.
+     */
+    unsigned int ambisonics_mode;
+    /**
+     * Demixing matrix as defined in 3.6.3
+     *
+     * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
+     */
+    int16_t *demixing_matrix;
+} AVIAMFLayer;
+
+typedef struct AVIAMFAudioElement {
+    AVIAMFLayer **layers;
+    unsigned int num_layers;
+
+    unsigned int codec_config_id;
+
+    AVIAMFParamDefinition *demixing_info;
+    AVIAMFParamDefinition *recon_gain_info;
+
+    /**
+     * Audio element type as defined in 3.6
+     */
+    enum AVIAMFAudioElementType audio_element_type;
+
+    /**
+     * Default weight value as defined in 3.6
+     */
+    unsigned int default_w;
+} AVIAMFAudioElement;
+
+AVIAMFAudioElement *avformat_iamf_audio_element_alloc(void);
+
+int avformat_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element);
+
+void avformat_iamf_audio_element_free(AVIAMFAudioElement **audio_element);
+
+typedef struct AVIAMFSubmixElement {
+    const struct AVStreamGroup *audio_element;
+
+    char **mix_presentation_element_annotations;
+
+    unsigned int headphones_rendering_mode;
+    AVIAMFParamDefinition *element_mix_config;
+    int default_mix_gain;
+} AVIAMFSubmixElement;
+
+typedef struct AVIAMFSubmixLayout {
+    unsigned int layout_type;
+    AVChannelLayout sound_system;
+
+    int integrated_loudness;
+    int digital_peak;
+} AVIAMFSubmixLayout;
+
+typedef struct AVIAMFSubmixPresentation {
+    AVIAMFSubmixElement **submix_elements;
+    unsigned int num_submix_elements;
+
+    AVIAMFSubmixLayout **submix_layouts;
+    unsigned int num_submix_layouts;
+
+    AVIAMFParamDefinition *output_mix_config;
+    int default_mix_gain;
+} AVIAMFSubmixPresentation;
+
+typedef struct AVIAMFMixPresentation {
+    AVIAMFSubmixPresentation **sub_mixes;
+    unsigned int num_sub_mixes;
+
+    unsigned int count_label;
+    char **language_label;
+    char **mix_presentation_annotations;
+} AVIAMFMixPresentation;
+
+AVIAMFMixPresentation *avformat_iamf_mix_presentation_alloc(void);
+
+int avformat_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation,
+                                              unsigned int num_submix_elements,
+                                              unsigned int num_submix_layouts);
+
+void avformat_iamf_mix_presentation_free(AVIAMFMixPresentation **mix_presentation);
+
+#endif /* AVFORMAT_IAMF_H */
diff --git a/libavformat/iamf_internal.h b/libavformat/iamf_internal.h
new file mode 100644
index 0000000000..e4cfbcae33
--- /dev/null
+++ b/libavformat/iamf_internal.h
@@ -0,0 +1,86 @@ 
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_IAMF_INTERNAL_H
+#define AVFORMAT_IAMF_INTERNAL_H
+
+#include <stdint.h>
+
+#include "libavutil/channel_layout.h"
+
+// Number of leading OBU bytes the header parser looks at: one byte of
+// type/flags plus room for three leb128 fields of up to 8 bytes each.
+#define MAX_IAMF_OBU_HEADER_SIZE (1 + 8 * 3)
+
+// OBU types (section 3.2).
+// The demuxer in iamfdec.c reads this value from a 5-bit header field.
+enum IAMF_OBU_Type {
+    IAMF_OBU_IA_CODEC_CONFIG        = 0,
+    IAMF_OBU_IA_AUDIO_ELEMENT       = 1,
+    IAMF_OBU_IA_MIX_PRESENTATION    = 2,
+    IAMF_OBU_IA_PARAMETER_BLOCK     = 3,
+    IAMF_OBU_IA_TEMPORAL_DELIMITER  = 4,
+    IAMF_OBU_IA_AUDIO_FRAME         = 5,
+    IAMF_OBU_IA_AUDIO_FRAME_ID0     = 6,
+    IAMF_OBU_IA_AUDIO_FRAME_ID1     = 7,
+    IAMF_OBU_IA_AUDIO_FRAME_ID2     = 8,
+    IAMF_OBU_IA_AUDIO_FRAME_ID3     = 9,
+    IAMF_OBU_IA_AUDIO_FRAME_ID4     = 10,
+    IAMF_OBU_IA_AUDIO_FRAME_ID5     = 11,
+    IAMF_OBU_IA_AUDIO_FRAME_ID6     = 12,
+    IAMF_OBU_IA_AUDIO_FRAME_ID7     = 13,
+    IAMF_OBU_IA_AUDIO_FRAME_ID8     = 14,
+    IAMF_OBU_IA_AUDIO_FRAME_ID9     = 15,
+    IAMF_OBU_IA_AUDIO_FRAME_ID10    = 16,
+    IAMF_OBU_IA_AUDIO_FRAME_ID11    = 17,
+    IAMF_OBU_IA_AUDIO_FRAME_ID12    = 18,
+    IAMF_OBU_IA_AUDIO_FRAME_ID13    = 19,
+    IAMF_OBU_IA_AUDIO_FRAME_ID14    = 20,
+    IAMF_OBU_IA_AUDIO_FRAME_ID15    = 21,
+    IAMF_OBU_IA_AUDIO_FRAME_ID16    = 22,
+    IAMF_OBU_IA_AUDIO_FRAME_ID17    = 23,
+    // 24~30 reserved.
+    IAMF_OBU_IA_SEQUENCE_HEADER     = 31,
+};
+
+// Sound system identifiers. ff_iamf_sound_system_map (declared below) pairs
+// each identifier with an AVChannelLayout.
+// NOTE(review): the quoted names presumably refer to the loudspeaker
+// configurations of ITU-R BS.2051 -- confirm against the IAMF specification.
+enum IAMF_Sound_System {
+    SOUND_SYSTEM_A_0_2_0  = 0,  // "Loudspeaker configuration for Sound System A"
+    SOUND_SYSTEM_B_0_5_0  = 1,  // "Loudspeaker configuration for Sound System B"
+    SOUND_SYSTEM_C_2_5_0  = 2,  // "Loudspeaker configuration for Sound System C"
+    SOUND_SYSTEM_D_4_5_0  = 3,  // "Loudspeaker configuration for Sound System D"
+    SOUND_SYSTEM_E_4_5_1  = 4,  // "Loudspeaker configuration for Sound System E"
+    SOUND_SYSTEM_F_3_7_0  = 5,  // "Loudspeaker configuration for Sound System F"
+    SOUND_SYSTEM_G_4_9_0  = 6,  // "Loudspeaker configuration for Sound System G"
+    SOUND_SYSTEM_H_9_10_3 = 7,  // "Loudspeaker configuration for Sound System H"
+    SOUND_SYSTEM_I_0_7_0  = 8,  // "Loudspeaker configuration for Sound System I"
+    SOUND_SYSTEM_J_4_7_0  = 9, // "Loudspeaker configuration for Sound System J"
+    SOUND_SYSTEM_10_2_7_0 = 10, // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
+    SOUND_SYSTEM_11_2_3_0 = 11, // Front subset of "Loudspeaker configuration for Sound System J"
+    SOUND_SYSTEM_12_0_1_0 = 12, // Mono
+};
+
+// Channel layouts for scalable audio layers. The demuxer indexes this table
+// with the 4-bit loudspeaker_layout value and only does so for values < 10.
+extern const AVChannelLayout ff_iamf_scalable_ch_layouts[10];
+
+// Associates a sound system identifier with its channel layout.
+struct IAMFSoundSystemMap {
+    enum IAMF_Sound_System id;
+    AVChannelLayout layout;
+};
+
+// One entry per enum IAMF_Sound_System value (13 entries); the table itself
+// is defined outside this header.
+extern const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13];
+
+#endif /* AVFORMAT_IAMF_INTERNAL_H */
diff --git a/libavformat/iamfdec.c b/libavformat/iamfdec.c
new file mode 100644
index 0000000000..799addbc8c
--- /dev/null
+++ b/libavformat/iamfdec.c
@@ -0,0 +1,1646 @@ 
+/*
+ * Immersive Audio Model and Formats demuxer
+ * Copyright (c) 2023 James Almer <jamrial@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config_components.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavcodec/get_bits.h"
+#include "libavcodec/flac.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/put_bits.h"
+#include "avformat.h"
+#include "avio_internal.h"
+#include "demux.h"
+#include "iamf.h"
+#include "iamf_internal.h"
+#include "internal.h"
+#include "isom.h"
+
+/**
+ * State parsed from one Codec Config OBU (see codec_config_obu()).
+ */
+typedef struct IAMFCodecConfig {
+    unsigned codec_config_id;   ///< id used by Audio Elements to reference this config
+    enum AVCodecID codec_id;    ///< AV_CODEC_ID_NONE when the codec tag is not recognized
+    unsigned nb_samples;        ///< copied to codecpar->frame_size of the substreams
+    int seek_preroll;           ///< copied to codecpar->seek_preroll of the substreams
+    uint8_t *extradata;         ///< av_malloc()ed by the *_decoder_config() helpers; may be NULL
+    int extradata_size;
+    int sample_rate;
+} IAMFCodecConfig;
+
+/**
+ * Demuxer-side bookkeeping for one Audio Element OBU.
+ */
+typedef struct IAMFAudioElement {
+    AVStreamGroup *stream_group;    ///< group created for this Audio Element
+
+    AVStream **audio_substreams;    ///< array of num_substreams stream pointers
+    int num_substreams;
+} IAMFAudioElement;
+
+/**
+ * Demuxer-side bookkeeping for one Mix Presentation.
+ */
+typedef struct IAMFMixPresentation {
+    AVStreamGroup *stream_group;    ///< presumably the group exporting this presentation -- TODO confirm
+} IAMFMixPresentation;
+
+/**
+ * A parameter definition registered by param_parse(), looked up by
+ * param->parameter_id.
+ */
+typedef struct IAMFParamDefinition {
+    const AVIAMFAudioElement *audio_element;    ///< element passed to param_parse() when first registered
+    AVIAMFParamDefinition *param;               ///< allocated with avformat_iamf_param_definition_alloc()
+} IAMFParamDefinition;
+
+/**
+ * Private context of the IAMF demuxer.
+ *
+ * The four arrays below are grown one entry at a time as the matching OBUs
+ * are parsed; each nb_* field is the element count of the array above it.
+ */
+typedef struct IAMFDemuxContext {
+    IAMFCodecConfig *codec_configs;
+    int nb_codec_configs;
+    IAMFAudioElement *audio_elements;
+    int nb_audio_elements;
+    IAMFMixPresentation *mix_presentations;
+    int nb_mix_presentations;
+    IAMFParamDefinition *param_definitions;
+    int nb_param_definitions;
+
+    // Packet side data
+    // NOTE(review): presumably the *_size fields hold the byte size of the
+    // buffer next to them -- confirm against the packet reading code.
+    AVIAMFParamDefinition *mix;
+    size_t mix_size;
+    AVIAMFParamDefinition *demix;
+    size_t demix_size;
+    AVIAMFParamDefinition *recon;
+    size_t recon_size;
+} IAMFDemuxContext;
+
+/**
+ * Read a leb128 value of at most 8 bytes from a bit reader.
+ *
+ * Only 32 bits of payload are accepted; any set bit beyond that, or a
+ * continuation flag on the eighth byte, yields AVERROR_INVALIDDATA. As the
+ * return type is unsigned, the error code shows up as a value above INT_MAX.
+ */
+static inline unsigned get_leb128(GetBitContext *gb) {
+    unsigned value = 0;
+    int idx = 0;
+
+    for (;;) {
+        const unsigned octet   = get_bits(gb, 8);
+        const unsigned payload = octet & 0x7f;
+
+        if (idx < 4 || (idx == 4 && payload < 16))
+            value |= payload << (7 * idx);
+        else if (payload)
+            return AVERROR_INVALIDDATA;
+
+        idx++;
+        if (!(octet & 0x80))
+            break;                          // last byte of the value
+        if (idx == 8)
+            return AVERROR_INVALIDDATA;     // more than 8 bytes
+    }
+
+    return value;
+}
+
+/**
+ * Parse the header of the OBU located at the start of buf.
+ *
+ * At most MAX_IAMF_OBU_HEADER_SIZE bytes of buf are examined.
+ *
+ * @param buf       start of the OBU
+ * @param buf_size  number of bytes available in buf
+ * @param obu_size  on success, the coded obu_size minus the bytes taken by the
+ *                  optional trimming and extension fields parsed here
+ * @param start_pos on success, offset from buf of the first byte following
+ *                  the fields parsed here
+ * @param type      OBU type read from the first 5 bits (written even when a
+ *                  later check fails)
+ * @return the coded obu_size plus the number of bytes up to and including the
+ *         obu_size field, or a negative AVERROR code
+ */
+static int parse_obu_header(const uint8_t *buf, int buf_size,
+                            unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type)
+{
+    GetBitContext gb;
+    int ret, extension_flag, trimming, start;
+    unsigned size;
+
+    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
+    if (ret < 0)
+        return ret;
+
+    *type          = get_bits(&gb, 5);
+    av_log(NULL, AV_LOG_DEBUG, "OBU type %d\n", *type);
+    /*redundant      =*/ get_bits1(&gb);
+    trimming       = get_bits1(&gb);
+    extension_flag = get_bits1(&gb);
+
+    // get_leb128() reports errors as a negative code converted to unsigned,
+    // which this range check catches as well.
+    *obu_size = get_leb128(&gb);
+    if (*obu_size > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    // Byte offset right after the obu_size field; obu_size counts from here.
+    start = get_bits_count(&gb) / 8;
+
+    if (trimming) {
+        get_leb128(&gb); // num_samples_to_trim_at_end
+        get_leb128(&gb); // num_samples_to_trim_at_start
+    }
+
+    if (extension_flag) {
+        unsigned extension_bytes = get_leb128(&gb);
+        if (extension_bytes > INT_MAX / 8)
+            return AVERROR_INVALIDDATA;
+        skip_bits_long(&gb, extension_bytes * 8);
+    }
+
+    // The reads above may have run past the end of the buffer.
+    if (get_bits_left(&gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    size = *obu_size + start;
+    if (size > INT_MAX)
+        return AVERROR_INVALIDDATA;
+
+    // Remove the trimming/extension bytes consumed above from the reported
+    // size, and report where the remaining data begins.
+    *obu_size -= get_bits_count(&gb) / 8 - start;
+    *start_pos = size - *obu_size;
+
+    av_log(NULL, AV_LOG_DEBUG, "OBU size %u\n", *obu_size);
+    return size;
+}
+
+/**
+ * Score one OBU for probing.
+ *
+ * @param buf      data following the OBU header
+ * @param buf_size number of bytes available in buf
+ * @param type     type of the OBU
+ * @param seq      set to 1 once a valid IA Sequence Header has been seen
+ * @return a probe score >= 0 when a decision was reached, < 0 if we need
+ *         more data
+ */
+static int get_score(const uint8_t *buf, int buf_size, enum IAMF_OBU_Type type, int *seq)
+{
+    if (type == IAMF_OBU_IA_SEQUENCE_HEADER) {
+        const int has_magic = buf_size >= 4 &&
+                              AV_RB32(buf) == MKBETAG('i','a','m','f');
+
+        if (!has_magic)
+            return 0;
+
+        *seq = 1;
+        return -1;
+    }
+
+    // Nothing but a sequence header is acceptable before one has been seen.
+    if (!*seq)
+        return 0;
+
+    // Descriptor, parameter and delimiter OBUs: keep looking.
+    if (type >= IAMF_OBU_IA_CODEC_CONFIG && type <= IAMF_OBU_IA_TEMPORAL_DELIMITER)
+        return -1;
+
+    // An audio frame after a sequence header settles it.
+    if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+        return AVPROBE_SCORE_EXTENSION + 1;
+
+    return 0;
+}
+
+/**
+ * Probe callback: walk the OBUs in the probe buffer until get_score()
+ * reaches a decision or an OBU header fails to parse (score 0).
+ */
+static int iamf_probe(const AVProbeData *p)
+{
+    int seen_header = 0;
+    int offset = 0;
+
+    for (;;) {
+        const uint8_t *obu    = p->buf + offset;
+        const int remaining   = p->buf_size - offset;
+        enum IAMF_OBU_Type obu_type;
+        unsigned payload_size;
+        int payload_pos, score, obu_len;
+
+        obu_len = parse_obu_header(obu, remaining,
+                                   &payload_size, &payload_pos, &obu_type);
+        if (obu_len < 0)
+            return 0;
+
+        score = get_score(obu + payload_pos, remaining - payload_pos,
+                          obu_type, &seen_header);
+        if (score >= 0)
+            return score;
+
+        // Advance to the next OBU without stepping past the buffer end.
+        offset += FFMIN(obu_len, remaining);
+    }
+}
+
+/**
+ * Read a leb128 value of at most 8 bytes from an AVIOContext.
+ *
+ * Only 32 bits of payload are accepted.
+ *
+ * @param pb  input context
+ * @param len receives the decoded value; it is updated byte by byte, so it
+ *            holds a partial value when an error is returned
+ * @return the number of bytes read, pb->error on an I/O error, or
+ *         AVERROR_INVALIDDATA on EOF or an out-of-range/overlong value
+ */
+static inline int leb(AVIOContext *pb, unsigned *len) {
+    int nb_bytes = 0;
+
+    *len = 0;
+
+    for (;;) {
+        const int octet = avio_r8(pb);
+        unsigned payload;
+
+        if (pb->error)
+            return pb->error;
+        if (pb->eof_reached)
+            return  AVERROR_INVALIDDATA;
+
+        payload = octet & 0x7f;
+        if (nb_bytes < 4 || (nb_bytes == 4 && payload < 16))
+            *len |= payload << (7 * nb_bytes);
+        else if (payload)
+            return AVERROR_INVALIDDATA;
+
+        nb_bytes++;
+        if (!(octet & 0x80))
+            return nb_bytes;                // last byte of the value
+        if (nb_bytes == 8)
+            return AVERROR_INVALIDDATA;     // more than 8 bytes
+    }
+}
+
+/**
+ * Build Opus extradata from the rest of a Codec Config OBU.
+ *
+ * The remaining bytes of the OBU (at least 11) are stored after an 8 byte
+ * "OpusHead" tag, and the sample rate is set to 48000.
+ *
+ * @param len total size of the OBU buffer pb reads from
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int opus_decoder_config(AVFormatContext *s, AVIOContext *pb, int len,
+                               IAMFCodecConfig *codec_config)
+{
+    const int payload_size = len - avio_tell(pb);
+    uint8_t *head;
+
+    if (payload_size < 11)
+        return AVERROR_INVALIDDATA;
+
+    head = av_malloc(payload_size + 8);
+    codec_config->extradata = head;
+    if (!head)
+        return AVERROR(ENOMEM);
+
+    AV_WB32(head,     MKBETAG('O','p','u','s'));
+    AV_WB32(head + 4, MKBETAG('H','e','a','d'));
+
+    codec_config->extradata_size = avio_read(pb, head + 8, payload_size);
+    if (codec_config->extradata_size < payload_size)
+        return AVERROR_INVALIDDATA;
+
+    codec_config->extradata_size += 8;      // account for the tag
+    codec_config->sample_rate     = 48000;
+
+    return 0;
+}
+
+/**
+ * Parse the AAC decoder configuration found in a Codec Config OBU.
+ *
+ * Validates the decoder config descriptor fields, stores everything after
+ * the decoder specific descriptor tag as extradata and derives the sample
+ * rate from it.
+ *
+ * @param len total size of the OBU buffer pb reads from
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int aac_decoder_config(AVFormatContext *s, AVIOContext *pb, int len,
+                              IAMFCodecConfig *codec_config)
+{
+    MPEG4AudioConfig cfg = { 0 };
+    int object_type_id, codec_id, stream_type;
+    int ret, tag, left;
+
+    tag = avio_r8(pb);
+    if (tag != MP4DecConfigDescrTag)
+        return AVERROR_INVALIDDATA;
+
+    // Only object type 0x40 is accepted.
+    object_type_id = avio_r8(pb);
+    if (object_type_id != 0x40)
+        return AVERROR_INVALIDDATA;
+
+    // The upper six bits must be 5 and bit 1 must be clear.
+    stream_type = avio_r8(pb);
+    if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1))
+        return AVERROR_INVALIDDATA;
+
+    avio_skip(pb, 3); // buffer size db
+    avio_skip(pb, 4); // rc_max_rate
+    avio_skip(pb, 4); // avg bitrate
+
+    // The object type must agree with the codec announced by the OBU.
+    codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id);
+    if (codec_id && codec_id != codec_config->codec_id)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_r8(pb);
+    if (tag != MP4DecSpecificDescrTag)
+        return AVERROR_INVALIDDATA;
+
+    // Everything left in the OBU becomes the extradata.
+    left = len - avio_tell(pb);
+    if (left <= 0)
+        return AVERROR_INVALIDDATA;
+
+    codec_config->extradata = av_malloc(left);
+    if (!codec_config->extradata)
+        return AVERROR(ENOMEM);
+
+    codec_config->extradata_size = avio_read(pb, codec_config->extradata, left);
+    if (codec_config->extradata_size < left)
+        return AVERROR_INVALIDDATA;
+
+    ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata,
+                                        codec_config->extradata_size, 1, s);
+    if (ret < 0)
+        return ret;
+
+    codec_config->sample_rate = cfg.sample_rate;
+
+    return 0;
+}
+
+/**
+ * Store the FLAC STREAMINFO found in a Codec Config OBU as extradata.
+ *
+ * The 4 byte metadata block header is skipped; the rest of the OBU (at least
+ * FLAC_STREAMINFO_SIZE bytes) is kept and the 20-bit sample rate is read from
+ * byte offset 10 of it.
+ *
+ * @param len total size of the OBU buffer pb reads from
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int flac_decoder_config(AVFormatContext *s, AVIOContext *pb, int len,
+                               IAMFCodecConfig *codec_config)
+{
+    uint8_t *streaminfo;
+    int payload_size;
+
+    avio_skip(pb, 4); // METADATA_BLOCK_HEADER
+
+    payload_size = len - avio_tell(pb);
+    if (payload_size < FLAC_STREAMINFO_SIZE)
+        return AVERROR_INVALIDDATA;
+
+    streaminfo = av_malloc(payload_size);
+    codec_config->extradata = streaminfo;
+    if (!streaminfo)
+        return AVERROR(ENOMEM);
+
+    codec_config->extradata_size = avio_read(pb, streaminfo, payload_size);
+    if (codec_config->extradata_size < payload_size)
+        return AVERROR_INVALIDDATA;
+
+    codec_config->sample_rate = AV_RB24(streaminfo + 10) >> 4;
+
+    return 0;
+}
+
+/**
+ * Parse the LPCM decoder configuration found in a Codec Config OBU.
+ *
+ * Selects the PCM codec id from the endianness flag and the sample size, and
+ * reads the sample rate. The configuration must end exactly at the end of
+ * the OBU.
+ *
+ * @param len total size of the OBU buffer pb reads from
+ * @return 0 on success, AVERROR_INVALIDDATA on an unsupported or malformed
+ *         configuration
+ */
+static int ipcm_decoder_config(AVFormatContext *s, AVIOContext *pb, int len,
+                               IAMFCodecConfig *codec_config)
+{
+    // Indexed by [sample_format][sample_size / 8 - 2].
+    static const enum AVCodecID codec_ids[2][3] = {
+        { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE },
+        { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE },
+    };
+    int sample_format = avio_r8(pb); // 0 = BE, 1 = LE
+    int sample_size   = avio_r8(pb); // 16, 24, 32
+
+    if (sample_format > 1)
+        return AVERROR_INVALIDDATA;
+
+    // Accept only the exact sizes the table covers. Smaller values would
+    // otherwise produce a negative table index.
+    if (sample_size != 16 && sample_size != 24 && sample_size != 32)
+        return AVERROR_INVALIDDATA;
+
+    codec_config->codec_id = codec_ids[sample_format][sample_size / 8 - 2];
+    codec_config->sample_rate = avio_rb32(pb);
+
+    if (len - avio_tell(pb))
+        return AVERROR_INVALIDDATA;
+
+    return 0;
+}
+
+/**
+ * Read and parse one Codec Config OBU of len bytes from s->pb.
+ *
+ * The OBU is first copied into a temporary buffer and parsed from there. A
+ * new IAMFCodecConfig is appended to the demuxer context; a codec_config_id
+ * that is already registered is rejected.
+ *
+ * @param len payload size of the OBU
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int codec_config_obu(AVFormatContext *s, int len)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    IAMFCodecConfig *codec_config = NULL;
+    FFIOContext b;
+    AVIOContext *pb;
+    uint8_t *buf;
+    enum AVCodecID avcodec_id;
+    unsigned codec_config_id, nb_samples, codec_id;
+    int16_t seek_preroll;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    // A short read is reported as invalid data.
+    ret = avio_read(s->pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // Parse from the in-memory copy from here on.
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pb = &b.pub;
+
+    ret = leb(pb, &codec_config_id);
+    if (ret < 0)
+        goto fail;
+
+    codec_id = avio_rb32(pb);
+    ret = leb(pb, &nb_samples);
+    if (ret < 0)
+        goto fail;
+
+    seek_preroll = avio_rb16(pb);
+
+    // 'ipcm' is not listed here: ipcm_decoder_config() picks the exact PCM
+    // codec id further down.
+    switch(codec_id) {
+    case MKBETAG('O','p','u','s'):
+        avcodec_id = AV_CODEC_ID_OPUS;
+        break;
+    case MKBETAG('m','p','4','a'):
+        avcodec_id = AV_CODEC_ID_AAC;
+        break;
+    case MKBETAG('f','L','a','C'):
+        avcodec_id = AV_CODEC_ID_FLAC;
+        break;
+    default:
+        avcodec_id = AV_CODEC_ID_NONE;
+        break;
+    }
+
+    // codec_config_id must be unique.
+    for (int i = 0; i < c->nb_codec_configs; i++)
+        if (c->codec_configs[i].codec_config_id == codec_config_id) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    codec_config = av_dynarray2_add_nofree((void **)&c->codec_configs, &c->nb_codec_configs,
+                                            sizeof(*c->codec_configs), NULL);
+    if (!codec_config) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    memset(codec_config, 0, sizeof(*codec_config));
+
+    codec_config->codec_config_id = codec_config_id;
+    codec_config->codec_id = avcodec_id;
+    codec_config->nb_samples = nb_samples;
+    codec_config->seek_preroll = seek_preroll;
+
+    // Codec specific part. Unknown tags leave ret at the non-negative value
+    // of the last leb() call, so they are not treated as an error.
+    switch(codec_id) {
+    case MKBETAG('O','p','u','s'):
+        ret = opus_decoder_config(s, pb, len, codec_config);
+        break;
+    case MKBETAG('m','p','4','a'):
+        ret = aac_decoder_config(s, pb, len, codec_config);
+        break;
+    case MKBETAG('f','L','a','C'):
+        ret = flac_decoder_config(s, pb, len, codec_config);
+        break;
+    case MKBETAG('i','p','c','m'):
+        ret = ipcm_decoder_config(s, pb, len, codec_config);
+        break;
+    default:
+        break;
+    }
+    if (ret < 0)
+        goto fail;
+
+    av_log(s, AV_LOG_DEBUG, "%"PRId64" bytes left at the end of codec_config_obu\n", len - avio_tell(pb));
+
+    ret = 0;
+    // Reached on success as well: only the temporary buffer is released here.
+fail:
+    av_free(buf);
+    return ret;
+}
+
+/**
+ * Rewrite the channel count stored in a stream's extradata so that it
+ * matches st->codecpar->ch_layout.nb_channels.
+ *
+ * Opus: byte 9 of the extradata is overwritten.
+ * AAC:  the first 5 bytes are rebuilt with a new 4-bit channel configuration.
+ * FLAC: the first 13 bytes are rebuilt with a new 3-bit channel count.
+ * Other codecs are left untouched.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int update_extradata(AVFormatContext *s, AVStream *st)
+{
+    GetBitContext gb;
+    PutBitContext pb;
+    int ret;
+
+    switch(st->codecpar->codec_id) {
+    case AV_CODEC_ID_OPUS:
+        AV_WB8(st->codecpar->extradata + 9, st->codecpar->ch_layout.nb_channels);
+        break;
+    case AV_CODEC_ID_AAC: {
+        uint8_t buf[5];
+
+        init_put_bits(&pb, buf, sizeof(buf));
+        ret = init_get_bits8(&gb, st->codecpar->extradata, st->codecpar->extradata_size);
+        if (ret < 0)
+            return ret;
+
+        ret = get_bits(&gb, 5);
+        put_bits(&pb, 5, ret);
+        if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it
+            put_bits(&pb, 6, get_bits(&gb, 6));
+        ret = get_bits(&gb, 4);
+        put_bits(&pb, 4, ret);
+        if (ret == 0x0f)
+            put_bits(&pb, 24, get_bits(&gb, 24));
+
+        skip_bits(&gb, 4);
+        put_bits(&pb, 4, st->codecpar->ch_layout.nb_channels); // set channel config
+        ret = put_bits_left(&pb);
+        // With both escape fields present, more bits were written than buf
+        // can hold; bail out before flushing.
+        if (ret < 0)
+            return AVERROR_INVALIDDATA;
+        // Up to 27 bits can remain, which is more than get_bits() may read
+        // in one call.
+        put_bits(&pb, ret, get_bits_long(&gb, ret));
+        flush_put_bits(&pb);
+
+        memcpy(st->codecpar->extradata, buf, sizeof(buf));
+        break;
+    }
+    case AV_CODEC_ID_FLAC: {
+        uint8_t buf[13];
+
+        init_put_bits(&pb, buf, sizeof(buf));
+        ret = init_get_bits8(&gb, st->codecpar->extradata, st->codecpar->extradata_size);
+        if (ret < 0)
+            return ret;
+
+        put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize
+        put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize
+        put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate
+        skip_bits(&gb, 3);
+        put_bits(&pb, 3, st->codecpar->ch_layout.nb_channels - 1);
+        ret = put_bits_left(&pb);
+        put_bits(&pb, ret, get_bits(&gb, ret));
+        flush_put_bits(&pb);
+
+        memcpy(st->codecpar->extradata, buf, sizeof(buf));
+        break;
+    }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the scalable channel layout configuration of an Audio Element.
+ *
+ * One layer is added to the element per coded layer (at most 6). Each layer
+ * takes the next substream_count entries of audio_element->audio_substreams;
+ * those streams are added to the stream group, get a stereo (coupled) or mono
+ * channel layout, and have their extradata and time base updated.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise. In particular,
+ *         AVERROR_INVALIDDATA when the layers reference more substreams than
+ *         the Audio Element declared.
+ */
+static int scalable_channel_layout_config(AVFormatContext *s, AVIOContext *pb,
+                                          IAMFAudioElement *audio_element,
+                                          const IAMFCodecConfig *codec_config)
+{
+    AVStreamGroup *stg = audio_element->stream_group;
+    int num_layers, k = 0;
+
+    num_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3);
+    // skip_bits(&gb, 5); //reserved
+
+    if (num_layers > 6)
+        return AVERROR_INVALIDDATA;
+
+    for (int i = 0; i < num_layers; i++) {
+        AVIAMFLayer *layer;
+        int loudspeaker_layout, output_gain_is_present_flag;
+        int substream_count, coupled_substream_count;
+        int ret, byte = avio_r8(pb);
+
+        ret = avformat_iamf_audio_element_add_layer(stg->params.iamf_audio_element);
+        if (ret < 0)
+            return ret;
+
+        loudspeaker_layout = byte >> 4; // get_bits(&gb, 4);
+        output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb);
+        layer = stg->params.iamf_audio_element->layers[i];
+        layer->recon_gain_is_present = (byte >> 2) & 1;
+        substream_count = avio_r8(pb);
+        coupled_substream_count = avio_r8(pb);
+
+        // k substreams are already taken by previous layers; do not walk
+        // past the end of audio_substreams[].
+        if (substream_count > audio_element->num_substreams - k)
+            return AVERROR_INVALIDDATA;
+        layer->substream_count = substream_count;
+
+        if (output_gain_is_present_flag) {
+            layer->output_gain_flags = avio_r8(pb) >> 2;  // get_bits(&gb, 6);
+            layer->output_gain = sign_extend(avio_rb16(pb), 16);
+        }
+
+        if (loudspeaker_layout < 10) {
+            ret = av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
+            if (ret < 0)
+                return ret;
+        } else
+            layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+                                                          .nb_channels = substream_count +
+                                                                         coupled_substream_count };
+
+        for (int j = 0; j < substream_count; j++) {
+            AVStream *st = audio_element->audio_substreams[k++];
+
+            ret = avformat_stream_group_add_stream(stg, st);
+            if (ret < 0)
+                return ret;
+
+            // The first coupled_substream_count streams of a layer are stereo.
+            st->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+                                                                      (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+            ret = update_extradata(s, st);
+            if (ret < 0)
+                return ret;
+
+            avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the ambisonics configuration of an Audio Element.
+ *
+ * A single layer is added to the element. In mode 0 every substream is mono
+ * and a custom channel map is read; in mode 1 a demixing matrix is read and
+ * the first coupled_substream_count substreams are stereo. Modes above 1 are
+ * ignored (0 is returned without adding a layer).
+ *
+ * @return 0 on success, a negative AVERROR code otherwise. The output channel
+ *         count must be a non-zero perfect square and the substream count
+ *         must match the one declared by the Audio Element.
+ */
+static int ambisonics_config(AVFormatContext *s, AVIOContext *pb,
+                             IAMFAudioElement *audio_element,
+                             const IAMFCodecConfig *codec_config)
+{
+    AVStreamGroup *stg = audio_element->stream_group;
+    AVIAMFLayer *layer;
+    unsigned ambisonics_mode;
+    int output_channel_count, substream_count, order;
+    int ret;
+
+    ret = leb(pb, &ambisonics_mode);
+    if (ret < 0)
+        return ret;
+
+    if (ambisonics_mode > 1)
+        return 0;
+
+    output_channel_count = avio_r8(pb);  // C
+    substream_count = avio_r8(pb);  // N
+    if (audio_element->num_substreams != substream_count)
+        return AVERROR_INVALIDDATA;
+
+    // Zero channels would make the sqrt() argument below negative; the
+    // resulting NaN cannot be converted to int.
+    if (output_channel_count < 1)
+        return AVERROR_INVALIDDATA;
+
+    order = floor(sqrt(output_channel_count - 1));
+    /* incomplete order - some harmonics are missing */
+    if ((order + 1) * (order + 1) != output_channel_count)
+        return AVERROR_INVALIDDATA;
+
+    ret = avformat_iamf_audio_element_add_layer(stg->params.iamf_audio_element);
+    if (ret < 0)
+        return ret;
+
+    layer = stg->params.iamf_audio_element->layers[0];
+    layer->ambisonics_mode = ambisonics_mode;
+    layer->substream_count = substream_count;
+    if (ambisonics_mode == 0) {
+        for (int i = 0; i < substream_count; i++) {
+            AVStream *st = audio_element->audio_substreams[i];
+
+            st->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+            ret = avformat_stream_group_add_stream(stg, st);
+            if (ret < 0)
+                return ret;
+
+            ret = update_extradata(s, st);
+            if (ret < 0)
+                return ret;
+
+            avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+
+        layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM;
+        layer->ch_layout.nb_channels = output_channel_count;
+        layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map));
+        if (!layer->ch_layout.u.map)
+            return AVERROR(ENOMEM);
+
+        // One mapping byte per output channel.
+        for (int i = 0; i < output_channel_count; i++)
+            layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE;
+    } else {
+        int coupled_substream_count = avio_r8(pb);  // M
+        int nb_demixing_matrix = substream_count + coupled_substream_count;
+        int demixing_matrix_size = nb_demixing_matrix * output_channel_count;
+
+        layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count };
+        layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix));
+        if (!layer->demixing_matrix)
+            return AVERROR(ENOMEM);
+
+        // (N + M) * C signed 16-bit coefficients.
+        for (int i = 0; i < demixing_matrix_size; i++)
+            layer->demixing_matrix[i] = sign_extend(avio_rb16(pb), 16);
+
+        for (int i = 0; i < substream_count; i++) {
+            AVStream *st = audio_element->audio_substreams[i];
+
+            // The first coupled_substream_count streams are stereo.
+            st->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO :
+                                                                      (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
+
+            ret = avformat_stream_group_add_stream(stg, st);
+            if (ret < 0)
+                return ret;
+
+            ret = update_extradata(s, st);
+            if (ret < 0)
+                return ret;
+
+            avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Parse a parameter definition and register it in the demuxer context.
+ *
+ * If a definition with the same parameter_id is already registered, the new
+ * one must agree with it in mode, type and number of subblocks, and the
+ * existing entry is updated. Otherwise a new entry is appended; it is only
+ * committed to the context (nb_param_definitions) once parsing succeeded.
+ *
+ * @param param_definition_type one of the AV_IAMF_PARAMETER_DEFINITION_* values
+ * @param audio_element         element stored with a newly created entry
+ * @param out_param_definition  receives the parsed definition; must not be NULL
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int param_parse(AVFormatContext *s, AVIOContext *pb,
+                       unsigned int param_definition_type,
+                       const AVIAMFAudioElement *audio_element,
+                       AVIAMFParamDefinition **out_param_definition)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    IAMFParamDefinition *param_definition;
+    const IAMFParamDefinition *old_param = NULL;
+    unsigned int parameter_id, parameter_rate, param_definition_mode;
+    // These are only coded when param_definition_mode is 0, but are stored
+    // unconditionally below, so they need defined values.
+    unsigned int duration = 0, constant_subblock_duration = 0, num_subblocks = 0;
+    int nb_param_definitions = c->nb_param_definitions, ret;
+
+    ret = leb(pb, &parameter_id);
+    if (ret < 0)
+        return ret;
+
+    for (int i = 0; i < c->nb_param_definitions; i++)
+        if (c->param_definitions[i].param->parameter_id == parameter_id) {
+            old_param = param_definition = &c->param_definitions[i];
+            break;
+        }
+
+    if (!old_param) {
+        // Grow the array using a local counter; the context's counter is
+        // only updated at the end, so a failure leaves the entry unregistered.
+        param_definition = av_dynarray2_add_nofree((void **)&c->param_definitions, &nb_param_definitions,
+                                                   sizeof(*c->param_definitions), NULL);
+        if (!param_definition)
+            return AVERROR(ENOMEM);
+
+        memset(param_definition, 0, sizeof(*param_definition));
+    }
+
+    ret = leb(pb, &parameter_rate);
+    if (ret < 0)
+        return ret;
+
+    param_definition_mode = avio_r8(pb) >> 7;
+
+    if (old_param && (param_definition_mode != old_param->param->param_definition_mode ||
+                      param_definition_type != old_param->param->param_definition_type)) {
+        av_log(s, AV_LOG_ERROR, "Inconsistent param_definition_mode or param_definition_type values "
+                                "for parameter_id %d\n", parameter_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (param_definition_mode == 0) {
+        ret = leb(pb, &duration);
+        if (ret < 0)
+            return ret;
+
+        ret = leb(pb, &constant_subblock_duration);
+        if (ret < 0)
+            return ret;
+
+        // A zero constant duration means every subblock codes its own.
+        if (constant_subblock_duration == 0) {
+            ret = leb(pb, &num_subblocks);
+            if (ret < 0)
+                return ret;
+        } else
+            num_subblocks = duration / constant_subblock_duration;
+    }
+
+    if (old_param) {
+        if (num_subblocks != old_param->param->num_subblocks) {
+            av_log(s, AV_LOG_ERROR, "Inconsistent num_subblocks values for parameter_id %d\n", parameter_id);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        param_definition->param = avformat_iamf_param_definition_alloc(param_definition_type, num_subblocks, NULL);
+        if (!param_definition->param)
+            return AVERROR(ENOMEM);
+        param_definition->audio_element = audio_element;
+    }
+
+    for (int i = 0; i < num_subblocks; i++) {
+        void *subblock = avformat_iamf_param_definition_get_subblock(param_definition->param, i);
+        unsigned int subblock_duration = constant_subblock_duration;
+
+        if (constant_subblock_duration == 0) {
+            ret = leb(pb, &subblock_duration);
+            if (ret < 0) {
+                // Only a definition allocated by this call is released.
+                if (!old_param)
+                    av_freep(&param_definition->param);
+                return ret;
+            }
+        }
+
+        switch (param_definition_type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            AVIAMFMixGainParameterData *mix = subblock;
+            mix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            AVIAMFDemixingInfoParameterData *demix = subblock;
+            demix->subblock_duration = subblock_duration;
+            // DemixingInfoParameterData
+            demix->dmixp_mode = avio_r8(pb) >> 5;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            AVIAMFReconGainParameterData *recon = subblock;
+            recon->subblock_duration = subblock_duration;
+            break;
+        }
+        default:
+            if (!old_param)
+                av_freep(&param_definition->param);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    param_definition->param->parameter_id = parameter_id;
+    param_definition->param->parameter_rate = parameter_rate;
+    param_definition->param->param_definition_mode = param_definition_mode;
+    param_definition->param->duration = duration;
+    param_definition->param->constant_subblock_duration = constant_subblock_duration;
+    param_definition->param->num_subblocks = num_subblocks;
+
+    av_assert0(out_param_definition);
+    *out_param_definition = param_definition->param;
+
+    // Commit the new entry.
+    if (!old_param)
+        c->nb_param_definitions = nb_param_definitions;
+
+    return 0;
+}
+
+static int audio_element_obu(AVFormatContext *s, int len)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    const IAMFCodecConfig *codec_config = NULL;
+    AVIAMFAudioElement *avaudio_element;
+    IAMFAudioElement *audio_element;
+    FFIOContext b;
+    AVIOContext *pb;
+    uint8_t *buf;
+    unsigned audio_element_id, codec_config_id, num_substreams, num_parameters;
+    int audio_element_type, ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    ret = avio_read(s->pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pb = &b.pub;
+
+    ret = leb(pb, &audio_element_id);
+    if (ret < 0)
+        goto fail;
+
+    for (int i = 0; i < c->nb_audio_elements; i++)
+        if (c->audio_elements[i].stream_group->id == audio_element_id) {
+            av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    audio_element_type = avio_r8(pb) >> 5;
+
+    ret = leb(pb, &codec_config_id);
+    if (ret < 0)
+        goto fail;
+
+    for (int i = 0; i < c->nb_codec_configs; i++) {
+        if (c->codec_configs[i].codec_config_id == codec_config_id) {
+            codec_config = &c->codec_configs[i];
+            break;
+        }
+    }
+
+    if (!codec_config) {
+        av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id);
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (codec_config->codec_id == AV_CODEC_ID_NONE) {
+        av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n");
+        ret = 0;
+        goto fail;
+    }
+
+    ret = leb(pb, &num_substreams);
+    if (ret < 0)
+        goto fail;
+
+    audio_element = av_dynarray2_add_nofree((void **)&c->audio_elements, &c->nb_audio_elements,
+                                            sizeof(*c->audio_elements), NULL);
+    if (!audio_element) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    memset(audio_element, 0, sizeof(*audio_element));
+
+    audio_element->audio_substreams = av_calloc(num_substreams, sizeof(*audio_element->audio_substreams));
+    if (!audio_element->audio_substreams) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    audio_element->stream_group = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT);
+    if (!audio_element->stream_group)
+        return AVERROR(ENOMEM);
+    audio_element->stream_group->id = audio_element_id;
+    avaudio_element = audio_element->stream_group->params.iamf_audio_element;
+    avaudio_element->codec_config_id = codec_config_id;
+    avaudio_element->audio_element_type = audio_element_type;
+
+    audio_element->num_substreams = num_substreams;
+
+    for (int i = 0; i < num_substreams; i++) {
+        AVStream *st = audio_element->audio_substreams[i] = avformat_new_stream(s, NULL);
+        unsigned audio_substream_id;
+
+        if (!st) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        ret = leb(pb, &audio_substream_id);
+        if (ret < 0)
+            goto fail;
+
+        st->id = audio_substream_id;
+        st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+        st->codecpar->codec_id   = codec_config->codec_id;
+        st->codecpar->frame_size = codec_config->nb_samples;
+        st->codecpar->sample_rate = codec_config->sample_rate;
+        st->codecpar->seek_preroll = codec_config->seek_preroll;
+        ffstream(st)->need_parsing = AVSTREAM_PARSE_HEADERS;
+
+        switch(st->codecpar->codec_id) {
+        case AV_CODEC_ID_AAC:
+        case AV_CODEC_ID_FLAC:
+        case AV_CODEC_ID_OPUS:
+            st->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!st->codecpar->extradata) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            memcpy(st->codecpar->extradata, codec_config->extradata, codec_config->extradata_size);
+            memset(st->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            st->codecpar->extradata_size = codec_config->extradata_size;
+            break;
+        }
+    }
+
+    ret = leb(pb, &num_parameters);
+    if (ret < 0)
+        goto fail;
+
+    for (int i = 0; i < num_parameters; i++) {
+        unsigned param_definition_type;
+
+        ret = leb(pb, &param_definition_type);
+        if (ret < 0)
+            goto fail;
+
+        if (param_definition_type == 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        } else if (param_definition_type == 1) {
+            ret = param_parse(s, pb, param_definition_type, avaudio_element, &avaudio_element->demixing_info);
+            if (ret < 0)
+                goto fail;
+
+            avaudio_element->default_w = avio_r8(pb) >> 4;
+        } else if (param_definition_type == 2) {
+            ret = param_parse(s, pb, param_definition_type, avaudio_element, &avaudio_element->recon_gain_info);
+            if (ret < 0)
+                goto fail;
+        } else {
+            unsigned param_definition_size;
+            ret = leb(pb, &param_definition_size);
+            if (ret < 0)
+                goto fail;
+
+            avio_skip(pb, param_definition_size);
+        }
+    }
+
+    if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
+        ret = scalable_channel_layout_config(s, pb, audio_element, codec_config);
+        if (ret < 0)
+            goto fail;
+    } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) {
+        ret = ambisonics_config(s, pb, audio_element, codec_config);
+        if (ret < 0)
+            goto fail;
+    } else {
+        unsigned audio_element_config_size;
+        ret = leb(pb, &audio_element_config_size);
+        if (ret < 0)
+            goto fail;
+    }
+
+    av_log(s, AV_LOG_DEBUG, "%"PRId64" bytes left at the end of audio_element_obu\n", len - avio_tell(pb));
+
+    ret = 0;
+fail:
+    av_free(buf);
+
+    return ret;
+}
+
+/**
+ * Read a NUL-terminated label from pb and return a heap-allocated copy.
+ *
+ * The string is read through a 128-byte local buffer, so at most that many
+ * bytes are requested from pb.
+ *
+ * @param s     demuxer context (not used by this function)
+ * @param pb    I/O context to read the string from
+ * @param label receives the av_strdup()ed string on success; the caller
+ *              becomes its owner
+ * @return 0 on success, pb->error if the I/O context reports an error,
+ *         AVERROR_INVALIDDATA if pb reached its end,
+ *         AVERROR(ENOMEM) if the copy could not be allocated
+ */
+static int label_string(AVFormatContext *s, AVIOContext *pb, char **label)
+{
+    // avio_get_str() and av_strdup() both operate on char strings, so the
+    // scratch buffer is declared as char rather than uint8_t.
+    char buf[128];
+
+    avio_get_str(pb, sizeof(buf), buf, sizeof(buf));
+
+    if (pb->error)
+        return pb->error;
+    if (pb->eof_reached)
+        return AVERROR_INVALIDDATA;
+    *label = av_strdup(buf);
+    if (!*label)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/**
+ * Parse one Mix Presentation OBU and export it as a stream group.
+ *
+ * The len payload bytes are first copied from s->pb into a temporary buffer
+ * and then parsed through an in-memory I/O context. A new entry is appended
+ * to c->mix_presentations and a stream group of type
+ * AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION is created for it. Every
+ * substream of every Audio Element referenced by a submix is added to that
+ * stream group.
+ *
+ * On failure only the temporary buffer is released here; whatever was
+ * already attached to the stream group stays attached.
+ *
+ * @param s   demuxer context
+ * @param len size in bytes of the OBU payload to read from s->pb
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int mix_presentation_obu(AVFormatContext *s, int len)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    AVIAMFMixPresentation *mix_presentation;
+    IAMFMixPresentation *mixi;
+    FFIOContext b;
+    AVIOContext *pb;
+    uint8_t *buf;
+    unsigned mix_presentation_id;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    ret = avio_read(s->pb, buf, len);
+    if (ret != len) {
+        // A short read is reported as invalid data; a negative value is
+        // already an error code and is passed through.
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pb = &b.pub;
+
+    ret = leb(pb, &mix_presentation_id);
+    if (ret < 0)
+        goto fail;
+
+    for (int i = 0; i < c->nb_mix_presentations; i++)
+        if (c->mix_presentations[i].stream_group->id == mix_presentation_id) {
+            av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+    mixi = av_dynarray2_add_nofree((void **)&c->mix_presentations, &c->nb_mix_presentations,
+                                   sizeof(*c->mix_presentations), NULL);
+    if (!mixi) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    memset(mixi, 0, sizeof(*mixi));
+    mixi->stream_group = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION);
+    if (!mixi->stream_group) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    mixi->stream_group->id = mix_presentation_id;
+    mix_presentation = mixi->stream_group->params.iamf_mix_presentation;
+
+    ret = leb(pb, &mix_presentation->count_label);
+    if (ret < 0)
+        goto fail;
+
+    // count_label language labels come first ...
+    mix_presentation->language_label = av_calloc(mix_presentation->count_label, sizeof(*mix_presentation->language_label));
+    if (!mix_presentation->language_label) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (int i = 0; i < mix_presentation->count_label; i++) {
+        ret = label_string(s, pb, &mix_presentation->language_label[i]);
+        if (ret < 0)
+            goto fail;
+    }
+
+    // ... followed by the same number of presentation annotations.
+    mix_presentation->mix_presentation_annotations = av_calloc(mix_presentation->count_label, sizeof(*mix_presentation->mix_presentation_annotations));
+    if (!mix_presentation->mix_presentation_annotations) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (int i = 0; i < mix_presentation->count_label; i++) {
+        ret = label_string(s, pb, &mix_presentation->mix_presentation_annotations[i]);
+        if (ret < 0)
+            goto fail;
+    }
+
+    ret = leb(pb, &mix_presentation->num_sub_mixes);
+    if (ret < 0)
+        goto fail;
+
+    mix_presentation->sub_mixes = av_calloc(mix_presentation->num_sub_mixes, sizeof(*mix_presentation->sub_mixes));
+    if (!mix_presentation->sub_mixes) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (int i = 0; i < mix_presentation->num_sub_mixes; i++) {
+        AVIAMFSubmixPresentation *sub_mix;
+
+        sub_mix = mix_presentation->sub_mixes[i] = av_mallocz(sizeof(*sub_mix));
+        if (!sub_mix) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        ret = leb(pb, &sub_mix->num_submix_elements);
+        if (ret < 0)
+            goto fail;
+
+        sub_mix->submix_elements = av_calloc(sub_mix->num_submix_elements, sizeof(*sub_mix->submix_elements));
+        if (!sub_mix->submix_elements) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int j = 0; j < sub_mix->num_submix_elements; j++) {
+            AVIAMFSubmixElement *submix_element;
+            IAMFAudioElement *audio_element = NULL;
+            unsigned int audio_element_id, rendering_config_extension_size;
+
+            submix_element = sub_mix->submix_elements[j] = av_mallocz(sizeof(*submix_element));
+            if (!submix_element) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            ret = leb(pb, &audio_element_id);
+            if (ret < 0)
+                goto fail;
+
+            // Resolve the referenced Audio Element among those parsed so far.
+            for (int k = 0; k < c->nb_audio_elements; k++)
+                if (c->audio_elements[k].stream_group->id == audio_element_id) {
+                    audio_element = &c->audio_elements[k];
+                    submix_element->audio_element = audio_element->stream_group;
+                }
+
+            if (!audio_element) {
+                av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n", audio_element_id, mix_presentation_id);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+
+            // The same stream may be reachable through several submix
+            // elements, so AVERROR(EEXIST) is not treated as a failure.
+            for (int k = 0; k < audio_element->num_substreams; k++) {
+                ret = avformat_stream_group_add_stream(mixi->stream_group, audio_element->audio_substreams[k]);
+                if (ret < 0 && ret != AVERROR(EEXIST))
+                    goto fail;
+            }
+
+            submix_element->mix_presentation_element_annotations = av_calloc(mix_presentation->count_label, sizeof(*submix_element->mix_presentation_element_annotations));
+            if (!submix_element->mix_presentation_element_annotations) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            for (int k = 0; k < mix_presentation->count_label; k++) {
+                ret = label_string(s, pb, &submix_element->mix_presentation_element_annotations[k]);
+                if (ret < 0)
+                    goto fail;
+            }
+
+            // Only the two top bits of this byte are kept.
+            submix_element->headphones_rendering_mode = avio_r8(pb) >> 6;
+
+            ret = leb(pb, &rendering_config_extension_size);
+            if (ret < 0)
+                goto fail;
+            avio_skip(pb, rendering_config_extension_size);
+
+            ret = param_parse(s, pb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &submix_element->element_mix_config);
+            if (ret < 0)
+                goto fail;
+            submix_element->default_mix_gain = sign_extend(avio_rb16(pb), 16);
+        }
+        ret = param_parse(s, pb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config);
+        if (ret < 0)
+            goto fail;
+        sub_mix->default_mix_gain = sign_extend(avio_rb16(pb), 16);
+
+        ret = leb(pb, &sub_mix->num_submix_layouts);
+        if (ret < 0)
+            goto fail;
+
+        sub_mix->submix_layouts = av_calloc(sub_mix->num_submix_layouts, sizeof(*sub_mix->submix_layouts));
+        if (!sub_mix->submix_layouts) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int j = 0; j < sub_mix->num_submix_layouts; j++) {
+            AVIAMFSubmixLayout *submix_layout;
+            int info_type;
+            int byte = avio_r8(pb);
+
+            submix_layout = sub_mix->submix_layouts[j] = av_mallocz(sizeof(*submix_layout));
+            if (!submix_layout) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            submix_layout->layout_type = byte >> 6;
+            // Only layout types 2 and 3 are accepted. The two range tests
+            // must be joined with "||": no value is both < 2 and > 3.
+            if (submix_layout->layout_type < 2 || submix_layout->layout_type > 3) {
+                av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n", submix_layout->layout_type, mix_presentation_id);
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            if (submix_layout->layout_type == 2) {
+                int sound_system;
+                sound_system = (byte >> 2) & 0xF;
+                // NOTE(review): sound_system ranges over 0..15; confirm that
+                // ff_iamf_sound_system_map has that many entries, or add a
+                // bounds check before indexing.
+                ret = av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout);
+                if (ret < 0)
+                    goto fail;
+            }
+
+            info_type = avio_r8(pb);
+            submix_layout->integrated_loudness = sign_extend(avio_rb16(pb), 16);
+            submix_layout->digital_peak = sign_extend(avio_rb16(pb), 16);
+
+            // The optional fields below are read to advance pb; their values
+            // are not stored.
+            if (info_type & 1)
+                sign_extend(avio_rb16(pb), 16); // true_peak
+
+            if (info_type & 2) {
+                unsigned int num_anchored_loudness = avio_r8(pb);
+
+                for (int k = 0; k < num_anchored_loudness; k++) {
+                    avio_r8(pb); // anchor_element
+                    sign_extend(avio_rb16(pb), 16); // anchored_loudness
+                }
+            }
+
+            // Any other info_type bit announces a sized extension to skip.
+            if (info_type & 0xFC) {
+                unsigned int info_type_size;
+                ret = leb(pb, &info_type_size);
+                if (ret < 0)
+                    goto fail;
+
+                avio_skip(pb, info_type_size);
+            }
+        }
+    }
+
+    av_log(s, AV_LOG_DEBUG, "%"PRId64" bytes left at the end of mixing_parameters_obu\n", len - avio_tell(pb));
+
+    ret = 0;
+fail:
+    av_free(buf);
+
+    return ret;
+}
+
+/**
+ * Demuxer read_header callback: consume the OBUs at the start of the input.
+ *
+ * OBU headers are read one at a time. Codec Config, Audio Element and Mix
+ * Presentation OBUs are handed to their parsers, a Temporal Delimiter drops
+ * the cached mix/demix/recon parameter blocks, and any other type is
+ * skipped. The loop ends at the first OBU whose type lies in
+ * [IAMF_OBU_IA_PARAMETER_BLOCK, IAMF_OBU_IA_SEQUENCE_HEADER); the bytes read
+ * for that header are seeked back over before returning.
+ *
+ * @param s demuxer context
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int iamf_read_header(AVFormatContext *s)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+
+    av_log(s, AV_LOG_DEBUG, "HEADER\n");
+    for (;;) {
+        enum IAMF_OBU_Type obu_type;
+        unsigned payload_size;
+        int payload_offset, header_len, nb_read;
+        int ret = ffio_ensure_seekback(s->pb, MAX_IAMF_OBU_HEADER_SIZE);
+
+        if (ret < 0)
+            return ret;
+
+        nb_read = avio_read(s->pb, header, MAX_IAMF_OBU_HEADER_SIZE);
+        if (nb_read < 0)
+            return nb_read;
+
+        header_len = parse_obu_header(header, nb_read, &payload_size, &payload_offset, &obu_type);
+        if (header_len < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to read obu\n");
+            return header_len;
+        }
+
+        // OBU types in this range end the header: undo the read and stop.
+        if (obu_type >= IAMF_OBU_IA_PARAMETER_BLOCK && obu_type < IAMF_OBU_IA_SEQUENCE_HEADER) {
+            avio_seek(s->pb, -nb_read, SEEK_CUR);
+            break;
+        }
+
+        // Rewind so that only the first payload_offset bytes of what was
+        // just read stay consumed.
+        avio_seek(s->pb, payload_offset - nb_read, SEEK_CUR);
+
+        if (obu_type == IAMF_OBU_IA_CODEC_CONFIG) {
+            ret = codec_config_obu(s, payload_size);
+        } else if (obu_type == IAMF_OBU_IA_AUDIO_ELEMENT) {
+            ret = audio_element_obu(s, payload_size);
+        } else if (obu_type == IAMF_OBU_IA_MIX_PRESENTATION) {
+            ret = mix_presentation_obu(s, payload_size);
+        } else if (obu_type == IAMF_OBU_IA_TEMPORAL_DELIMITER) {
+            av_freep(&c->mix);
+            c->mix_size = 0;
+            av_freep(&c->demix);
+            c->demix_size = 0;
+            av_freep(&c->recon);
+            c->recon_size = 0;
+        } else {
+            int64_t offset = avio_skip(s->pb, payload_size);
+            if (offset < 0)
+                ret = offset;
+        }
+
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Look up the first stream of s whose id field equals id.
+ *
+ * @param s  demuxer context whose streams are searched
+ * @param id stream id to look for
+ * @return the matching stream, or NULL (after logging an error) if there is
+ *         none
+ */
+static AVStream *find_stream_by_id(AVFormatContext *s, int id)
+{
+    AVStream *match = NULL;
+
+    for (int idx = 0; !match && idx < s->nb_streams; idx++) {
+        if (s->streams[idx]->id == id)
+            match = s->streams[idx];
+    }
+
+    if (!match)
+        av_log(s, AV_LOG_ERROR, "Invalid stream id %d\n", id);
+
+    return match;
+}
+
+/**
+ * Attach a copy of a parameter block to pkt as side data of the given type.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the side data could not be
+ *         allocated
+ */
+static int iamf_attach_param_side_data(AVPacket *pkt, enum AVPacketSideDataType sd_type,
+                                       const void *param, size_t param_size)
+{
+    uint8_t *dst = av_packet_new_side_data(pkt, sd_type, param_size);
+
+    if (!dst)
+        return AVERROR(ENOMEM);
+    memcpy(dst, param, param_size);
+    return 0;
+}
+
+/**
+ * Read one Audio Frame OBU payload from s->pb into pkt.
+ *
+ * The substream id is either read from the bitstream (id_in_bitstream set;
+ * the value returned by leb() is then subtracted from len) or derived from
+ * the OBU type as type - IAMF_OBU_IA_AUDIO_FRAME_ID0. Whichever of the
+ * cached mix / demix / recon parameter blocks are currently set are copied
+ * into the packet as side data.
+ *
+ * @param s               demuxer context
+ * @param pkt             packet to fill
+ * @param len             number of bytes left in the OBU
+ * @param type            OBU type of the frame
+ * @param id_in_bitstream nonzero if the substream id is coded explicitly
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int audio_frame_obu(AVFormatContext *s, AVPacket *pkt, int len,
+                           enum IAMF_OBU_Type type, int id_in_bitstream)
+{
+    const IAMFDemuxContext *const c = s->priv_data;
+    AVStream *st;
+    int ret, audio_substream_id;
+
+    if (!id_in_bitstream) {
+        audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
+    } else {
+        unsigned explicit_audio_substream_id;
+
+        ret = leb(s->pb, &explicit_audio_substream_id);
+        if (ret < 0)
+            return ret;
+        len -= ret;
+        audio_substream_id = explicit_audio_substream_id;
+    }
+
+    st = find_stream_by_id(s, audio_substream_id);
+    if (!st)
+        return AVERROR_INVALIDDATA;
+
+    ret = av_get_packet(s->pb, pkt, len);
+    if (ret < 0)
+        return ret;
+    // A short read is treated as a corrupt frame.
+    if (ret != len)
+        return AVERROR_INVALIDDATA;
+
+    if (c->mix) {
+        ret = iamf_attach_param_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix, c->mix_size);
+        if (ret < 0)
+            return ret;
+    }
+    if (c->demix) {
+        ret = iamf_attach_param_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix, c->demix_size);
+        if (ret < 0)
+            return ret;
+    }
+    if (c->recon) {
+        ret = iamf_attach_param_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon, c->recon_size);
+        if (ret < 0)
+            return ret;
+    }
+
+    pkt->stream_index = st->index;
+    return 0;
+}
+
+/**
+ * Find the parameter definition registered under parameter_id.
+ *
+ * @param s            demuxer context
+ * @param parameter_id id to search for in c->param_definitions
+ * @return the first matching definition, or NULL if none matches
+ */
+static const IAMFParamDefinition *get_param_definition(AVFormatContext *s, unsigned int parameter_id)
+{
+    const IAMFDemuxContext *const c = s->priv_data;
+
+    for (int idx = 0; idx < c->nb_param_definitions; idx++) {
+        const IAMFParamDefinition *candidate = &c->param_definitions[idx];
+
+        if (candidate->param->parameter_id == parameter_id)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/**
+ * Parse one Parameter Block OBU and cache the result in the demuxer context.
+ *
+ * The len payload bytes are copied from s->pb into a temporary buffer and
+ * parsed through an in-memory I/O context. The block is matched against the
+ * known parameter definitions by parameter_id; blocks with an unknown id are
+ * ignored. On success a mix gain block replaces c->mix and a demixing block
+ * replaces c->demix. Recon gain blocks are parsed but then discarded.
+ *
+ * Every exit path goes through the fail label so that the temporary buffer
+ * is always released and the output block is released on error.
+ *
+ * @param s   demuxer context
+ * @param len size in bytes of the OBU payload to read from s->pb
+ * @return 0 on success or when the block is ignored, a negative AVERROR code
+ *         on failure
+ */
+static int parameter_block_obu(AVFormatContext *s, int len)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    const IAMFParamDefinition *param_definition;
+    const AVIAMFParamDefinition *param;
+    AVIAMFParamDefinition *out_param = NULL;
+    FFIOContext b;
+    AVIOContext *pb;
+    uint8_t *buf;
+    unsigned int duration, constant_subblock_duration;
+    unsigned int num_subblocks;
+    unsigned int parameter_id;
+    size_t out_param_size;
+    int ret;
+
+    buf = av_malloc(len);
+    if (!buf)
+        return AVERROR(ENOMEM);
+
+    ret = avio_read(s->pb, buf, len);
+    if (ret != len) {
+        if (ret >= 0)
+            ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
+    pb = &b.pub;
+
+    ret = leb(pb, &parameter_id);
+    if (ret < 0)
+        goto fail;
+
+    param_definition = get_param_definition(s, parameter_id);
+    if (!param_definition) {
+        av_log(s, AV_LOG_VERBOSE, "Non existant parameter_id %d referenced in a parameter block. Ignoring\n", parameter_id);
+        ret = 0;
+        goto fail;
+    }
+
+    param = param_definition->param;
+    if (param->param_definition_mode) {
+        // Timing information is coded in the block itself.
+        ret = leb(pb, &duration);
+        if (ret < 0)
+            goto fail;
+
+        ret = leb(pb, &constant_subblock_duration);
+        if (ret < 0)
+            goto fail;
+
+        if (constant_subblock_duration == 0) {
+            ret = leb(pb, &num_subblocks);
+            if (ret < 0)
+                goto fail;
+        } else
+            num_subblocks = duration / constant_subblock_duration;
+    } else {
+        // Timing information comes from the stored parameter definition.
+        duration = param->duration;
+        constant_subblock_duration = param->constant_subblock_duration;
+        num_subblocks = param->num_subblocks;
+        if (!num_subblocks) {
+            // The count has to be derived from the durations; a zero
+            // divisor cannot describe any subblock.
+            if (!constant_subblock_duration) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            num_subblocks = duration / constant_subblock_duration;
+        }
+    }
+
+    out_param = avformat_iamf_param_definition_alloc(param->param_definition_type, num_subblocks, &out_param_size);
+    if (!out_param) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    out_param->parameter_id = param->parameter_id;
+    out_param->param_definition_type = param->param_definition_type;
+    out_param->parameter_rate = param->parameter_rate;
+    out_param->param_definition_mode = param->param_definition_mode;
+    out_param->duration = duration;
+    out_param->constant_subblock_duration = constant_subblock_duration;
+    out_param->num_subblocks = num_subblocks;
+
+    for (int i = 0; i < num_subblocks; i++) {
+        void *subblock = avformat_iamf_param_definition_get_subblock(out_param, i);
+        unsigned int subblock_duration;
+
+        if (param->param_definition_mode && !constant_subblock_duration) {
+            ret = leb(pb, &subblock_duration);
+            if (ret < 0)
+                goto fail;
+
+        } else {
+            // NOTE(review): subblock points into the freshly allocated
+            // out_param, so the duration read here is whatever the
+            // allocator left there. Reading it from the stored definition
+            // (param) or using constant_subblock_duration looks like the
+            // intent; confirm before changing.
+            switch (param->param_definition_type) {
+            case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+                subblock_duration = ((AVIAMFMixGainParameterData *)subblock)->subblock_duration;
+                break;
+            case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+                subblock_duration = ((AVIAMFDemixingInfoParameterData *)subblock)->subblock_duration;
+                break;
+            case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+                subblock_duration = ((AVIAMFReconGainParameterData *)subblock)->subblock_duration;
+                break;
+            default:
+                // Leave through the common exit so buf and out_param are
+                // released.
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+        }
+
+        switch (param->param_definition_type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
+            AVIAMFMixGainParameterData *mix = subblock;
+
+            ret = leb(pb, &mix->animation_type);
+            if (ret < 0)
+                goto fail;
+
+            // Unknown animation types make the whole block be dropped
+            // without reporting an error.
+            if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
+                ret = 0;
+                av_free(out_param);
+                goto fail;
+            }
+
+            mix->start_point_value = sign_extend(avio_rb16(pb), 16);
+            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
+                mix->end_point_value = sign_extend(avio_rb16(pb), 16);
+            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
+                mix->control_point_value = sign_extend(avio_rb16(pb), 16);
+                mix->control_point_relative_time = avio_r8(pb);
+            }
+            mix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
+            AVIAMFDemixingInfoParameterData *demix = subblock;
+
+            demix->dmixp_mode = avio_r8(pb) >> 5;
+            demix->subblock_duration = subblock_duration;
+            break;
+        }
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
+            const AVIAMFAudioElement *audio_element = param_definition->audio_element;
+
+            av_assert0(audio_element);
+            // The gain bytes are only read to advance pb; nothing is stored.
+            for (int l = 0; l < audio_element->num_layers; l++) {
+                const AVIAMFLayer *layer = audio_element->layers[l];
+                if (layer->recon_gain_is_present) {
+                    unsigned int recon_gain_flags, bitcount;
+                    ret = leb(pb, &recon_gain_flags);
+                    if (ret < 0)
+                        goto fail;
+
+                    // Bit 7 selects between 7 and 12 flag bits; the value
+                    // is then repacked so the flag bits are contiguous.
+                    bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
+                    recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
+                    for (int j = 0; j < bitcount; j++) {
+                        if (recon_gain_flags & (1 << j)) {
+                            avio_r8(pb); // recon_gain
+                        }
+                    }
+                }
+            }
+            break;
+        }
+        default: {
+            unsigned parameter_data_size;
+            ret = leb(pb, &parameter_data_size);
+            if (ret < 0)
+                goto fail;
+
+            avio_skip(pb, parameter_data_size);
+            break;
+        }
+        }
+    }
+
+    av_log(s, AV_LOG_DEBUG, "%"PRId64" bytes left at the end of parameter_block_obu\n", len - avio_tell(pb));
+
+    // Hand the parsed block over to the context, replacing the previous one.
+    switch (param->param_definition_type) {
+    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+        av_free(c->mix);
+        c->mix = out_param;
+        c->mix_size = out_param_size;
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+        av_free(c->demix);
+        c->demix = out_param;
+        c->demix_size = out_param_size;
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: // TODO
+        ret = 0;
+        av_free(out_param);
+        goto fail;
+    default:
+        // Leave through the common exit so buf and out_param are released.
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ret = 0;
+fail:
+    if (ret < 0)
+        av_free(out_param);
+    av_free(buf);
+
+    return ret;
+}
+
+/**
+ * Demuxer read_packet callback: return the next audio frame.
+ *
+ * OBUs are read until an Audio Frame OBU is found, whose payload is returned
+ * in pkt. Parameter Block OBUs met on the way are parsed and cached; OBUs of
+ * any other type are skipped and reading continues with the next OBU, so
+ * the function never reports success without having filled pkt.
+ *
+ * @param s   demuxer context
+ * @param pkt packet to fill
+ * @return the result of audio_frame_obu() for the frame found, or a negative
+ *         AVERROR code if reading, parsing or skipping fails first
+ */
+static int iamf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
+    unsigned obu_size;
+    int ret;
+
+    av_log(s, AV_LOG_DEBUG, "PACKET\n");
+    while (1) {
+        enum IAMF_OBU_Type type;
+        int len, size, start_pos;
+
+        if ((ret = ffio_ensure_seekback(s->pb, MAX_IAMF_OBU_HEADER_SIZE)) < 0)
+            return ret;
+        size = avio_read(s->pb, header, MAX_IAMF_OBU_HEADER_SIZE);
+        if (size < 0)
+            return size;
+
+        len = parse_obu_header(header, size, &obu_size, &start_pos, &type);
+        if (len < 0) {
+            av_log(s, AV_LOG_ERROR, "Failed to read obu\n");
+            return len;
+        }
+        // Rewind so that only the first start_pos bytes of what was just
+        // read stay consumed.
+        avio_seek(s->pb, -(size - start_pos), SEEK_CUR);
+
+        if (type == IAMF_OBU_IA_AUDIO_FRAME)
+            return audio_frame_obu(s, pkt, obu_size, type, 1);
+        else if (type >= IAMF_OBU_IA_AUDIO_FRAME_ID0 && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
+            return audio_frame_obu(s, pkt, obu_size, type, 0);
+        else if (type == IAMF_OBU_IA_PARAMETER_BLOCK) {
+            ret = parameter_block_obu(s, obu_size);
+            if (ret < 0)
+                return ret;
+        } else {
+            // Skip the OBU and keep looking for a frame; only a failed skip
+            // ends the loop.
+            int64_t offset = avio_skip(s->pb, obu_size);
+            if (offset < 0) {
+                ret = offset;
+                break;
+            }
+        }
+    }
+
+    return ret;
+}
+
+/**
+ * Demuxer read_close callback: release everything held by the private
+ * context and reset the matching counters and sizes.
+ *
+ * @param s demuxer context
+ * @return always 0
+ */
+static int iamf_read_close(AVFormatContext *s)
+{
+    IAMFDemuxContext *const c = s->priv_data;
+    int idx;
+
+    // Each codec config carries its own extradata buffer.
+    for (idx = 0; idx < c->nb_codec_configs; idx++)
+        av_free(c->codec_configs[idx].extradata);
+    av_freep(&c->codec_configs);
+    c->nb_codec_configs = 0;
+
+    // Each audio element carries its own substream pointer array.
+    for (idx = 0; idx < c->nb_audio_elements; idx++)
+        av_free(c->audio_elements[idx].audio_substreams);
+    av_freep(&c->audio_elements);
+    c->nb_audio_elements = 0;
+
+    // For these two only the arrays themselves are freed here.
+    av_freep(&c->mix_presentations);
+    c->nb_mix_presentations = 0;
+    av_freep(&c->param_definitions);
+    c->nb_param_definitions = 0;
+
+    // Cached parameter blocks.
+    av_freep(&c->mix);
+    c->mix_size = 0;
+    av_freep(&c->demix);
+    c->demix_size = 0;
+    av_freep(&c->recon);
+    c->recon_size = 0;
+
+    return 0;
+}
+
+/**
+ * Input format descriptor for the "iamf" demuxer.
+ *
+ * Wires the probe, header, packet and close callbacks into the demuxer
+ * interface and associates the format with the ".iamf" file extension.
+ * The private context is an IAMFDemuxContext.
+ */
+const AVInputFormat ff_iamf_demuxer = {
+    .name           = "iamf",
+    .long_name      = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
+    .priv_data_size = sizeof(IAMFDemuxContext),
+    // NOTE(review): presumably makes the generic code call read_close when
+    // read_header fails; confirm against the FF_FMT_INIT_CLEANUP definition.
+    .flags_internal = FF_FMT_INIT_CLEANUP,
+    .read_probe     = iamf_probe,
+    .read_header    = iamf_read_header,
+    .read_packet    = iamf_read_packet,
+    .read_close     = iamf_read_close,
+    .extensions     = "iamf",
+    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NO_BYTE_SEEK | AVFMT_NOTIMESTAMPS | AVFMT_SHOW_IDS,
+};
diff --git a/libavformat/options.c b/libavformat/options.c
index 09eb13e97a..115f95d48a 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -20,6 +20,7 @@ 
 #include "avformat.h"
 #include "avio_internal.h"
 #include "demux.h"
+#include "iamf.h"
 #include "internal.h"
 
 #include "libavcodec/avcodec.h"
@@ -345,7 +346,16 @@  AVStreamGroup *avformat_stream_group_create(AVFormatContext *s,
 
     stg->type = type;
     switch (type) {
-    // Structs in the union are allocated here
+    case AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT:
+        stg->params.iamf_audio_element = avformat_iamf_audio_element_alloc();
+        if (!stg->params.iamf_audio_element)
+            goto fail;
+        break;
+    case AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION:
+        stg->params.iamf_mix_presentation = avformat_iamf_mix_presentation_alloc();
+        if (!stg->params.iamf_mix_presentation)
+            goto fail;
+        // fall-through
     default:
         break;
     }
@@ -356,6 +366,9 @@  AVStreamGroup *avformat_stream_group_create(AVFormatContext *s,
     s->stream_groups[s->nb_stream_groups++] = stg;
 
     return stg;
+fail:
+    ff_free_stream_group(&stg);
+    return NULL;
 }
 
 static int stream_group_add_stream(AVStreamGroup *stg, const AVStream *st)