diff mbox series

[FFmpeg-devel,4/9] avutil: introduce an Immersive Audio Model and Formats API

Message ID 20231126012858.40388-5-jamrial@gmail.com
State New
Headers show
Series avformat: introduce AVStreamGroup | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

James Almer Nov. 26, 2023, 1:28 a.m. UTC
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavutil/Makefile |   2 +
 libavutil/iamf.c   | 582 +++++++++++++++++++++++++++++++++++++++++++++
 libavutil/iamf.h   | 377 +++++++++++++++++++++++++++++
 3 files changed, 961 insertions(+)
 create mode 100644 libavutil/iamf.c
 create mode 100644 libavutil/iamf.h

Comments

Anton Khirnov Nov. 30, 2023, 11:01 a.m. UTC | #1
Quoting James Almer (2023-11-26 02:28:53)
> diff --git a/libavutil/iamf.h b/libavutil/iamf.h
> new file mode 100644
> index 0000000000..1f4919efdb
> --- /dev/null
> +++ b/libavutil/iamf.h
> +enum AVIAMFAudioElementType {
> +    AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
> +    AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,

'audio' in the names is redundant and makes already long identifiers
unnecessarily longer

> +};
> +
> +/**
> + * @defgroup lavf_iamf_params Parameter Definition
> + * @{
> + * Parameters as defined in section 3.6.1 and 3.8

of what?

> +/**
> + * Mix Gain Parameter Data as defined in section 3.8.1
> + *
> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
> + */
> +typedef struct AVIAMFMixGainParameterData {

Does 'ParameterData' at the end really serve any purpose?

> +    const AVClass *av_class;
> +
> +    // AVOption enabled fields
> +    unsigned int subblock_duration;
> +    enum AVIAMFAnimationType animation_type;
> +    AVRational start_point_value;
> +    AVRational end_point_value;
> +    AVRational control_point_value;
> +    unsigned int control_point_relative_time;

All these should really be documented. Also, some vertical alignment
would improve readability.

> +/**
> + * Parameters as defined in section 3.6.1

This really REALLY needs more documentation.

> + */
> +typedef struct AVIAMFParamDefinition {
> +    const AVClass *av_class;
> +
> +    size_t subblocks_offset;
> +    size_t subblock_size;
> +
> +    enum AVIAMFParamDefinitionType param_definition_type;
> +    unsigned int num_subblocks;

We use nb_foo generally.

> +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
> +                                                            AVDictionary **options,
> +                                                            unsigned int num_subblocks, AVDictionary **subblock_options,

What are the dicts for?

> + *
> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
> + * layout as defined in section 3.6.3
> + */
> +typedef struct AVIAMFLayer {
> +    const AVClass *av_class;
> +
> +    // AVOption enabled fields
> +    AVChannelLayout ch_layout;
> +
> +    unsigned int recon_gain_is_present;

Every time you dedicate 4 bytes to storing one bit, God kills a kitten.

> +    /**
> +     * Output gain flags as defined in section 3.6.2

It would be really really nice if people could understand the struct
contents without some external document.

> +     * This field is defined only if audio_element_type is

presumably the parent's audio_element_type

> +     * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
> +     */
> +    unsigned int output_gain_flags;
> +    /**
> +     * Output gain as defined in section 3.6.2
> +     *
> +     * Must be 0 if @ref output_gain_flags is 0.
> +     */
> +    AVRational output_gain;
> +    /**
> +     * Ambisonics mode as defined in section 3.6.3
> +     *
> +     * This field is defined only if audio_element_type is
> +     * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
> +     *
> +     * If 0, channel_mapping is defined implicitly (Ambisonic Order)
> +     * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
> +     * If 1, @ref demixing_matrix must be set.
> +     */
> +    enum AVIAMFAmbisonicsMode ambisonics_mode;
> +
> +    // End of AVOption enabled fields

What purpose does this comment serve?

> +    /**
> +     * Demixing matrix as defined in section 3.6.3
> +     *
> +     * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
> +     */
> +    AVRational *demixing_matrix;

Who sets this?

> +typedef struct AVIAMFAudioElement {
> +    const AVClass *av_class;
> +
> +    AVIAMFLayer **layers;
> +    /**
> +     * Number of layers, or channel groups, in the Audio Element.
> +     * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
> +     * may be exactly 1.
> +     *
> +     * Set by av_iamf_audio_element_add_layer(), must not be
> +     * modified by any other code.
> +     */
> +    unsigned int num_layers;
> +
> +    unsigned int codec_config_id;

???

> +int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);

I would much prefer to have the caller call av_opt_set* manually rather
than sprinkle AVDictionary function arguments everywhere.
Do note that their usage in lavc and lavf APIs is out of necessity, not
because it's very pretty.
James Almer Nov. 30, 2023, 1:01 p.m. UTC | #2
On 11/30/2023 8:01 AM, Anton Khirnov wrote:
> Quoting James Almer (2023-11-26 02:28:53)
>> diff --git a/libavutil/iamf.h b/libavutil/iamf.h
>> new file mode 100644
>> index 0000000000..1f4919efdb
>> --- /dev/null
>> +++ b/libavutil/iamf.h
>> +enum AVIAMFAudioElementType {
>> +    AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
>> +    AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
> 
> 'audio' in the names is redundant and makes already long identifiers
> unnecessarily longer

I'm trying to keep everything namespaced. Audio Elements are not the 
only part of the spec to use "element". See Submixes.

> 
>> +};
>> +
>> +/**
>> + * @defgroup lavf_iamf_params Parameter Definition
>> + * @{
>> + * Parameters as defined in section 3.6.1 and 3.8
> 
> of what?

Should i link https://aomediacodec.github.io/iamf/ somewhere?

> 
>> +/**
>> + * Mix Gain Parameter Data as defined in section 3.8.1
>> + *
>> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
>> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
>> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
>> + */
>> +typedef struct AVIAMFMixGainParameterData {
> 
> Does 'ParameterData' at the end really serve any purpose?

Follow the names as in 
https://aomediacodec.github.io/iamf/#obu-parameterblock
I can change it to Parameters or Params, or just remove it.

> 
>> +    const AVClass *av_class;
>> +
>> +    // AVOption enabled fields
>> +    unsigned int subblock_duration;
>> +    enum AVIAMFAnimationType animation_type;
>> +    AVRational start_point_value;
>> +    AVRational end_point_value;
>> +    AVRational control_point_value;
>> +    unsigned int control_point_relative_time;
> 
> All these should really be documented. Also, some vertical alignment
> would improve readability.
> 
>> +/**
>> + * Parameters as defined in section 3.6.1
> 
> This really REALLY needs more documentation.

Yes, was keeping better documentation for last.

> 
>> + */
>> +typedef struct AVIAMFParamDefinition {
>> +    const AVClass *av_class;
>> +
>> +    size_t subblocks_offset;
>> +    size_t subblock_size;
>> +
>> +    enum AVIAMFParamDefinitionType param_definition_type;
>> +    unsigned int num_subblocks;
> 
> We use nb_foo generally.

For these public fields i'm keeping the same name as they are in the 
spec. I use nb_foo for arrays of structs in the demuxer/muxer patches. 
But i can change it if you prefer.

> 
>> +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
>> +                                                            AVDictionary **options,
>> +                                                            unsigned int num_subblocks, AVDictionary **subblock_options,
> 
> What are the dicts for?

Setting AVOptions for the AVIAMFParamDefinition and each subblock, 
respectively.

> 
>> + *
>> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
>> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
>> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
>> + * layout as defined in section 3.6.3
>> + */
>> +typedef struct AVIAMFLayer {
>> +    const AVClass *av_class;
>> +
>> +    // AVOption enabled fields
>> +    AVChannelLayout ch_layout;
>> +
>> +    unsigned int recon_gain_is_present;
> 
> Every time you dedicate 4 bytes to storing one bit, God kills a kitten.

I'll shave a few bytes.

> 
>> +    /**
>> +     * Output gain flags as defined in section 3.6.2
> 
> It would be really really nice if people could understand the struct
> contents without some external document.
> 
>> +     * This field is defined only if audio_element_type is
> 
> presumably the parent's audio_element_type

Yes, forgot the @ref. Good catch.

> 
>> +     * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
>> +     */
>> +    unsigned int output_gain_flags;
>> +    /**
>> +     * Output gain as defined in section 3.6.2
>> +     *
>> +     * Must be 0 if @ref output_gain_flags is 0.
>> +     */
>> +    AVRational output_gain;
>> +    /**
>> +     * Ambisonics mode as defined in section 3.6.3
>> +     *
>> +     * This field is defined only if audio_element_type is
>> +     * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
>> +     *
>> +     * If 0, channel_mapping is defined implicitly (Ambisonic Order)
>> +     * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
>> +     * If 1, @ref demixing_matrix must be set.
>> +     */
>> +    enum AVIAMFAmbisonicsMode ambisonics_mode;
>> +
>> +    // End of AVOption enabled fields
> 
> What purpose does this comment serve?

It was a reminder for me of what could be set through AVOptions. I'll 
remove it.

> 
>> +    /**
>> +     * Demixing matrix as defined in section 3.6.3
>> +     *
>> +     * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
>> +     */
>> +    AVRational *demixing_matrix;
> 
> Who sets this?

lavf for demuxing and the user for muxing, as usual. Will mention it.

> 
>> +typedef struct AVIAMFAudioElement {
>> +    const AVClass *av_class;
>> +
>> +    AVIAMFLayer **layers;
>> +    /**
>> +     * Number of layers, or channel groups, in the Audio Element.
>> +     * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
>> +     * may be exactly 1.
>> +     *
>> +     * Set by av_iamf_audio_element_add_layer(), must not be
>> +     * modified by any other code.
>> +     */
>> +    unsigned int num_layers;
>> +
>> +    unsigned int codec_config_id;
> 
> ???

Ah, good catch. Need to remove this and adapt the muxer.

> 
>> +int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);
> 
> I would much prefer to have the caller call av_opt_set* manually rather
> than sprinkle AVDictionary function arguments everywhere.
> Do note that their usage in lavc and lavf APIs is out of necessity, not
> because it's very pretty.

Alright, will try to remove it from most of these.
Anton Khirnov Nov. 30, 2023, 1:47 p.m. UTC | #3
Quoting James Almer (2023-11-30 14:01:16)
> 
> Should i link https://aomediacodec.github.io/iamf/ somewhere?

Most definitely.

> > 
> >> +/**
> >> + * Mix Gain Parameter Data as defined in section 3.8.1
> >> + *
> >> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
> >> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
> >> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
> >> + */
> >> +typedef struct AVIAMFMixGainParameterData {
> > 
> > Does 'ParameterData' at the end really serve any purpose?
> 
> Follow the names as in 
> https://aomediacodec.github.io/iamf/#obu-parameterblock
> I can change it to Parameters or Params, or just remove it.

I'd drop it. The names are long enough as they are.

> >> +    const AVClass *av_class;
> >> +
> >> +    // AVOption enabled fields
> >> +    unsigned int subblock_duration;
> >> +    enum AVIAMFAnimationType animation_type;
> >> +    AVRational start_point_value;
> >> +    AVRational end_point_value;
> >> +    AVRational control_point_value;
> >> +    unsigned int control_point_relative_time;
> > 
> > All these should really be documented. Also, some vertical alignment
> > would improve readability.
> > 
> >> +/**
> >> + * Parameters as defined in section 3.6.1
> > 
> > This really REALLY needs more documentation.
> 
> Yes, was keeping better documentation for last.
> 
> > 
> >> + */
> >> +typedef struct AVIAMFParamDefinition {
> >> +    const AVClass *av_class;
> >> +
> >> +    size_t subblocks_offset;
> >> +    size_t subblock_size;
> >> +
> >> +    enum AVIAMFParamDefinitionType param_definition_type;
> >> +    unsigned int num_subblocks;
> > 
> > We use nb_foo generally.
> 
> For these public fields i'm keeping the same name as they are in the 
> spec. I use nb_foo for arrays of structs in the demuxer/muxer patches. 
> But i can change it if you prefer.

I prefer to be consistent with ourselves in this rather than a spec.
Specs come and go.

> > 
> >> + *
> >> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
> >> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
> >> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
> >> + * layout as defined in section 3.6.3
> >> + */
> >> +typedef struct AVIAMFLayer {
> >> +    const AVClass *av_class;
> >> +
> >> +    // AVOption enabled fields
> >> +    AVChannelLayout ch_layout;
> >> +
> >> +    unsigned int recon_gain_is_present;
> > 
> > Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
> 
> I'll shave a few bytes.

I don't see how that can be done easily due to struct alignment. I was
thinking you could make it into a flags field instead.
James Almer Nov. 30, 2023, 2:27 p.m. UTC | #4
On 11/30/2023 10:47 AM, Anton Khirnov wrote:
> Quoting James Almer (2023-11-30 14:01:16)
>>
>> Should i link https://aomediacodec.github.io/iamf/ somewhere?
> 
> Most definitely.
> 
>>>
>>>> +/**
>>>> + * Mix Gain Parameter Data as defined in section 3.8.1
>>>> + *
>>>> + * Subblocks in AVIAMFParamDefinition use this struct when the value or
>>>> + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
>>>> + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
>>>> + */
>>>> +typedef struct AVIAMFMixGainParameterData {
>>>
>>> Does 'ParameterData' at the end really serve any purpose?
>>
>> Follow the names as in
>> https://aomediacodec.github.io/iamf/#obu-parameterblock
>> I can change it to Parameters or Params, or just remove it.
> 
> I'd drop it. The names are long enough as they are.
> 
>>>> +    const AVClass *av_class;
>>>> +
>>>> +    // AVOption enabled fields
>>>> +    unsigned int subblock_duration;
>>>> +    enum AVIAMFAnimationType animation_type;
>>>> +    AVRational start_point_value;
>>>> +    AVRational end_point_value;
>>>> +    AVRational control_point_value;
>>>> +    unsigned int control_point_relative_time;
>>>
>>> All these should really be documented. Also, some vertical alignment
>>> would improve readability.
>>>
>>>> +/**
>>>> + * Parameters as defined in section 3.6.1
>>>
>>> This really REALLY needs more documentation.
>>
>> Yes, was keeping better documentation for last.
>>
>>>
>>>> + */
>>>> +typedef struct AVIAMFParamDefinition {
>>>> +    const AVClass *av_class;
>>>> +
>>>> +    size_t subblocks_offset;
>>>> +    size_t subblock_size;
>>>> +
>>>> +    enum AVIAMFParamDefinitionType param_definition_type;
>>>> +    unsigned int num_subblocks;
>>>
>>> We use nb_foo generally.
>>
>> For these public fields i'm keeping the same name as they are in the
>> spec. I use nb_foo for arrays of structs in the demuxer/muxer patches.
>> But i can change it if you prefer.
> 
> I prefer to be consistent with ourselves in this rather than a spec.
> Specs come and go.
> 
>>>
>>>> + *
>>>> + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
>>>> + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2.
>>>> + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
>>>> + * layout as defined in section 3.6.3
>>>> + */
>>>> +typedef struct AVIAMFLayer {
>>>> +    const AVClass *av_class;
>>>> +
>>>> +    // AVOption enabled fields
>>>> +    AVChannelLayout ch_layout;
>>>> +
>>>> +    unsigned int recon_gain_is_present;
>>>
>>> Every time you dedicate 4 bytes to storing one bit, God kills a kitten.
>>
>> I'll shave a few bytes.
> 
> I don't see how that can be done easily due to struct alignment. I was
> thinking you could make it into a flags field instead.

But this is the only boolean field. Also, there can be at most six 
layers, so it's not exactly a huge waste either way.
Anton Khirnov Nov. 30, 2023, 2:30 p.m. UTC | #5
Quoting James Almer (2023-11-30 15:27:49)
> 
> But this is the only boolean field.

For now. Who's to say there will not be more in the future.
diff mbox series

Patch

diff --git a/libavutil/Makefile b/libavutil/Makefile
index 4711f8cde8..62cc1a1831 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -51,6 +51,7 @@  HEADERS = adler32.h                                                     \
           hwcontext_videotoolbox.h                                      \
           hwcontext_vdpau.h                                             \
           hwcontext_vulkan.h                                            \
+          iamf.h                                                        \
           imgutils.h                                                    \
           intfloat.h                                                    \
           intreadwrite.h                                                \
@@ -140,6 +141,7 @@  OBJS = adler32.o                                                        \
        hdr_dynamic_vivid_metadata.o                                     \
        hmac.o                                                           \
        hwcontext.o                                                      \
+       iamf.o                                                           \
        imgutils.o                                                       \
        integer.o                                                        \
        intmath.o                                                        \
diff --git a/libavutil/iamf.c b/libavutil/iamf.c
new file mode 100644
index 0000000000..fffb9fab20
--- /dev/null
+++ b/libavutil/iamf.c
@@ -0,0 +1,582 @@ 
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "avassert.h"
+#include "error.h"
+#include "iamf.h"
+#include "log.h"
+#include "mem.h"
+#include "opt.h"
+
+/**
+ * Define av_iamf_<parent_name>_add_<child_name>(), which appends one newly
+ * allocated child to the parent's array of child pointers.
+ *
+ * The generated function grows parent->child_name##suffix by one slot,
+ * allocates a zeroed child, sets its AVClass to child_name##_class, applies
+ * the AVOption defaults and then the entries of the optional dictionary.
+ * The parent's num_ counter is only incremented once every step succeeded.
+ * If applying the dictionary fails, av_opt_free() releases whatever the
+ * options already allocated inside the child before the child itself is
+ * freed.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the counter is already UINT_MAX,
+ *         AVERROR(ENOMEM) on allocation failure, or the negative error code
+ *         returned by av_opt_set_dict2().
+ */
+#define IAMF_ADD_FUNC_TEMPLATE(parent_type, parent_name, child_type, child_name, suffix)                   \
+int av_iamf_ ## parent_name ## _add_ ## child_name(parent_type *parent_name, AVDictionary **options) \
+{                                                                                                          \
+    child_type **child_name ## suffix, *child_name;                                                        \
+                                                                                                           \
+    if (parent_name->num_## child_name ## suffix == UINT_MAX)                                              \
+        return AVERROR(EINVAL);                                                                            \
+                                                                                                           \
+    child_name ## suffix = av_realloc_array(parent_name->child_name ## suffix,                             \
+                                            parent_name->num_## child_name ## suffix + 1,                  \
+                                            sizeof(*parent_name->child_name ## suffix));                   \
+    if (!child_name ## suffix)                                                                             \
+        return AVERROR(ENOMEM);                                                                            \
+                                                                                                           \
+    parent_name->child_name ## suffix = child_name ## suffix;                                              \
+                                                                                                           \
+    child_name = parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix]               \
+               = av_mallocz(sizeof(*child_name));                                                          \
+    if (!child_name)                                                                                       \
+        return AVERROR(ENOMEM);                                                                            \
+                                                                                                           \
+    child_name->av_class = &child_name ## _class;                                                          \
+    av_opt_set_defaults(child_name);                                                                       \
+    if (options) {                                                                                         \
+        int ret = av_opt_set_dict2(child_name, options, AV_OPT_SEARCH_CHILDREN);                           \
+        if (ret < 0) {                                                                                     \
+            av_opt_free(child_name);                                                                       \
+            av_freep(&parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix]);        \
+            return ret;                                                                                    \
+        }                                                                                                  \
+    }                                                                                                      \
+    parent_name->num_## child_name ## suffix++;                                                            \
+                                                                                                           \
+    return 0;                                                                                              \
+}
+
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM
+
+//
+// Param Definition
+//
+#define OFFSET(x) offsetof(AVIAMFMixGainParameterData, x)
+/*
+ * AVOptions for one Mix Gain subblock. Every entry points at the struct
+ * member it is named after.
+ * NOTE(review): subblock_duration is declared as unsigned int in iamf.h but
+ * is registered as AV_OPT_TYPE_INT64 here -- confirm the option type matches
+ * the width of the field.
+ */
+static const AVOption mix_gain_options[] = {
+    { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS },
+    { "animation_type", "set animation_type", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FLAGS },
+    { "start_point_value", "set start_point_value", OFFSET(start_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "end_point_value", "set end_point_value", OFFSET(end_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "control_point_value", "set control_point_value", OFFSET(control_point_value), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "control_point_relative_time", "set control_point_relative_time", OFFSET(control_point_relative_time), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT8_MAX, FLAGS },
+    { NULL },
+};
+
+/*
+ * AVClass for AVIAMFMixGainParameterData subblocks, with mix_gain_options as
+ * its option table. The class name matches the struct it describes, as the
+ * demixing and recon gain classes do.
+ */
+static const AVClass mix_gain_class = {
+    .class_name     = "AVIAMFMixGainParameterData",
+    .item_name      = av_default_item_name,
+    .version        = LIBAVUTIL_VERSION_INT,
+    .option         = mix_gain_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFDemixingInfoParameterData, x)
+/* AVOptions for one AVIAMFDemixingInfoParameterData subblock. */
+static const AVOption demixing_info_options[] = {
+    {
+        .name        = "subblock_duration",
+        .help        = "set subblock_duration",
+        .offset      = OFFSET(subblock_duration),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 1 },
+        .min         = 1,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "dmixp_mode",
+        .help        = "set dmixp_mode",
+        .offset      = OFFSET(dmixp_mode),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = 6,
+        .flags       = FLAGS,
+    },
+    { .name = NULL },
+};
+
+/* AVClass for demixing info subblocks, with demixing_info_options as its option table. */
+static const AVClass demixing_info_class = {
+    .option     = demixing_info_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .item_name  = av_default_item_name,
+    .class_name = "AVIAMFDemixingInfoParameterData",
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFReconGainParameterData, x)
+/* AVOptions for one AVIAMFReconGainParameterData subblock. */
+static const AVOption recon_gain_options[] = {
+    {
+        .name        = "subblock_duration",
+        .help        = "set subblock_duration",
+        .offset      = OFFSET(subblock_duration),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 1 },
+        .min         = 1,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    { .name = NULL },
+};
+
+/* AVClass for recon gain subblocks, with recon_gain_options as its option table. */
+static const AVClass recon_gain_class = {
+    .option     = recon_gain_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .item_name  = av_default_item_name,
+    .class_name = "AVIAMFReconGainParameterData",
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFParamDefinition, x)
+/* AVOptions for the AVIAMFParamDefinition header (not its subblocks). */
+static const AVOption param_definition_options[] = {
+    {
+        .name        = "parameter_id",
+        .help        = "set parameter_id",
+        .offset      = OFFSET(parameter_id),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "parameter_rate",
+        .help        = "set parameter_rate",
+        .offset      = OFFSET(parameter_rate),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "param_definition_mode",
+        .help        = "set param_definition_mode",
+        .offset      = OFFSET(param_definition_mode),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = 1 },
+        .min         = 0,
+        .max         = 1,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "duration",
+        .help        = "set duration",
+        .offset      = OFFSET(duration),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "constant_subblock_duration",
+        .help        = "set constant_subblock_duration",
+        .offset      = OFFSET(constant_subblock_duration),
+        .type        = AV_OPT_TYPE_INT64,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = UINT_MAX,
+        .flags       = FLAGS,
+    },
+    { .name = NULL },
+};
+
+/**
+ * child_class_iterate callback of param_definition_class.
+ *
+ * The iteration state stored in *opaque is compared against the
+ * AVIAMFParamDefinitionType values: a match yields the corresponding
+ * subblock class and advances the state by one, anything else yields NULL
+ * and leaves the state untouched.
+ */
+static const AVClass *param_definition_child_iterate(void **opaque)
+{
+    const uintptr_t state = (uintptr_t)*opaque;
+    const AVClass *child;
+
+    if (state == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
+        child = &mix_gain_class;
+    else if (state == AV_IAMF_PARAMETER_DEFINITION_DEMIXING)
+        child = &demixing_info_class;
+    else if (state == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN)
+        child = &recon_gain_class;
+    else
+        return NULL;
+
+    *opaque = (void*)(state + 1);
+    return child;
+}
+
+/*
+ * AVClass for AVIAMFParamDefinition. Its child classes (the three subblock
+ * classes) are enumerated through param_definition_child_iterate().
+ */
+static const AVClass param_definition_class = {
+    .child_class_iterate = param_definition_child_iterate,
+    .option              = param_definition_options,
+    .version             = LIBAVUTIL_VERSION_INT,
+    .item_name           = av_default_item_name,
+    .class_name          = "AVIAMFParamDefinition",
+};
+
+/**
+ * Return a pointer to param_definition_class, the AVClass that
+ * av_iamf_param_definition_alloc() assigns to every AVIAMFParamDefinition.
+ */
+const AVClass *av_iamf_param_definition_get_class(void)
+{
+    return &param_definition_class;
+}
+
+/**
+ * Allocate an AVIAMFParamDefinition followed, in the same buffer, by
+ * num_subblocks subblock structs of the kind selected by type.
+ *
+ * The definition and every subblock get their AVClass set and their AVOption
+ * defaults applied; the optional dictionaries are then applied on top.
+ *
+ * @param type             kind of parameter definition; selects the subblock
+ *                         struct and its AVClass
+ * @param options          if non-NULL, dictionary applied to the definition
+ *                         with av_opt_set_dict()
+ * @param num_subblocks    number of subblocks placed after the definition
+ * @param subblock_options if non-NULL, array indexed from 0 to
+ *                         num_subblocks - 1; each non-NULL entry is applied
+ *                         to the subblock with the same index
+ * @param out_size         if non-NULL, receives the total size in bytes of
+ *                         the allocation on success
+ *
+ * @return the new definition, or NULL if type is unknown, the total size
+ *         would overflow size_t, the allocation fails, or an option
+ *         dictionary cannot be applied
+ */
+AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType type, AVDictionary **options,
+                                                            unsigned int num_subblocks, AVDictionary **subblock_options,
+                                                            size_t *out_size)
+{
+    /* Layout helpers: only used to compute the correctly aligned offset of
+     * the first subblock behind the definition for each subblock type. */
+    struct MixGainStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFMixGainParameterData m;
+    };
+    struct DemixStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFDemixingInfoParameterData d;
+    };
+    struct ReconGainStruct {
+        AVIAMFParamDefinition p;
+        AVIAMFReconGainParameterData r;
+    };
+    size_t subblocks_offset, subblock_size;
+    size_t size;
+    AVIAMFParamDefinition *par;
+
+    switch (type) {
+    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+        subblocks_offset = offsetof(struct MixGainStruct, m);
+        subblock_size = sizeof(AVIAMFMixGainParameterData);
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+        subblocks_offset = offsetof(struct DemixStruct, d);
+        subblock_size = sizeof(AVIAMFDemixingInfoParameterData);
+        break;
+    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+        subblocks_offset = offsetof(struct ReconGainStruct, r);
+        subblock_size = sizeof(AVIAMFReconGainParameterData);
+        break;
+    default:
+        return NULL;
+    }
+
+    /* Reject subblock counts whose total size would not fit in size_t. */
+    size = subblocks_offset;
+    if (num_subblocks > (SIZE_MAX - size) / subblock_size)
+        return NULL;
+    size += subblock_size * num_subblocks;
+
+    par = av_mallocz(size);
+    if (!par)
+        return NULL;
+
+    par->av_class = &param_definition_class;
+    av_opt_set_defaults(par);
+    if (options) {
+        int ret = av_opt_set_dict(par, options);
+        if (ret < 0) {
+            av_free(par);
+            return NULL;
+        }
+    }
+    /* Set after the options so that they cannot be overridden by them. */
+    par->param_definition_type = type;
+    par->num_subblocks = num_subblocks;
+    par->subblock_size = subblock_size;
+    par->subblocks_offset = subblocks_offset;
+
+    /* The index has the same type as num_subblocks: a signed int would
+     * overflow before reaching counts above INT_MAX. */
+    for (unsigned int i = 0; i < num_subblocks; i++) {
+        void *subblock = av_iamf_param_definition_get_subblock(par, i);
+
+        switch (type) {
+        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
+            ((AVIAMFMixGainParameterData *)subblock)->av_class = &mix_gain_class;
+            break;
+        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
+            ((AVIAMFDemixingInfoParameterData *)subblock)->av_class = &demixing_info_class;
+            break;
+        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
+            ((AVIAMFReconGainParameterData *)subblock)->av_class = &recon_gain_class;
+            break;
+        default:
+            /* Unknown types were rejected by the first switch. */
+            av_assert0(0);
+        }
+
+        av_opt_set_defaults(subblock);
+        if (subblock_options && subblock_options[i]) {
+            int ret = av_opt_set_dict(subblock, &subblock_options[i]);
+            if (ret < 0) {
+                av_free(par);
+                return NULL;
+            }
+        }
+    }
+
+    if (out_size)
+        *out_size = size;
+
+    return par;
+}
+
+//
+// Audio Element
+//
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFLayer, x)
+/*
+ * AVOptions for AVIAMFLayer. The AV_OPT_TYPE_CONST entries are the named
+ * values accepted by the option that shares their unit string.
+ */
+static const AVOption layer_options[] = {
+    { .name = "ch_layout", .help = "set ch_layout",
+      .offset = OFFSET(ch_layout), .type = AV_OPT_TYPE_CHLAYOUT,
+      .default_val = { .str = NULL }, .min = 0, .max = 0, .flags = FLAGS },
+    { .name = "recon_gain_is_present", .help = "set recon_gain_is_present",
+      .offset = OFFSET(recon_gain_is_present), .type = AV_OPT_TYPE_BOOL,
+      .default_val = { .i64 = 0 }, .min = 0, .max = 1, .flags = FLAGS },
+
+    /* output_gain_flags and its six named bits */
+    { .name = "output_gain_flags", .help = "set output_gain_flags",
+      .offset = OFFSET(output_gain_flags), .type = AV_OPT_TYPE_FLAGS,
+      .default_val = { .i64 = 0 }, .min = 0, .max = (1 << 6) - 1,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "FL", .help = "Left channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 5 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "FR", .help = "Right channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 4 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "BL", .help = "Left surround channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 3 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "BR", .help = "Right surround channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 2 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "TFL", .help = "Left top front channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 1 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+    { .name = "TFR", .help = "Right top front channel",
+      .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = 1 << 0 }, .min = INT_MIN, .max = INT_MAX,
+      .flags = FLAGS, .unit = "output_gain_flags" },
+
+    { .name = "output_gain", .help = "set output_gain",
+      .offset = OFFSET(output_gain), .type = AV_OPT_TYPE_RATIONAL,
+      .default_val = { .dbl = 0 }, .min = -128.0, .max = 128.0, .flags = FLAGS },
+
+    /* ambisonics_mode and its two named values */
+    { .name = "ambisonics_mode", .help = "set ambisonics_mode",
+      .offset = OFFSET(ambisonics_mode), .type = AV_OPT_TYPE_INT,
+      .default_val = { .i64 = AV_IAMF_AMBISONICS_MODE_MONO },
+      .min = AV_IAMF_AMBISONICS_MODE_MONO,
+      .max = AV_IAMF_AMBISONICS_MODE_PROJECTION,
+      .flags = FLAGS, .unit = "ambisonics_mode" },
+    { .name = "mono", .help = NULL, .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = AV_IAMF_AMBISONICS_MODE_MONO },
+      .unit = "ambisonics_mode" },
+    { .name = "projection", .help = NULL, .offset = 0, .type = AV_OPT_TYPE_CONST,
+      .default_val = { .i64 = AV_IAMF_AMBISONICS_MODE_PROJECTION },
+      .unit = "ambisonics_mode" },
+
+    { .name = NULL },
+};
+
+/* AVClass for AVIAMFLayer; binds the struct to layer_options. */
+static const AVClass layer_class = {
+    .class_name     = "AVIAMFLayer",
+    .item_name      = av_default_item_name,
+    .version        = LIBAVUTIL_VERSION_INT,
+    .option         = layer_options,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFAudioElement, x)
+/*
+ * AVOption table for AVIAMFAudioElement. Each entry maps an option name onto
+ * a struct field through OFFSET().
+ */
+static const AVOption audio_element_options[] = {
+    /* Limited to the two AVIAMFAudioElementType values, which can also be given by name. */
+    { "audio_element_type", "set audio_element_type", OFFSET(audio_element_type), AV_OPT_TYPE_INT,
+            {.i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL },
+            AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, FLAGS, "audio_element_type" },
+        { "channel", NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL }, .unit = "audio_element_type" },
+        { "scene",   NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE },   .unit = "audio_element_type" },
+    { "default_w", "set default_w", OFFSET(default_w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 10, FLAGS },
+    { NULL },
+};
+
+/**
+ * AVClass.child_class_iterate callback for AVIAMFAudioElement.
+ *
+ * The only child class is AVIAMFLayer. *opaque holds the iteration state
+ * and is NULL on the first call.
+ *
+ * @param opaque iteration state, advanced whenever a class is returned
+ * @return &layer_class on the first call, NULL once it has been reported
+ */
+static const AVClass *audio_element_child_iterate(void **opaque)
+{
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVClass *ret = NULL;
+
+    // Iteration starts at state 0, so state 0 must yield the single child
+    // class; any later state means the list is exhausted.
+    if (!i)
+        ret = &layer_class;
+
+    if (ret)
+        *opaque = (void*)(i + 1);
+    return ret;
+}
+
+/*
+ * AVClass for AVIAMFAudioElement; binds the struct to audio_element_options
+ * and lists its child classes through audio_element_child_iterate().
+ */
+static const AVClass audio_element_class = {
+    .class_name          = "AVIAMFAudioElement",
+    .item_name           = av_default_item_name,
+    .version             = LIBAVUTIL_VERSION_INT,
+    .option              = audio_element_options,
+    .child_class_iterate = audio_element_child_iterate,
+};
+
+/* Return the AVClass that av_iamf_audio_element_alloc() installs in new elements. */
+const AVClass *av_iamf_audio_element_get_class(void)
+{
+    return &audio_element_class;
+}
+
+/*
+ * Allocate a zero-initialized AVIAMFAudioElement, install its AVClass and
+ * apply the AVOption defaults.
+ *
+ * Returns the new element, or NULL if the allocation failed.
+ */
+AVIAMFAudioElement *av_iamf_audio_element_alloc(void)
+{
+    AVIAMFAudioElement *elem = av_mallocz(sizeof(*elem));
+
+    if (!elem)
+        return NULL;
+
+    elem->av_class = &audio_element_class;
+    av_opt_set_defaults(elem);
+
+    return elem;
+}
+
+// NOTE(review): presumably expands to av_iamf_audio_element_add_layer(), the
+// function iamf.h declares for adding an AVIAMFLayer -- confirm against the
+// IAMF_ADD_FUNC_TEMPLATE definition.
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFAudioElement, audio_element, AVIAMFLayer, layer, s)
+
+/*
+ * Free an AVIAMFAudioElement together with its layers.
+ *
+ * For every layer this releases its AVOptions-owned data, its
+ * demixing_matrix and the layer itself. The layers array, demixing_info and
+ * recon_gain_info follow, and the element is released last.
+ *
+ * paudio_element must not be NULL because it is dereferenced
+ * unconditionally. If *paudio_element is NULL the call does nothing;
+ * otherwise it is handed to av_freep() at the end.
+ */
+void av_iamf_audio_element_free(AVIAMFAudioElement **paudio_element)
+{
+    AVIAMFAudioElement *audio_element = *paudio_element;
+
+    if (!audio_element)
+        return;
+
+    for (int i = 0; i < audio_element->num_layers; i++) {
+        AVIAMFLayer *layer = audio_element->layers[i];
+        // Option-owned data first, while the layer struct is still valid.
+        av_opt_free(layer);
+        av_free(layer->demixing_matrix);
+        av_free(layer);
+    }
+    av_free(audio_element->layers);
+
+    av_free(audio_element->demixing_info);
+    av_free(audio_element->recon_gain_info);
+    av_freep(paudio_element);
+}
+
+//
+// Mix Presentation
+//
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmixElement, x)
+/*
+ * AVOption table for AVIAMFSubmixElement. Each entry maps an option name
+ * onto a struct field through OFFSET().
+ */
+static const AVOption submix_element_options[] = {
+    /* Limited to the two AVIAMFHeadphonesMode values, which can also be given by name. */
+    { "headphones_rendering_mode", "Headphones rendering mode", OFFSET(headphones_rendering_mode), AV_OPT_TYPE_INT,
+            { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO },
+            AV_IAMF_HEADPHONES_MODE_STEREO, AV_IAMF_HEADPHONES_MODE_BINAURAL, FLAGS, "headphones_rendering_mode" },
+        { "stereo",   NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO },   .unit = "headphones_rendering_mode" },
+        { "binaural", NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_HEADPHONES_MODE_BINAURAL }, .unit = "headphones_rendering_mode" },
+    { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "annotations", "Annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, { .str = NULL }, 0, 0, FLAGS },
+    { NULL },
+};
+
+/*
+ * AVClass.child_next callback for AVIAMFSubmixElement.
+ *
+ * The element exposes one child object, its element_mix_config. It is
+ * returned when prev is NULL; any other prev ends the walk with NULL.
+ */
+static void *submix_element_child_next(void *obj, void *prev)
+{
+    const AVIAMFSubmixElement *elem = obj;
+
+    return prev ? NULL : elem->element_mix_config;
+}
+
+/**
+ * AVClass.child_class_iterate callback for AVIAMFSubmixElement.
+ *
+ * The only child class is the parameter definition class. *opaque holds the
+ * iteration state and is NULL on the first call.
+ *
+ * @param opaque iteration state, advanced whenever a class is returned
+ * @return &param_definition_class on the first call, NULL once it has been
+ *         reported
+ */
+static const AVClass *submix_element_child_iterate(void **opaque)
+{
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVClass *ret = NULL;
+
+    // Iteration starts at state 0, so state 0 must yield the single child
+    // class; any later state means the list is exhausted.
+    if (!i)
+        ret = &param_definition_class;
+
+    if (ret)
+        *opaque = (void*)(i + 1);
+    return ret;
+}
+
+/*
+ * AVClass for AVIAMFSubmixElement; binds the struct to
+ * submix_element_options and exposes element_mix_config as its child object.
+ */
+static const AVClass element_class = {
+    .class_name          = "AVIAMFSubmixElement",
+    .item_name           = av_default_item_name,
+    .version             = LIBAVUTIL_VERSION_INT,
+    .option              = submix_element_options,
+    .child_next          = submix_element_child_next,
+    .child_class_iterate = submix_element_child_iterate,
+};
+
+// NOTE(review): presumably expands to av_iamf_submix_add_element(), the
+// function iamf.h declares for adding an AVIAMFSubmixElement -- confirm
+// against the IAMF_ADD_FUNC_TEMPLATE definition.
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixElement, element, s)
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmixLayout, x)
+/*
+ * AVOption table for AVIAMFSubmixLayout. Each entry maps an option name onto
+ * a struct field through OFFSET().
+ */
+static const AVOption submix_layout_options[] = {
+    /* Limited to the two AVIAMFSubmixLayoutType values, which can also be given by name. */
+    { "layout_type", "Layout type", OFFSET(layout_type), AV_OPT_TYPE_INT,
+            { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS },
+            AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS, AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL, FLAGS, "layout_type" },
+        { "loudspeakers", NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS }, .unit = "layout_type" },
+        { "binaural",     NULL, 0, AV_OPT_TYPE_CONST,
+                   { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL },     .unit = "layout_type" },
+    { "sound_system", "Sound System", OFFSET(sound_system), AV_OPT_TYPE_CHLAYOUT, { .str = NULL }, 0, 0, FLAGS },
+    { "integrated_loudness", "Integrated loudness", OFFSET(integrated_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "digital_peak", "Digital peak", OFFSET(digital_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "true_peak", "True peak", OFFSET(true_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    /* The option is spelled "dialog" while the struct field is spelled "dialogue". */
+    { "dialog_anchored_loudness", "Anchored loudness (Dialog)", OFFSET(dialogue_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { "album_anchored_loudness", "Anchored loudness (Album)", OFFSET(album_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { NULL },
+};
+
+/* AVClass for AVIAMFSubmixLayout; binds the struct to submix_layout_options. */
+static const AVClass layout_class = {
+    .class_name     = "AVIAMFSubmixLayout",
+    .item_name      = av_default_item_name,
+    .version        = LIBAVUTIL_VERSION_INT,
+    .option         = submix_layout_options,
+};
+
+// NOTE(review): presumably expands to av_iamf_submix_add_layout(), the
+// function iamf.h declares for adding an AVIAMFSubmixLayout -- confirm
+// against the IAMF_ADD_FUNC_TEMPLATE definition.
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixLayout, layout, s)
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFSubmix, x)
+/* AVOption table for AVIAMFSubmix; its only option is default_mix_gain. */
+static const AVOption submix_presentation_options[] = {
+    { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS },
+    { NULL },
+};
+
+/*
+ * AVClass.child_next callback for AVIAMFSubmix.
+ *
+ * The submix exposes one child object, its output_mix_config. It is
+ * returned when prev is NULL; any other prev ends the walk with NULL.
+ */
+static void *submix_presentation_child_next(void *obj, void *prev)
+{
+    const AVIAMFSubmix *submix = obj;
+
+    return prev ? NULL : submix->output_mix_config;
+}
+
+/*
+ * AVClass.child_class_iterate callback for AVIAMFSubmix.
+ *
+ * Reports, in order, the submix element class, the submix layout class and
+ * the parameter definition class. *opaque holds the index of the next class
+ * and is only advanced when a class is returned.
+ */
+static const AVClass *submix_presentation_child_iterate(void **opaque)
+{
+    static const AVClass *const children[] = {
+        &element_class,
+        &layout_class,
+        &param_definition_class,
+    };
+    uintptr_t idx = (uintptr_t)*opaque;
+
+    if (idx >= sizeof(children) / sizeof(children[0]))
+        return NULL;
+
+    *opaque = (void*)(idx + 1);
+    return children[idx];
+}
+
+/*
+ * AVClass for AVIAMFSubmix; binds the struct to submix_presentation_options
+ * and exposes output_mix_config as its child object.
+ */
+static const AVClass submix_class = {
+    .class_name          = "AVIAMFSubmix",
+    .item_name           = av_default_item_name,
+    .version             = LIBAVUTIL_VERSION_INT,
+    .option              = submix_presentation_options,
+    .child_next          = submix_presentation_child_next,
+    .child_class_iterate = submix_presentation_child_iterate,
+};
+
+#undef OFFSET
+#define OFFSET(x) offsetof(AVIAMFMixPresentation, x)
+/* AVOption table for AVIAMFMixPresentation; its only option is the annotations dictionary. */
+static const AVOption mix_presentation_options[] = {
+    { "annotations", "set annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, {.str = NULL }, 0, 0, FLAGS },
+    { NULL },
+};
+
+#undef OFFSET
+#undef FLAGS
+
+/**
+ * AVClass.child_class_iterate callback for AVIAMFMixPresentation.
+ *
+ * The only child class is AVIAMFSubmix. *opaque holds the iteration state
+ * and is NULL on the first call.
+ *
+ * @param opaque iteration state, advanced whenever a class is returned
+ * @return &submix_class on the first call, NULL once it has been reported
+ */
+static const AVClass *mix_presentation_child_iterate(void **opaque)
+{
+    uintptr_t i = (uintptr_t)*opaque;
+    const AVClass *ret = NULL;
+
+    // Iteration starts at state 0, so state 0 must yield the single child
+    // class; any later state means the list is exhausted.
+    if (!i)
+        ret = &submix_class;
+
+    if (ret)
+        *opaque = (void*)(i + 1);
+    return ret;
+}
+
+/*
+ * AVClass for AVIAMFMixPresentation; binds the struct to
+ * mix_presentation_options and lists its child classes through
+ * mix_presentation_child_iterate().
+ */
+static const AVClass mix_presentation_class = {
+    .class_name          = "AVIAMFMixPresentation",
+    .item_name           = av_default_item_name,
+    .version             = LIBAVUTIL_VERSION_INT,
+    .option              = mix_presentation_options,
+    .child_class_iterate = mix_presentation_child_iterate,
+};
+
+/* Return the AVClass that av_iamf_mix_presentation_alloc() installs in new presentations. */
+const AVClass *av_iamf_mix_presentation_get_class(void)
+{
+    return &mix_presentation_class;
+}
+
+/*
+ * Allocate a zero-initialized AVIAMFMixPresentation, install its AVClass and
+ * apply the AVOption defaults.
+ *
+ * Returns the new presentation, or NULL if the allocation failed.
+ */
+AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void)
+{
+    AVIAMFMixPresentation *mix = av_mallocz(sizeof(*mix));
+
+    if (!mix)
+        return NULL;
+
+    mix->av_class = &mix_presentation_class;
+    av_opt_set_defaults(mix);
+
+    return mix;
+}
+
+// NOTE(review): presumably expands to av_iamf_mix_presentation_add_submix(),
+// the function iamf.h declares for adding an AVIAMFSubmix; the trailing "es"
+// looks like the plural suffix for the submixes array -- confirm against the
+// IAMF_ADD_FUNC_TEMPLATE definition.
+IAMF_ADD_FUNC_TEMPLATE(AVIAMFMixPresentation, mix_presentation, AVIAMFSubmix, submix, es)
+
+/*
+ * Free an AVIAMFMixPresentation together with its submixes.
+ *
+ * For every submix this releases each element (its AVOptions-owned data, its
+ * element_mix_config and the element itself), each layout (its
+ * AVOptions-owned data and the layout itself), both pointer arrays, the
+ * output_mix_config and the submix. The presentation's own AVOptions-owned
+ * data and its submixes array are released last.
+ *
+ * pmix_presentation must not be NULL because it is dereferenced
+ * unconditionally. If *pmix_presentation is NULL the call does nothing;
+ * otherwise it is handed to av_freep() at the end.
+ */
+void av_iamf_mix_presentation_free(AVIAMFMixPresentation **pmix_presentation)
+{
+    AVIAMFMixPresentation *mix_presentation = *pmix_presentation;
+
+    if (!mix_presentation)
+        return;
+
+    for (int i = 0; i < mix_presentation->num_submixes; i++) {
+        AVIAMFSubmix *sub_mix = mix_presentation->submixes[i];
+        for (int j = 0; j < sub_mix->num_elements; j++) {
+            AVIAMFSubmixElement *submix_element = sub_mix->elements[j];
+            // Option-owned data first, while the element struct is still valid.
+            av_opt_free(submix_element);
+            av_free(submix_element->element_mix_config);
+            av_free(submix_element);
+        }
+        av_free(sub_mix->elements);
+        for (int j = 0; j < sub_mix->num_layouts; j++) {
+            AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[j];
+            av_opt_free(submix_layout);
+            av_free(submix_layout);
+        }
+        av_free(sub_mix->layouts);
+        av_free(sub_mix->output_mix_config);
+        av_free(sub_mix);
+    }
+    av_opt_free(mix_presentation);
+    av_free(mix_presentation->submixes);
+
+    av_freep(pmix_presentation);
+}
diff --git a/libavutil/iamf.h b/libavutil/iamf.h
new file mode 100644
index 0000000000..1f4919efdb
--- /dev/null
+++ b/libavutil/iamf.h
@@ -0,0 +1,377 @@ 
+/*
+ * Immersive Audio Model and Formats helper functions and defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_IAMF_H
+#define AVUTIL_IAMF_H
+
+/**
+ * @file
+ * Immersive Audio Model and Formats API header
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "attributes.h"
+#include "avassert.h"
+#include "channel_layout.h"
+#include "dict.h"
+#include "rational.h"
+
+/**
+ * Type of an Audio Element; see AVIAMFAudioElement.audio_element_type.
+ *
+ * It decides how each AVIAMFLayer of the element is interpreted.
+ */
+enum AVIAMFAudioElementType {
+    AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL,
+    AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE,
+};
+
+/**
+ * @defgroup lavf_iamf_params Parameter Definition
+ * @{
+ * Parameters as defined in sections 3.6.1 and 3.8 of the IAMF specification
+ * @}
+ * @defgroup lavf_iamf_audio Audio Element
+ * @{
+ * Audio Elements as defined in section 3.6 of the IAMF specification
+ * @}
+ * @defgroup lavf_iamf_mix Mix Presentation
+ * @{
+ * Mix Presentations as defined in section 3.7 of the IAMF specification
+ * @}
+ *
+ * @addtogroup lavf_iamf_params
+ * @{
+ */
+/* Values for AVIAMFMixGainParameterData.animation_type. */
+enum AVIAMFAnimationType {
+    AV_IAMF_ANIMATION_TYPE_STEP,
+    AV_IAMF_ANIMATION_TYPE_LINEAR,
+    AV_IAMF_ANIMATION_TYPE_BEZIER,
+};
+
+/**
+ * Mix Gain Parameter Data as defined in section 3.8.1
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN.
+ */
+typedef struct AVIAMFMixGainParameterData {
+    const AVClass *av_class;
+
+    // AVOption enabled fields
+    unsigned int subblock_duration;
+    enum AVIAMFAnimationType animation_type;
+    AVRational start_point_value;
+    AVRational end_point_value;
+    AVRational control_point_value;
+    unsigned int control_point_relative_time;
+} AVIAMFMixGainParameterData;
+
+/**
+ * Demixing Info Parameter Data as defined in section 3.8.2
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_DEMIXING.
+ */
+typedef struct AVIAMFDemixingInfoParameterData {
+    const AVClass *av_class;
+
+    // AVOption enabled fields
+    unsigned int subblock_duration;
+    unsigned int dmixp_mode;
+} AVIAMFDemixingInfoParameterData;
+
+/**
+ * Recon Gain Info Parameter Data as defined in section 3.8.3
+ *
+ * Subblocks in AVIAMFParamDefinition use this struct when the value of
+ * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is
+ * AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN.
+ */
+typedef struct AVIAMFReconGainParameterData {
+    const AVClass *av_class;
+
+    // AVOption enabled fields
+    unsigned int subblock_duration;
+    // End of AVOption enabled fields
+    // NOTE(review): presumably indexed as [layer][channel] -- confirm and
+    // document the meaning of both dimensions.
+    uint8_t recon_gain[6][12];
+} AVIAMFReconGainParameterData;
+
+/**
+ * Type of an AVIAMFParamDefinition. It selects which of the *ParameterData
+ * structs its subblocks use.
+ */
+enum AVIAMFParamDefinitionType {
+    AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN,
+    AV_IAMF_PARAMETER_DEFINITION_DEMIXING,
+    AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN,
+};
+
+/**
+ * Parameters as defined in section 3.6.1
+ *
+ * The subblocks live in the same allocation as this struct. Use
+ * av_iamf_param_definition_get_subblock() to reach them.
+ */
+typedef struct AVIAMFParamDefinition {
+    const AVClass *av_class;
+
+    /**
+     * Offset in bytes from the start of this struct to the first subblock.
+     */
+    size_t subblocks_offset;
+    /**
+     * Distance in bytes between two consecutive subblocks.
+     */
+    size_t subblock_size;
+
+    /**
+     * Selects the struct type of every subblock.
+     */
+    enum AVIAMFParamDefinitionType param_definition_type;
+    unsigned int num_subblocks;
+
+    // AVOption enabled fields
+    unsigned int parameter_id;
+    unsigned int parameter_rate;
+    unsigned int param_definition_mode;
+    unsigned int duration;
+    unsigned int constant_subblock_duration;
+} AVIAMFParamDefinition;
+
+/**
+ * Get the AVClass of AVIAMFParamDefinition.
+ */
+const AVClass *av_iamf_param_definition_get_class(void);
+
+/**
+ * Allocate an AVIAMFParamDefinition together with its subblocks.
+ *
+ * Every subblock gets the AVClass matching param_definition_type and its
+ * AVOption defaults. The free functions in iamf.c release the result with a
+ * single av_free().
+ *
+ * @param param_definition_type selects the struct type of every subblock
+ * @param options               NOTE(review): presumably AVOptions applied to
+ *                              the parameter definition itself -- confirm
+ * @param num_subblocks         number of subblocks to allocate
+ * @param subblock_options      may be NULL; otherwise entry i, when not NULL,
+ *                              is applied as AVOptions to subblock i
+ * @param size                  may be NULL; otherwise receives the size of
+ *                              the allocation
+ * @return the new parameter definition, or NULL on failure
+ */
+AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type,
+                                                            AVDictionary **options,
+                                                            unsigned int num_subblocks, AVDictionary **subblock_options,
+                                                            size_t *size);
+
+/**
+ * Get the subblock at index @p idx.
+ *
+ * @param par the parameter definition owning the subblocks
+ * @param idx subblock index, which must be below par->num_subblocks
+ *            (enforced with av_assert0())
+ * @return pointer to the subblock; its struct type is given by
+ *         @ref AVIAMFParamDefinition.param_definition_type
+ *         "the param definition type"
+ */
+static av_always_inline void*
+av_iamf_param_definition_get_subblock(AVIAMFParamDefinition *par, unsigned int idx)
+{
+    uint8_t *first;
+
+    av_assert0(idx < par->num_subblocks);
+
+    // Subblocks are stored back to back after the struct itself.
+    first = (uint8_t *)par + par->subblocks_offset;
+    return first + idx * par->subblock_size;
+}
+
+/**
+ * @}
+ * @addtogroup lavf_iamf_audio
+ * @{
+ */
+
+/* Values for AVIAMFLayer.ambisonics_mode. */
+enum AVIAMFAmbisonicsMode {
+    AV_IAMF_AMBISONICS_MODE_MONO,
+    AV_IAMF_AMBISONICS_MODE_PROJECTION,
+};
+
+/**
+ * A layer defining a Channel Layout in the Audio Element.
+ *
+ * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this
+ * corresponds to a Scalable Channel Layout layer as defined in section 3.6.2.
+ * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel
+ * layout as defined in section 3.6.3
+ */
+typedef struct AVIAMFLayer {
+    const AVClass *av_class;
+
+    // AVOption enabled fields
+    AVChannelLayout ch_layout;
+
+    unsigned int recon_gain_is_present;
+    /**
+     * Output gain flags as defined in section 3.6.2
+     *
+     * This field is defined only if audio_element_type is
+     * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise.
+     */
+    unsigned int output_gain_flags;
+    /**
+     * Output gain as defined in section 3.6.2
+     *
+     * Must be 0 if @ref output_gain_flags is 0.
+     */
+    AVRational output_gain;
+    /**
+     * Ambisonics mode as defined in section 3.6.3
+     *
+     * This field is defined only if audio_element_type is
+     * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise.
+     *
+     * If 0, channel_mapping is defined implicitly (Ambisonic Order)
+     * or explicitly (Custom Order with ambi channels) in @ref ch_layout.
+     * If 1, @ref demixing_matrix must be set.
+     */
+    enum AVIAMFAmbisonicsMode ambisonics_mode;
+
+    // End of AVOption enabled fields
+    /**
+     * Demixing matrix as defined in section 3.6.3
+     *
+     * Set only if @ref ambisonics_mode == 1, must be NULL otherwise.
+     * Released with av_free() by av_iamf_audio_element_free().
+     */
+    AVRational *demixing_matrix;
+} AVIAMFLayer;
+
+/**
+ * Audio Element as defined in section 3.6
+ *
+ * Allocate with av_iamf_audio_element_alloc() and release with
+ * av_iamf_audio_element_free(), which also frees the layers,
+ * @ref demixing_info and @ref recon_gain_info.
+ */
+typedef struct AVIAMFAudioElement {
+    const AVClass *av_class;
+
+    AVIAMFLayer **layers;
+    /**
+     * Number of layers, or channel groups, in the Audio Element.
+     * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there
+     * may be exactly 1.
+     *
+     * Set by av_iamf_audio_element_add_layer(), must not be
+     * modified by any other code.
+     */
+    unsigned int num_layers;
+
+    unsigned int codec_config_id;
+
+    AVIAMFParamDefinition *demixing_info;
+    AVIAMFParamDefinition *recon_gain_info;
+
+    // AVOption enabled fields
+    /**
+     * Audio element type as defined in section 3.6
+     */
+    enum AVIAMFAudioElementType audio_element_type;
+
+    /**
+     * Default weight value as defined in section 3.6
+     */
+    unsigned int default_w;
+} AVIAMFAudioElement;
+
+/**
+ * Get the AVClass of AVIAMFAudioElement.
+ */
+const AVClass *av_iamf_audio_element_get_class(void);
+
+/**
+ * Allocate an AVIAMFAudioElement and set its fields to default values.
+ *
+ * @return the new element, or NULL on allocation failure. It must be freed
+ *         with av_iamf_audio_element_free().
+ */
+AVIAMFAudioElement *av_iamf_audio_element_alloc(void);
+
+/**
+ * Add a layer to an Audio Element.
+ *
+ * NOTE(review): presumably @p options is applied as AVOptions to the new
+ * layer and a negative error code is returned on failure -- confirm against
+ * the implementation.
+ */
+int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options);
+
+/**
+ * Free an AVIAMFAudioElement and everything it owns.
+ *
+ * @param audio_element must not be NULL; *audio_element may be NULL, in
+ *                      which case nothing is done
+ */
+void av_iamf_audio_element_free(AVIAMFAudioElement **audio_element);
+
+/**
+ * @}
+ * @addtogroup lavf_iamf_mix
+ * @{
+ */
+
+/* Values for AVIAMFSubmixElement.headphones_rendering_mode. */
+enum AVIAMFHeadphonesMode {
+    AV_IAMF_HEADPHONES_MODE_STEREO,
+    AV_IAMF_HEADPHONES_MODE_BINAURAL,
+};
+
+/**
+ * One element of an AVIAMFSubmix.
+ *
+ * Freed, together with @ref element_mix_config, by
+ * av_iamf_mix_presentation_free().
+ */
+typedef struct AVIAMFSubmixElement {
+    const AVClass *av_class;
+
+    unsigned int audio_element_id;
+
+    AVIAMFParamDefinition *element_mix_config;
+
+    // AVOption enabled fields
+    enum AVIAMFHeadphonesMode headphones_rendering_mode;
+
+    AVRational default_mix_gain;
+
+    /**
+     * A dictionary of strings describing the submix. Must have the same
+     * amount of entries as @ref AVIAMFMixPresentation.annotations "the
+     * mix's annotations".
+     *
+     * decoding: set by libavformat
+     * encoding: set by the user
+     */
+    AVDictionary *annotations;
+} AVIAMFSubmixElement;
+
+/**
+ * Values for AVIAMFSubmixLayout.layout_type.
+ *
+ * NOTE(review): the explicit values 2 and 3 presumably mirror the
+ * layout_type values of the bitstream -- confirm against the specification.
+ */
+enum AVIAMFSubmixLayoutType {
+    AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS = 2,
+    AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL = 3,
+};
+
+/**
+ * One layout of an AVIAMFSubmix, with its loudness information.
+ *
+ * Every field after av_class is AVOption enabled.
+ */
+typedef struct AVIAMFSubmixLayout {
+    const AVClass *av_class;
+
+    // AVOption enabled fields
+    enum AVIAMFSubmixLayoutType layout_type;
+    AVChannelLayout sound_system;
+    AVRational integrated_loudness;
+    AVRational digital_peak;
+    AVRational true_peak;
+    AVRational dialogue_anchored_loudness;
+    AVRational album_anchored_loudness;
+} AVIAMFSubmixLayout;
+
+/**
+ * One submix of an AVIAMFMixPresentation.
+ *
+ * Freed, together with its elements, layouts and @ref output_mix_config, by
+ * av_iamf_mix_presentation_free().
+ */
+typedef struct AVIAMFSubmix {
+    const AVClass *av_class;
+
+    AVIAMFSubmixElement **elements;
+    /**
+     * Number of entries in @ref elements.
+     *
+     * Set by av_iamf_submix_add_element(), must not be
+     * modified by any other code.
+     */
+    unsigned int num_elements;
+
+    AVIAMFSubmixLayout **layouts;
+    /**
+     * Number of entries in @ref layouts.
+     *
+     * Set by av_iamf_submix_add_layout(), must not be
+     * modified by any other code.
+     */
+    unsigned int num_layouts;
+
+    AVIAMFParamDefinition *output_mix_config;
+
+    // AVOption enabled fields
+    AVRational default_mix_gain;
+} AVIAMFSubmix;
+
+/**
+ * Mix Presentation as defined in section 3.7
+ *
+ * Allocate with av_iamf_mix_presentation_alloc() and release with
+ * av_iamf_mix_presentation_free(), which also frees every submix.
+ */
+typedef struct AVIAMFMixPresentation {
+    const AVClass *av_class;
+
+    AVIAMFSubmix **submixes;
+    /**
+     * Number of submixes in the presentation.
+     *
+     * Set by av_iamf_mix_presentation_add_submix(), must not be
+     * modified by any other code.
+     */
+    unsigned int num_submixes;
+
+    // AVOption enabled fields
+    /**
+     * A dictionary of strings describing the mix. Must have the same
+     * amount of entries as every @ref AVIAMFSubmixElement.annotations
+     * "Submix element annotations".
+     *
+     * decoding: set by libavformat
+     * encoding: set by the user
+     */
+    AVDictionary *annotations;
+} AVIAMFMixPresentation;
+
+/**
+ * Get the AVClass of AVIAMFMixPresentation.
+ */
+const AVClass *av_iamf_mix_presentation_get_class(void);
+
+/**
+ * Allocate an AVIAMFMixPresentation and set its fields to default values.
+ *
+ * @return the new presentation, or NULL on allocation failure. It must be
+ *         freed with av_iamf_mix_presentation_free().
+ */
+AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void);
+
+/**
+ * Add a submix to a Mix Presentation.
+ *
+ * NOTE(review): presumably @p options is applied as AVOptions to the new
+ * submix and a negative error code is returned on failure -- confirm against
+ * the implementation.
+ */
+int av_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation,
+                                              AVDictionary **options);
+
+/**
+ * Add an element to a submix.
+ *
+ * NOTE(review): presumably follows the same contract as
+ * av_iamf_mix_presentation_add_submix() -- confirm.
+ */
+int av_iamf_submix_add_element(AVIAMFSubmix *submix, AVDictionary **options);
+
+/**
+ * Add a layout to a submix.
+ *
+ * NOTE(review): presumably follows the same contract as
+ * av_iamf_mix_presentation_add_submix() -- confirm.
+ */
+int av_iamf_submix_add_layout(AVIAMFSubmix *submix, AVDictionary **options);
+
+/**
+ * Free an AVIAMFMixPresentation and everything it owns.
+ *
+ * @param mix_presentation must not be NULL; *mix_presentation may be NULL,
+ *                         in which case nothing is done
+ */
+void av_iamf_mix_presentation_free(AVIAMFMixPresentation **mix_presentation);
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_IAMF_H */