[FFmpeg-devel,4/9] avutil: introduce an Immersive Audio Model and Formats API

Message ID	20231126012858.40388-5-jamrial@gmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Sat, 25 Nov 2023 22:28:53 -0300 Message-ID: <20231126012858.40388-5-jamrial@gmail.com> In-Reply-To: <20231126012858.40388-1-jamrial@gmail.com> References: <20231126012858.40388-1-jamrial@gmail.com> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 4/9] avutil: introduce an Immersive Audio Model and Formats API Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	avformat: introduce AVStreamGroup \| expand [FFmpeg-devel,v5,0/9] avformat: introduce AVStreamGroup [FFmpeg-devel,1/9] avutil/mem: add av_dynarray2_add_nofree [FFmpeg-devel,2/9] avcodec/get_bits: add get_leb() [FFmpeg-devel,3/9] avformat/aviobuf: add ffio_read_leb() and ffio_write_leb() [FFmpeg-devel,4/9] avutil: introduce an Immersive Audio Model and Formats API [FFmpeg-devel,5/9] avformat: introduce AVStreamGroup [FFmpeg-devel,6/9] ffmpeg: add support for muxing AVStreamGroups [FFmpeg-devel,7/9] avcodec/packet: add IAMF Parameters side data types [FFmpeg-devel,8/9] avformat: Immersive Audio Model and Formats demuxer [FFmpeg-devel,9/9] avformat: Immersive Audio Model and Formats muxer [FFmpeg-devel,10/13] avcodec: add an Immersive Audio Model and Formats frame split bsf [FFmpeg-devel,11/13] avformat/demux: support inserting bitstream filters in demuxing scenarios [FFmpeg-devel,12/13] avformat/mov: make MOVStreamContext refcounted [FFmpeg-devel,13/13] avformat/mov: add support for Immersive Audio Model and Formats in ISOBMFF

Context	Check	Description
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished

diff --git a/libavutil/Makefile b/libavutil/Makefile index 4711f8cde8..62cc1a1831 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -51,6 +51,7 @@ HEADERS = adler32.h \ hwcontext_videotoolbox.h \ hwcontext_vdpau.h \ hwcontext_vulkan.h \ + iamf.h \ imgutils.h \ intfloat.h \ intreadwrite.h \ @@ -140,6 +141,7 @@ OBJS = adler32.o \ hdr_dynamic_vivid_metadata.o \ hmac.o \ hwcontext.o \ + iamf.o \ imgutils.o \ integer.o \ intmath.o \ diff --git a/libavutil/iamf.c b/libavutil/iamf.c new file mode 100644 index 0000000000..fffb9fab20 --- /dev/null +++ b/libavutil/iamf.c @@ -0,0 +1,582 @@ +/* + * Immersive Audio Model and Formats helper functions and defines + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <limits.h> +#include <stddef.h> +#include <stdint.h> + +#include "avassert.h" +#include "error.h" +#include "iamf.h" +#include "log.h" +#include "mem.h" +#include "opt.h" + +#define IAMF_ADD_FUNC_TEMPLATE(parent_type, parent_name, child_type, child_name, suffix) \ +int av_iamf_ ## parent_name ## _add_ ## child_name(parent_type *parent_name, AVDictionary **options) \ +{ \ + child_type **child_name ## suffix, *child_name; \ + \ + if (parent_name->num_## child_name ## suffix == UINT_MAX) \ + return AVERROR(EINVAL); \ + \ + child_name ## suffix = av_realloc_array(parent_name->child_name ## suffix, \ + parent_name->num_## child_name ## suffix + 1, \ + sizeof(*parent_name->child_name ## suffix)); \ + if (!child_name ## suffix) \ + return AVERROR(ENOMEM); \ + \ + parent_name->child_name ## suffix = child_name ## suffix; \ + \ + child_name = parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix] \ + = av_mallocz(sizeof(*child_name)); \ + if (!child_name) \ + return AVERROR(ENOMEM); \ + \ + child_name->av_class = &child_name ## _class; \ + av_opt_set_defaults(child_name); \ + if (options) { \ + int ret = av_opt_set_dict2(child_name, options, AV_OPT_SEARCH_CHILDREN); \ + if (ret < 0) { \ + av_freep(&parent_name->child_name ## suffix[parent_name->num_## child_name ## suffix]); \ + return ret; \ + } \ + } \ + parent_name->num_## child_name ## suffix++; \ + \ + return 0; \ +} + +#define FLAGS AV_OPT_FLAG_ENCODING_PARAM + +// +// Param Definition +// +#define OFFSET(x) offsetof(AVIAMFMixGainParameterData, x) +static const AVOption mix_gain_options[] = { + { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS }, + { "animation_type", "set animation_type", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FLAGS }, + { "start_point_value", "set start_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS }, + { "end_point_value", "set end_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS }, + { "control_point_value", "set control_point_value", OFFSET(animation_type), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, -128.0, 128.0, FLAGS }, + { "control_point_relative_time", "set control_point_relative_time", OFFSET(animation_type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, UINT8_MAX, FLAGS }, + { NULL }, +}; + +static const AVClass mix_gain_class = { + .class_name = "AVIAMFSubmixElement", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = mix_gain_options, +}; + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFDemixingInfoParameterData, x) +static const AVOption demixing_info_options[] = { + { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS }, + { "dmixp_mode", "set dmixp_mode", OFFSET(dmixp_mode), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 6, FLAGS }, + { NULL }, +}; + +static const AVClass demixing_info_class = { + .class_name = "AVIAMFDemixingInfoParameterData", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = demixing_info_options, +}; + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFReconGainParameterData, x) +static const AVOption recon_gain_options[] = { + { "subblock_duration", "set subblock_duration", OFFSET(subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 1 }, 1, UINT_MAX, FLAGS }, + { NULL }, +}; + +static const AVClass recon_gain_class = { + .class_name = "AVIAMFReconGainParameterData", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = recon_gain_options, +}; + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFParamDefinition, x) +static const AVOption param_definition_options[] = { + { "parameter_id", "set parameter_id", OFFSET(parameter_id), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS }, + { "parameter_rate", "set parameter_rate", OFFSET(parameter_rate), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS }, + { "param_definition_mode", "set param_definition_mode", OFFSET(param_definition_mode), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 1, FLAGS }, + { "duration", "set duration", OFFSET(duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS }, + { "constant_subblock_duration", "set constant_subblock_duration", OFFSET(constant_subblock_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, UINT_MAX, FLAGS }, + { NULL }, +}; + +static const AVClass *param_definition_child_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVClass *ret = NULL; + + switch(i) { + case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: + ret = &mix_gain_class; + break; + case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: + ret = &demixing_info_class; + break; + case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: + ret = &recon_gain_class; + break; + default: + break; + } + + if (ret) + *opaque = (void*)(i + 1); + return ret; +} + +static const AVClass param_definition_class = { + .class_name = "AVIAMFParamDefinition", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = param_definition_options, + .child_class_iterate = param_definition_child_iterate, +}; + +const AVClass *av_iamf_param_definition_get_class(void) +{ + return &param_definition_class; +} + +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType type, AVDictionary **options, + unsigned int num_subblocks, AVDictionary **subblock_options, + size_t *out_size) +{ + + struct MixGainStruct { + AVIAMFParamDefinition p; + AVIAMFMixGainParameterData m; + }; + struct DemixStruct { + AVIAMFParamDefinition p; + AVIAMFDemixingInfoParameterData d; + }; + struct ReconGainStruct { + AVIAMFParamDefinition p; + AVIAMFReconGainParameterData r; + }; + size_t subblocks_offset, subblock_size; + size_t size; + AVIAMFParamDefinition *par; + + switch (type) { + case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: + subblocks_offset = offsetof(struct MixGainStruct, m); + subblock_size = sizeof(AVIAMFMixGainParameterData); + break; + case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: + subblocks_offset = offsetof(struct DemixStruct, d); + subblock_size = sizeof(AVIAMFDemixingInfoParameterData); + break; + case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: + subblocks_offset = offsetof(struct ReconGainStruct, r); + subblock_size = sizeof(AVIAMFReconGainParameterData); + break; + default: + return NULL; + } + + size = subblocks_offset; + if (num_subblocks > (SIZE_MAX - size) / subblock_size) + return NULL; + size += subblock_size * num_subblocks; + + par = av_mallocz(size); + if (!par) + return NULL; + + par->av_class = &param_definition_class; + av_opt_set_defaults(par); + if (options) { + int ret = av_opt_set_dict(par, options); + if (ret < 0) { + av_free(par); + return NULL; + } + } + par->param_definition_type = type; + par->num_subblocks = num_subblocks; + par->subblock_size = subblock_size; + par->subblocks_offset = subblocks_offset; + + for (int i = 0; i < num_subblocks; i++) { + void *subblock = av_iamf_param_definition_get_subblock(par, i); + + switch (type) { + case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: + ((AVIAMFMixGainParameterData *)subblock)->av_class = &mix_gain_class; + break; + case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: + ((AVIAMFDemixingInfoParameterData *)subblock)->av_class = &demixing_info_class; + break; + case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: + ((AVIAMFReconGainParameterData *)subblock)->av_class = &recon_gain_class; + break; + default: + av_assert0(0); + } + + av_opt_set_defaults(subblock); + if (subblock_options && subblock_options[i]) { + int ret = av_opt_set_dict(subblock, &subblock_options[i]); + if (ret < 0) { + av_free(par); + return NULL; + } + } + } + + if (out_size) + *out_size = size; + + return par; +} + +// +// Audio Element +// +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFLayer, x) +static const AVOption layer_options[] = { + { "ch_layout", "set ch_layout", OFFSET(ch_layout), AV_OPT_TYPE_CHLAYOUT, {.str = NULL }, 0, 0, FLAGS }, + { "recon_gain_is_present", "set recon_gain_is_present", OFFSET(recon_gain_is_present), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, + { "output_gain_flags", "set output_gain_flags", OFFSET(output_gain_flags), AV_OPT_TYPE_FLAGS, + {.i64 = 0 }, 0, (1 << 6) - 1, FLAGS, "output_gain_flags" }, + {"FL", "Left channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 5 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + {"FR", "Right channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 4 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + {"BL", "Left surround channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 3 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + {"BR", "Right surround channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 2 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + {"TFL", "Left top front channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 1 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + {"TFR", "Right top front channel", 0, AV_OPT_TYPE_CONST, + {.i64 = 1 << 0 }, INT_MIN, INT_MAX, FLAGS, "output_gain_flags"}, + { "output_gain", "set output_gain", OFFSET(output_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "ambisonics_mode", "set ambisonics_mode", OFFSET(ambisonics_mode), AV_OPT_TYPE_INT, + { .i64 = AV_IAMF_AMBISONICS_MODE_MONO }, + AV_IAMF_AMBISONICS_MODE_MONO, AV_IAMF_AMBISONICS_MODE_PROJECTION, FLAGS, "ambisonics_mode" }, + { "mono", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_AMBISONICS_MODE_MONO }, .unit = "ambisonics_mode" }, + { "projection", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_AMBISONICS_MODE_PROJECTION }, .unit = "ambisonics_mode" }, + { NULL }, +}; + +static const AVClass layer_class = { + .class_name = "AVIAMFLayer", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = layer_options, +}; + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFAudioElement, x) +static const AVOption audio_element_options[] = { + { "audio_element_type", "set audio_element_type", OFFSET(audio_element_type), AV_OPT_TYPE_INT, + {.i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL }, + AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, FLAGS, "audio_element_type" }, + { "channel", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL }, .unit = "audio_element_type" }, + { "scene", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE }, .unit = "audio_element_type" }, + { "default_w", "set default_w", OFFSET(default_w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 10, FLAGS }, + { NULL }, +}; + +static const AVClass *audio_element_child_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVClass *ret = NULL; + + if (i) + ret = &layer_class; + + if (ret) + *opaque = (void*)(i + 1); + return ret; +} + +static const AVClass audio_element_class = { + .class_name = "AVIAMFAudioElement", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = audio_element_options, + .child_class_iterate = audio_element_child_iterate, +}; + +const AVClass *av_iamf_audio_element_get_class(void) +{ + return &audio_element_class; +} + +AVIAMFAudioElement *av_iamf_audio_element_alloc(void) +{ + AVIAMFAudioElement *audio_element = av_mallocz(sizeof(*audio_element)); + + if (audio_element) { + audio_element->av_class = &audio_element_class; + av_opt_set_defaults(audio_element); + } + + return audio_element; +} + +IAMF_ADD_FUNC_TEMPLATE(AVIAMFAudioElement, audio_element, AVIAMFLayer, layer, s) + +void av_iamf_audio_element_free(AVIAMFAudioElement **paudio_element) +{ + AVIAMFAudioElement *audio_element = *paudio_element; + + if (!audio_element) + return; + + for (int i = 0; i < audio_element->num_layers; i++) { + AVIAMFLayer *layer = audio_element->layers[i]; + av_opt_free(layer); + av_free(layer->demixing_matrix); + av_free(layer); + } + av_free(audio_element->layers); + + av_free(audio_element->demixing_info); + av_free(audio_element->recon_gain_info); + av_freep(paudio_element); +} + +// +// Mix Presentation +// +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFSubmixElement, x) +static const AVOption submix_element_options[] = { + { "headphones_rendering_mode", "Headphones rendering mode", OFFSET(headphones_rendering_mode), AV_OPT_TYPE_INT, + { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO }, + AV_IAMF_HEADPHONES_MODE_STEREO, AV_IAMF_HEADPHONES_MODE_BINAURAL, FLAGS, "headphones_rendering_mode" }, + { "stereo", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_HEADPHONES_MODE_STEREO }, .unit = "headphones_rendering_mode" }, + { "binaural", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_HEADPHONES_MODE_BINAURAL }, .unit = "headphones_rendering_mode" }, + { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "annotations", "Annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, { .str = NULL }, 0, 0, FLAGS }, + { NULL }, +}; + +static void *submix_element_child_next(void *obj, void *prev) +{ + AVIAMFSubmixElement *submix_element = obj; + if (!prev) + return submix_element->element_mix_config; + + return NULL; +} + +static const AVClass *submix_element_child_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVClass *ret = NULL; + + if (i) + ret = &param_definition_class; + + if (ret) + *opaque = (void*)(i + 1); + return ret; +} + +static const AVClass element_class = { + .class_name = "AVIAMFSubmixElement", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = submix_element_options, + .child_next = submix_element_child_next, + .child_class_iterate = submix_element_child_iterate, +}; + +IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixElement, element, s) + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFSubmixLayout, x) +static const AVOption submix_layout_options[] = { + { "layout_type", "Layout type", OFFSET(layout_type), AV_OPT_TYPE_INT, + { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS }, + AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS, AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL, FLAGS, "layout_type" }, + { "loudspeakers", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS }, .unit = "layout_type" }, + { "binaural", NULL, 0, AV_OPT_TYPE_CONST, + { .i64 = AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL }, .unit = "layout_type" }, + { "sound_system", "Sound System", OFFSET(sound_system), AV_OPT_TYPE_CHLAYOUT, { .str = NULL }, 0, 0, FLAGS }, + { "integrated_loudness", "Integrated loudness", OFFSET(integrated_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "digital_peak", "Digital peak", OFFSET(digital_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "true_peak", "True peak", OFFSET(true_peak), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "dialog_anchored_loudness", "Anchored loudness (Dialog)", OFFSET(dialogue_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { "album_anchored_loudness", "Anchored loudness (Album)", OFFSET(album_anchored_loudness), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { NULL }, +}; + +static const AVClass layout_class = { + .class_name = "AVIAMFSubmixLayout", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = submix_layout_options, +}; + +IAMF_ADD_FUNC_TEMPLATE(AVIAMFSubmix, submix, AVIAMFSubmixLayout, layout, s) + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFSubmix, x) +static const AVOption submix_presentation_options[] = { + { "default_mix_gain", "Default mix gain", OFFSET(default_mix_gain), AV_OPT_TYPE_RATIONAL, { .dbl = 0 }, -128.0, 128.0, FLAGS }, + { NULL }, +}; + +static void *submix_presentation_child_next(void *obj, void *prev) +{ + AVIAMFSubmix *sub_mix = obj; + if (!prev) + return sub_mix->output_mix_config; + + return NULL; +} + +static const AVClass *submix_presentation_child_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVClass *ret = NULL; + + switch(i) { + case 0: + ret = &element_class; + break; + case 1: + ret = &layout_class; + break; + case 2: + ret = &param_definition_class; + break; + default: + break; + } + + if (ret) + *opaque = (void*)(i + 1); + return ret; +} + +static const AVClass submix_class = { + .class_name = "AVIAMFSubmix", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = submix_presentation_options, + .child_next = submix_presentation_child_next, + .child_class_iterate = submix_presentation_child_iterate, +}; + +#undef OFFSET +#define OFFSET(x) offsetof(AVIAMFMixPresentation, x) +static const AVOption mix_presentation_options[] = { + { "annotations", "set annotations", OFFSET(annotations), AV_OPT_TYPE_DICT, {.str = NULL }, 0, 0, FLAGS }, + { NULL }, +}; + +#undef OFFSET +#undef FLAGS + +static const AVClass *mix_presentation_child_iterate(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + const AVClass *ret = NULL; + + if (i) + ret = &submix_class; + + if (ret) + *opaque = (void*)(i + 1); + return ret; +} + +static const AVClass mix_presentation_class = { + .class_name = "AVIAMFMixPresentation", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, + .option = mix_presentation_options, + .child_class_iterate = mix_presentation_child_iterate, +}; + +const AVClass *av_iamf_mix_presentation_get_class(void) +{ + return &mix_presentation_class; +} + +AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void) +{ + AVIAMFMixPresentation *mix_presentation = av_mallocz(sizeof(*mix_presentation)); + + if (mix_presentation) { + mix_presentation->av_class = &mix_presentation_class; + av_opt_set_defaults(mix_presentation); + } + + return mix_presentation; +} + +IAMF_ADD_FUNC_TEMPLATE(AVIAMFMixPresentation, mix_presentation, AVIAMFSubmix, submix, es) + +void av_iamf_mix_presentation_free(AVIAMFMixPresentation **pmix_presentation) +{ + AVIAMFMixPresentation *mix_presentation = *pmix_presentation; + + if (!mix_presentation) + return; + + for (int i = 0; i < mix_presentation->num_submixes; i++) { + AVIAMFSubmix *sub_mix = mix_presentation->submixes[i]; + for (int j = 0; j < sub_mix->num_elements; j++) { + AVIAMFSubmixElement *submix_element = sub_mix->elements[j]; + av_opt_free(submix_element); + av_free(submix_element->element_mix_config); + av_free(submix_element); + } + av_free(sub_mix->elements); + for (int j = 0; j < sub_mix->num_layouts; j++) { + AVIAMFSubmixLayout *submix_layout = sub_mix->layouts[j]; + av_opt_free(submix_layout); + av_free(submix_layout); + } + av_free(sub_mix->layouts); + av_free(sub_mix->output_mix_config); + av_free(sub_mix); + } + av_opt_free(mix_presentation); + av_free(mix_presentation->submixes); + + av_freep(pmix_presentation); +} diff --git a/libavutil/iamf.h b/libavutil/iamf.h new file mode 100644 index 0000000000..1f4919efdb --- /dev/null +++ b/libavutil/iamf.h @@ -0,0 +1,377 @@ +/* + * Immersive Audio Model and Formats helper functions and defines + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_IAMF_H +#define AVUTIL_IAMF_H + +/** + * @file + * Immersive Audio Model and Formats API header + */ + +#include <stdint.h> +#include <stddef.h> + +#include "attributes.h" +#include "avassert.h" +#include "channel_layout.h" +#include "dict.h" +#include "rational.h" + +enum AVIAMFAudioElementType { + AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, + AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, +}; + +/** + * @defgroup lavf_iamf_params Parameter Definition + * @{ + * Parameters as defined in section 3.6.1 and 3.8 + * @} + * @defgroup lavf_iamf_audio Audio Element + * @{ + * Audio Elements as defined in section 3.6 + * @} + * @defgroup lavf_iamf_mix Mix Presentation + * @{ + * Mix Presentations as defined in section 3.7 + * @} + * + * @} + * @addtogroup lavf_iamf_params + * @{ + */ +enum AVIAMFAnimationType { + AV_IAMF_ANIMATION_TYPE_STEP, + AV_IAMF_ANIMATION_TYPE_LINEAR, + AV_IAMF_ANIMATION_TYPE_BEZIER, +}; + +/** + * Mix Gain Parameter Data as defined in section 3.8.1 + * + * Subblocks in AVIAMFParamDefinition use this struct when the value or + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is + * AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN. + */ +typedef struct AVIAMFMixGainParameterData { + const AVClass *av_class; + + // AVOption enabled fields + unsigned int subblock_duration; + enum AVIAMFAnimationType animation_type; + AVRational start_point_value; + AVRational end_point_value; + AVRational control_point_value; + unsigned int control_point_relative_time; +} AVIAMFMixGainParameterData; + +/** + * Demixing Info Parameter Data as defined in section 3.8.2 + * + * Subblocks in AVIAMFParamDefinition use this struct when the value or + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is + * AV_IAMF_PARAMETER_DEFINITION_DEMIXING. + */ +typedef struct AVIAMFDemixingInfoParameterData { + const AVClass *av_class; + + // AVOption enabled fields + unsigned int subblock_duration; + unsigned int dmixp_mode; +} AVIAMFDemixingInfoParameterData; + +/** + * Recon Gain Info Parameter Data as defined in section 3.8.3 + * + * Subblocks in AVIAMFParamDefinition use this struct when the value or + * @ref AVIAMFParamDefinition.param_definition_type param_definition_type is + * AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN. + */ +typedef struct AVIAMFReconGainParameterData { + const AVClass *av_class; + + // AVOption enabled fields + unsigned int subblock_duration; + // End of AVOption enabled fields + uint8_t recon_gain[6][12]; +} AVIAMFReconGainParameterData; + +enum AVIAMFParamDefinitionType { + AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, + AV_IAMF_PARAMETER_DEFINITION_DEMIXING, + AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN, +}; + +/** + * Parameters as defined in section 3.6.1 + */ +typedef struct AVIAMFParamDefinition { + const AVClass *av_class; + + size_t subblocks_offset; + size_t subblock_size; + + enum AVIAMFParamDefinitionType param_definition_type; + unsigned int num_subblocks; + + // AVOption enabled fields + unsigned int parameter_id; + unsigned int parameter_rate; + unsigned int param_definition_mode; + unsigned int duration; + unsigned int constant_subblock_duration; +} AVIAMFParamDefinition; + +const AVClass *av_iamf_param_definition_get_class(void); + +AVIAMFParamDefinition *av_iamf_param_definition_alloc(enum AVIAMFParamDefinitionType param_definition_type, + AVDictionary **options, + unsigned int num_subblocks, AVDictionary **subblock_options, + size_t *size); + +/** + * Get the subblock at the specified {@code idx}. Must be between 0 and num_subblocks - 1. + * + * The @ref AVIAMFParamDefinition.param_definition_type "param definition type" defines + * the struct type of the returned pointer. + */ +static av_always_inline void* +av_iamf_param_definition_get_subblock(AVIAMFParamDefinition *par, unsigned int idx) +{ + av_assert0(idx < par->num_subblocks); + return (void *)((uint8_t *)par + par->subblocks_offset + idx * par->subblock_size); +} + +/** + * @} + * @addtogroup lavf_iamf_audio + * @{ + */ + +enum AVIAMFAmbisonicsMode { + AV_IAMF_AMBISONICS_MODE_MONO, + AV_IAMF_AMBISONICS_MODE_PROJECTION, +}; + +/** + * A layer defining a Channel Layout in the Audio Element. + * + * When audio_element_type is AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, this + * corresponds to an Scalable Channel Layout layer as defined in section 3.6.2. + * For AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, it is an Ambisonics channel + * layout as defined in section 3.6.3 + */ +typedef struct AVIAMFLayer { + const AVClass *av_class; + + // AVOption enabled fields + AVChannelLayout ch_layout; + + unsigned int recon_gain_is_present; + /** + * Output gain flags as defined in section 3.6.2 + * + * This field is defined only if audio_element_type is + * AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL, must be 0 otherwise. + */ + unsigned int output_gain_flags; + /** + * Output gain as defined in section 3.6.2 + * + * Must be 0 if @ref output_gain_flags is 0. + */ + AVRational output_gain; + /** + * Ambisonics mode as defined in section 3.6.3 + * + * This field is defined only if audio_element_type is + * AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, must be 0 otherwise. + * + * If 0, channel_mapping is defined implicitly (Ambisonic Order) + * or explicitly (Custom Order with ambi channels) in @ref ch_layout. + * If 1, @ref demixing_matrix must be set. + */ + enum AVIAMFAmbisonicsMode ambisonics_mode; + + // End of AVOption enabled fields + /** + * Demixing matrix as defined in section 3.6.3 + * + * Set only if @ref ambisonics_mode == 1, must be NULL otherwise. + */ + AVRational *demixing_matrix; +} AVIAMFLayer; + +typedef struct AVIAMFAudioElement { + const AVClass *av_class; + + AVIAMFLayer **layers; + /** + * Number of layers, or channel groups, in the Audio Element. + * For audio_element_type AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE, there + * may be exactly 1. + * + * Set by av_iamf_audio_element_add_layer(), must not be + * modified by any other code. + */ + unsigned int num_layers; + + unsigned int codec_config_id; + + AVIAMFParamDefinition *demixing_info; + AVIAMFParamDefinition *recon_gain_info; + + // AVOption enabled fields + /** + * Audio element type as defined in section 3.6 + */ + enum AVIAMFAudioElementType audio_element_type; + + /** + * Default weight value as defined in section 3.6 + */ + unsigned int default_w; +} AVIAMFAudioElement; + +const AVClass *av_iamf_audio_element_get_class(void); + +AVIAMFAudioElement *av_iamf_audio_element_alloc(void); + +int av_iamf_audio_element_add_layer(AVIAMFAudioElement *audio_element, AVDictionary **options); + +void av_iamf_audio_element_free(AVIAMFAudioElement **audio_element); + +/** + * @} + * @addtogroup lavf_iamf_mix + * @{ + */ + +enum AVIAMFHeadphonesMode { + AV_IAMF_HEADPHONES_MODE_STEREO, + AV_IAMF_HEADPHONES_MODE_BINAURAL, +}; + +typedef struct AVIAMFSubmixElement { + const AVClass *av_class; + + unsigned int audio_element_id; + + AVIAMFParamDefinition *element_mix_config; + + // AVOption enabled fields + enum AVIAMFHeadphonesMode headphones_rendering_mode; + + AVRational default_mix_gain; + + /** + * A dictionary of string describing the submix. Must have the same + * amount of entries as @ref AVIAMFMixPresentation.annotations "the + * mix's annotations". + * + * decoding: set by libavformat + * encoding: set by the user + */ + AVDictionary *annotations; +} AVIAMFSubmixElement; + +enum AVIAMFSubmixLayoutType { + AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS = 2, + AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL = 3, +}; + +typedef struct AVIAMFSubmixLayout { + const AVClass *av_class; + + // AVOption enabled fields + enum AVIAMFSubmixLayoutType layout_type; + AVChannelLayout sound_system; + AVRational integrated_loudness; + AVRational digital_peak; + AVRational true_peak; + AVRational dialogue_anchored_loudness; + AVRational album_anchored_loudness; +} AVIAMFSubmixLayout; + +typedef struct AVIAMFSubmix { + const AVClass *av_class; + + AVIAMFSubmixElement **elements; + /** + * Set by av_iamf_mix_presentation_add_submix(), must not be + * modified by any other code. + */ + unsigned int num_elements; + + AVIAMFSubmixLayout **layouts; + /** + * Set by av_iamf_mix_presentation_add_submix(), must not be + * modified by any other code. + */ + unsigned int num_layouts; + + AVIAMFParamDefinition *output_mix_config; + + // AVOption enabled fields + AVRational default_mix_gain; +} AVIAMFSubmix; + +typedef struct AVIAMFMixPresentation { + const AVClass *av_class; + + AVIAMFSubmix **submixes; + /** + * Number of submixes in the presentation. + * + * Set by av_iamf_mix_presentation_add_submix(), must not be + * modified by any other code. + */ + unsigned int num_submixes; + + // AVOption enabled fields + /** + * A dictionary of string describing the mix. Must have the same + * amount of entries as every @ref AVIAMFSubmixElement.annotations + * "Submix element annotations". + * + * decoding: set by libavformat + * encoding: set by the user + */ + AVDictionary *annotations; +} AVIAMFMixPresentation; + +const AVClass *av_iamf_mix_presentation_get_class(void); + +AVIAMFMixPresentation *av_iamf_mix_presentation_alloc(void); + +int av_iamf_mix_presentation_add_submix(AVIAMFMixPresentation *mix_presentation, + AVDictionary **options); + +int av_iamf_submix_add_element(AVIAMFSubmix *submix, AVDictionary **options); + +int av_iamf_submix_add_layout(AVIAMFSubmix *submix, AVDictionary **options); + +void av_iamf_mix_presentation_free(AVIAMFMixPresentation **mix_presentation); +/** + * @} + */ + +#endif /* AVUTIL_IAMF_H */

[FFmpeg-devel,4/9] avutil: introduce an Immersive Audio Model and Formats API

Checks

Commit Message

Comments

Patch