diff mbox series

[FFmpeg-devel,1/2] avformat/mxf: support MCA audio information

Message ID 20211110174007.34740-1-marc-antoine.arnaud@luminvent.com
State New
Headers show
Series [FFmpeg-devel,1/2] avformat/mxf: support MCA audio information
Related show

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Marc-Antoine ARNAUD Nov. 10, 2021, 5:40 p.m. UTC
---
 doc/demuxers.texi     |  10 ++
 libavformat/mxf.h     |   1 +
 libavformat/mxfdec.c  | 284 +++++++++++++++++++++++++++++++++++++++++-
 libavformat/version.h |   2 +-
 4 files changed, 290 insertions(+), 7 deletions(-)

Comments

Marton Balint Nov. 13, 2021, 11:19 a.m. UTC | #1
On Wed, 10 Nov 2021, Marc-Antoine Arnaud wrote:

> ---
> doc/demuxers.texi     |  10 ++
> libavformat/mxf.h     |   1 +
> libavformat/mxfdec.c  | 284 +++++++++++++++++++++++++++++++++++++++++-
> libavformat/version.h |   2 +-
> 4 files changed, 290 insertions(+), 7 deletions(-)

[...]

> @@ -2688,6 +2841,98 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
>                 sti->need_parsing = AVSTREAM_PARSE_FULL;
>             }
>             st->codecpar->bits_per_coded_sample = av_get_bits_per_sample(st->codecpar->codec_id);
> +
> +            current_channel = 0;
> +
> +            if (descriptor->channels >= FF_SANE_NB_CHANNELS) {
> +                av_log(mxf->fc, AV_LOG_ERROR, "max number of channels %d reached\n", FF_SANE_NB_CHANNELS);
> +                return AVERROR_INVALIDDATA;
> +            }
> +
> +            for (j = 0; j < descriptor->channels; ++j) {
> +                source_track->channel_ordering[j] = j;
> +            }
> +
> +            for (j = 0; j < descriptor->sub_descriptors_count; j++) {
> +                MXFMCASubDescriptor *mca_sub_descriptor = mxf_resolve_strong_ref(mxf, &descriptor->sub_descriptors_refs[j], MCASubDescriptor);
> +                if (mca_sub_descriptor == NULL) {
> +                    continue;
> +                }
> +
> +                // Soundfield group
> +                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionary_id, mxf_soundfield_group)) {
> +                    const MXFSoundfieldGroupUL* group_ptr = mxf_soundfield_groups;
> +
> +                    while (group_ptr->uid[0]) {
> +                        if (IS_KLV_KEY(group_ptr->uid, mca_sub_descriptor->mca_label_dictionary_id)) {
> +                            st->codecpar->channel_layout = group_ptr->id;
> +                            break;
> +                        }
> +                        group_ptr++;
> +                    }
> +                }
> +
> +                // Audio channel
> +                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionary_id, mxf_audio_channel)) {
> +                    const MXFChannelOrderingUL* channel_ordering_ptr = mxf_channel_ordering;
> +
> +                    while (channel_ordering_ptr->uid[0]) {
> +                        if (IS_KLV_KEY(channel_ordering_ptr->uid, mca_sub_descriptor->mca_label_dictionary_id)) {
> +                            source_track->channel_ordering[current_channel] = channel_ordering_ptr->index;

Same what I wrote for the earlier version:

You should check if current_channel < desciptor->channels here, and if 
not, then warn the user and break out of the loop. Otherwise 
current_channel can grow out of array limits.

It should also be checked that channel_ordering_ptr->index < 
descriptor->channels, and if not, then similarly, warn the user and break 
out.

Thomas mentioned he prefers a hard failure, returning 
AVERROR_INVALIDDATA in addition to printing the error.

> +
> +                            if(channel_ordering_ptr->service_type != AV_AUDIO_SERVICE_TYPE_NB) {
> +                                enum AVAudioServiceType *ast;
> +                                uint8_t* side_data = av_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE, sizeof(*ast));
> +                                if (!side_data)
> +                                    goto fail_and_free;
> +                                ast = (enum AVAudioServiceType*)side_data;
> +                                *ast = channel_ordering_ptr->service_type;
> +                            }
> +
> +                            current_channel += 1;
> +                            break;
> +                        }
> +                        channel_ordering_ptr++;
> +                    }
> +                }
> +
> +                // set language from MCA spoken language information
> +                if (mca_sub_descriptor->language) {
> +                    ret = set_language(mxf->fc, mca_sub_descriptor->language, &st->metadata);
> +                    if (ret < 0) {
> +                        return ret;
> +                    }
> +                }
> +            }
> +
> +            // check if the mapping is not required
> +            source_track->require_reordering = 0;
> +            if (is_pcm(st->codecpar->codec_id)) {
> +                for (j = 0; j < descriptor->channels; ++j) {
> +                    if (source_track->channel_ordering[j] != j) {
> +                        source_track->require_reordering = 1;
> +                        break;
> +                    }
> +                }
> +            } else {
> +                st->codecpar->channel_layout = 0;
> +            }

This is wrong if codec is not pcm but no reordering is required 
because it destroys the otherwise valid channel layout. Also it does not 
reset the channel layout if the user want no reordering but it is needed.

So I meant something like this:

for (j = 0; j < descriptor->channels; ++j) {
     if (source_track->channel_ordering[j] != j) {
         if (!is_pcm(st->codecpar->codec_id) || mxf->skip_audio_reordering) {
             av_log(mxf->fc, AV_LOG_WARNING, "Skipping channel reordering!\n");
             st->codecpar->channel_layout = 0;
         } else {
             source_track->require_reordering = 1;
         }
         break;
     }
}


> +
> +            if (source_track->require_reordering) {
> +                current_channel = 0;
> +                av_log(mxf->fc, AV_LOG_DEBUG, "MCA Audio mapping (");
> +                for(j = 0; j < descriptor->channels; ++j) {
> +                    for(int k = 0; k < descriptor->channels; ++k) {
> +                        if(source_track->channel_ordering[k] == current_channel) {
> +                            av_log(mxf->fc, AV_LOG_DEBUG, "%d -> %d", source_track->channel_ordering[k], k);
> +                            if (current_channel != descriptor->channels - 1)
> +                                av_log(mxf->fc, AV_LOG_DEBUG, ", ");
> +                            current_channel += 1;
> +                        }
> +                    }
> +                }
> +                av_log(mxf->fc, AV_LOG_DEBUG, ")\n");
> +            }
>         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
>             enum AVMediaType type;
>             container_ul = mxf_get_codec_ul(mxf_data_essence_container_uls, essence_container_ul);
> @@ -2888,6 +3133,8 @@ static const MXFMetadataReadTableEntry mxf_metadata_read_table[] = {
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5c,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* VANC/VBI - SMPTE 436M */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5e,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2AudioDescriptor */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x64,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* DC Timed Text Descriptor */
> +    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6c,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Soundfield Group Label Subdescriptor */
> +    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6b,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Audio Channel Label Subdescriptor */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3A,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Static Track */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3B,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Generic Track */
>     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x14,0x00 }, mxf_read_timecode_component, sizeof(MXFTimecodeComponent), TimecodeComponent },
> @@ -3187,12 +3434,6 @@ static void mxf_compute_essence_containers(AVFormatContext *s)
>     }
> }
>
> -static int is_pcm(enum AVCodecID codec_id)
> -{
> -    /* we only care about "normal" PCM codecs until we get samples */
> -    return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
> -}
> -
> static MXFIndexTable *mxf_find_index_table(MXFContext *mxf, int index_sid)
> {
>     int i;
> @@ -3619,6 +3860,25 @@ static int mxf_set_pts(MXFContext *mxf, AVStream *st, AVPacket *pkt)
>     return 0;
> }
>
> +static int mxf_audio_remapping(int* channel_ordering, uint8_t* data, int size, int sample_size, int channels)
> +{
> +    int sample_offset = channels * sample_size;
> +    int number_of_samples = size / sample_offset;
> +    uint8_t tmp[FF_SANE_NB_CHANNELS * 4];
> +    uint8_t* data_ptr = data;
> +
> +    for (int sample = 0; sample < number_of_samples; ++sample) {
> +        memcpy(tmp, data_ptr, sample_offset);
> +
> +        for (int channel = 0; channel < channels; ++channel) {
> +            memcpy(&data_ptr[sample_size * channel_ordering[channel]], &tmp[sample_size * channel],  sample_size);
> +        }
> +
> +        data_ptr += sample_offset;
> +    }
> +    return 0;
> +}
> +
> static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
> {
>     KLVPacket klv;
> @@ -3733,6 +3993,15 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
>                 return ret;
>             }
>
> +            // for audio, process audio remapping if MCA label requires it

Trailing whitespace at end of line.

> +            if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && track->require_reordering && !mxf->skip_audio_reordering) {

And you can remove the mxf->skip_audio_reordering check form here if it is 
only set if mxf->skip_audio_reordering is not set.

Thanks,
Marton
diff mbox series

Patch

diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index 1c9575b2e8..fecfeadb47 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -770,6 +770,16 @@  MJPEG stream. Turning this option on by setting it to 1 will result in a stricte
 of the boundary value.
 @end table
 
+@section mxf
+
+MXF demuxer.
+
+@table @option
+
+@item -skip_audio_reordering @var{bool}
+This option will disable the audio reordering based on Multi-Channel Audio (MCA) labelling (SMPTE ST-377-4).
+@end table
+
 @section rawvideo
 
 Raw video demuxer.
diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index fe9c52732c..cddbcb13c9 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h
@@ -50,6 +50,7 @@  enum MXFMetadataSetType {
     TaggedValue,
     TapeDescriptor,
     AVCSubDescriptor,
+    MCASubDescriptor,
 };
 
 enum MXFFrameLayout {
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index af9d33f796..194a887f75 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -51,11 +51,14 @@ 
 #include "libavutil/mastering_display_metadata.h"
 #include "libavutil/mathematics.h"
 #include "libavcodec/bytestream.h"
+#include "libavcodec/internal.h"
+#include "libavutil/channel_layout.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/timecode.h"
 #include "libavutil/opt.h"
 #include "avformat.h"
+#include "avlanguage.h"
 #include "internal.h"
 #include "mxf.h"
 
@@ -177,6 +180,8 @@  typedef struct {
     int body_sid;
     MXFWrappingScheme wrapping;
     int edit_units_per_packet; /* how many edit units to read at a time (PCM, ClipWrapped) */
+    int require_reordering;
+    int channel_ordering[FF_SANE_NB_CHANNELS];
 } MXFTrack;
 
 typedef struct MXFDescriptor {
@@ -205,6 +210,8 @@  typedef struct MXFDescriptor {
     unsigned int vert_subsampling;
     UID *file_descriptors_refs;
     int file_descriptors_count;
+    UID *sub_descriptors_refs;
+    int sub_descriptors_count;
     int linked_track_id;
     uint8_t *extradata;
     int extradata_size;
@@ -217,6 +224,15 @@  typedef struct MXFDescriptor {
     size_t coll_size;
 } MXFDescriptor;
 
+typedef struct MXFMCASubDescriptor {
+    MXFMetadataSet meta;
+    UID uid;
+    UID mca_link_id;
+    UID mca_group_link_id;
+    UID mca_label_dictionary_id;
+    char *language;
+} MXFMCASubDescriptor;
+
 typedef struct MXFIndexTableSegment {
     MXFMetadataSet meta;
     int edit_unit_byte_count;
@@ -290,6 +306,7 @@  typedef struct MXFContext {
     int nb_index_tables;
     MXFIndexTable *index_tables;
     int eia608_extract;
+    int skip_audio_reordering;
 } MXFContext;
 
 /* NOTE: klv_offset is not set (-1) for local keys */
@@ -311,6 +328,9 @@  static const uint8_t mxf_system_item_key_cp[]              = { 0x06,0x0e,0x2b,0x
 static const uint8_t mxf_system_item_key_gc[]              = { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x03,0x01,0x14 };
 static const uint8_t mxf_klv_key[]                         = { 0x06,0x0e,0x2b,0x34 };
 static const uint8_t mxf_apple_coll_prefix[]               = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01 };
+static const uint8_t mxf_audio_channel[]                   = { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01 };
+static const uint8_t mxf_soundfield_group[]                = { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02 };
+
 /* complete keys to match */
 static const uint8_t mxf_crypto_source_container_ul[]      = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x09,0x06,0x01,0x01,0x02,0x02,0x00,0x00,0x00 };
 static const uint8_t mxf_encrypted_triplet_key[]           = { 0x06,0x0e,0x2b,0x34,0x02,0x04,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x7e,0x01,0x00 };
@@ -323,6 +343,15 @@  static const uint8_t mxf_indirect_value_utf16be[]          = { 0x42,0x01,0x10,0x
 static const uint8_t mxf_apple_coll_max_cll[]              = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01,0x01 };
 static const uint8_t mxf_apple_coll_max_fall[]             = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x0e,0x20,0x04,0x01,0x05,0x03,0x01,0x02 };
 
+static const uint8_t mxf_mca_label_dictionary_id[]         = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x01,0x00,0x00,0x00 };
+static const uint8_t mxf_mca_tag_symbol[]                  = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x02,0x00,0x00,0x00 };
+static const uint8_t mxf_mca_tag_name[]                    = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x03,0x00,0x00,0x00 };
+static const uint8_t mxf_mca_link_id[]                     = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x05,0x00,0x00,0x00 };
+static const uint8_t mxf_soundfield_group_link_id[]        = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0e,0x01,0x03,0x07,0x01,0x06,0x00,0x00,0x00 };
+static const uint8_t mxf_mca_rfc5646_spoken_language[]     = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x0d,0x03,0x01,0x01,0x02,0x03,0x15,0x00,0x00 };
+
+static const uint8_t mxf_sub_descriptor[]                  = { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0x09,0x06,0x01,0x01,0x04,0x06,0x10,0x00,0x00 };
+
 static const uint8_t mxf_mastering_display_prefix[13]      = { FF_MXF_MasteringDisplay_PREFIX };
 static const uint8_t mxf_mastering_display_uls[4][16] = {
     FF_MXF_MasteringDisplayPrimaries,
@@ -343,6 +372,11 @@  static void mxf_free_metadataset(MXFMetadataSet **ctx, int freectx)
         av_freep(&((MXFDescriptor *)*ctx)->mastering);
         av_freep(&((MXFDescriptor *)*ctx)->coll);
         av_freep(&((MXFDescriptor *)*ctx)->file_descriptors_refs);
+        av_freep(&((MXFDescriptor *)*ctx)->sub_descriptors_refs);
+        break;
+    case MCASubDescriptor:
+        if (((MXFMCASubDescriptor *)*ctx)->language)
+            av_freep(&((MXFMCASubDescriptor *)*ctx)->language);
         break;
     case Sequence:
         av_freep(&((MXFSequence *)*ctx)->structural_components_refs);
@@ -906,6 +940,30 @@  static int mxf_read_strong_ref_array(AVIOContext *pb, UID **refs, int *count)
     return 0;
 }
 
+static inline int mxf_read_us_ascii_string(AVIOContext *pb, int size, char** str)
+{
+    int ret;
+    size_t buf_size;
+
+    if (size < 0 || size > INT_MAX - 1)
+        return AVERROR(EINVAL);
+
+    buf_size = size + 1;
+    av_free(*str);
+    *str = av_malloc(buf_size);
+    if (!*str)
+        return AVERROR(ENOMEM);
+
+    ret = avio_get_str(pb, size, *str, buf_size);
+
+    if (ret < 0) {
+        av_freep(str);
+        return ret;
+    }
+
+    return ret;
+}
+
 static inline int mxf_read_utf16_string(AVIOContext *pb, int size, char** str, int be)
 {
     int ret;
@@ -1360,11 +1418,34 @@  static int mxf_read_generic_descriptor(void *arg, AVIOContext *pb, int tag, int
                 descriptor->coll->MaxFALL = avio_rb16(pb);
             }
         }
+
+        if (IS_KLV_KEY(uid, mxf_sub_descriptor))
+            mxf_read_strong_ref_array(pb, &descriptor->sub_descriptors_refs, &descriptor->sub_descriptors_count);
+
         break;
     }
     return 0;
 }
 
+static int mxf_read_mca_sub_descriptor(void *arg, AVIOContext *pb, int tag, int size, UID uid, int64_t klv_offset)
+{
+    MXFMCASubDescriptor *mca_sub_descriptor = arg;
+
+    if (IS_KLV_KEY(uid, mxf_mca_label_dictionary_id))
+        avio_read(pb, mca_sub_descriptor->mca_label_dictionary_id, 16);
+
+    if (IS_KLV_KEY(uid, mxf_mca_link_id))
+        avio_read(pb, mca_sub_descriptor->mca_link_id, 16);
+
+    if (IS_KLV_KEY(uid, mxf_soundfield_group_link_id))
+        avio_read(pb, mca_sub_descriptor->mca_group_link_id, 16);
+
+    if (IS_KLV_KEY(uid, mxf_mca_rfc5646_spoken_language))
+        return mxf_read_us_ascii_string(pb, size, &mca_sub_descriptor->language);
+
+    return 0;
+}
+
 static int mxf_read_indirect_value(void *arg, AVIOContext *pb, int size)
 {
     MXFTaggedValue *tagged_value = arg;
@@ -1494,6 +1575,56 @@  static const MXFCodecUL mxf_data_essence_container_uls[] = {
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0, AV_CODEC_ID_NONE },
 };
 
+typedef struct MXFSoundfieldGroupUL {
+    UID uid;
+    int64_t id;
+} MXFSoundfieldGroupUL;
+
+static const MXFSoundfieldGroupUL mxf_soundfield_groups[] = {
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x01,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT1 }, // 5.1
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x02,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT1 }, // 7.1 DS
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x03,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT1_WIDE }, // 7.1 SDS
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x04,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_6POINT1 }, // 6.1
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x05,0x00,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // 1.0 Monoral
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x01,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // Standard stereo
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x02,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // Dual mono
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x04,0x00,0x00,0x00 }, AV_CH_LAYOUT_SURROUND }, // 3.0
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x05,0x00,0x00,0x00 }, AV_CH_LAYOUT_4POINT0 }, // 4.0
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x06,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT0 }, // 5.0
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x07,0x00,0x00,0x00 }, AV_CH_LAYOUT_6POINT0 }, // 6.0
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x08,0x00,0x00,0x00 }, AV_CH_LAYOUT_7POINT0 }, // 7.0
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x09,0x00,0x00,0x00 }, AV_CH_LAYOUT_STEREO }, // LrRt
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0a,0x00,0x00,0x00 }, AV_CH_LAYOUT_5POINT1 }, // 5.1 EX
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0b,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // Hearing accessibility
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x02,0x20,0x0c,0x00,0x00,0x00 }, AV_CH_LAYOUT_MONO }, // Visual accessibility
+    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0 },
+};
+
+typedef struct MXFChannelOrderingUL {
+    UID uid;
+    int index;
+    enum AVAudioServiceType service_type;
+} MXFChannelOrderingUL;
+
+static const MXFChannelOrderingUL mxf_channel_ordering[] = {
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x01,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_NB }, // Left audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x02,0x00,0x00,0x00,0x00 }, 1, AV_AUDIO_SERVICE_TYPE_NB }, // Right audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x03,0x00,0x00,0x00,0x00 }, 2, AV_AUDIO_SERVICE_TYPE_NB }, // Center audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x04,0x00,0x00,0x00,0x00 }, 5, AV_AUDIO_SERVICE_TYPE_NB }, // LFE audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x05,0x00,0x00,0x00,0x00 }, 3, AV_AUDIO_SERVICE_TYPE_NB }, // Left surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x06,0x00,0x00,0x00,0x00 }, 4, AV_AUDIO_SERVICE_TYPE_NB }, // Right surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x07,0x00,0x00,0x00,0x00 }, 3, AV_AUDIO_SERVICE_TYPE_NB }, // Left side surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x08,0x00,0x00,0x00,0x00 }, 4, AV_AUDIO_SERVICE_TYPE_NB }, // Right side surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x09,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Left rear surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0a,0x00,0x00,0x00,0x00 }, 7, AV_AUDIO_SERVICE_TYPE_NB }, // Right rear surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0b,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Left center audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0c,0x00,0x00,0x00,0x00 }, 7, AV_AUDIO_SERVICE_TYPE_NB }, // Right center audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0d,0x00,0x00,0x00,0x00 }, 6, AV_AUDIO_SERVICE_TYPE_NB }, // Center surround audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0e,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED }, // Hearing impaired audio channel
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0d,0x03,0x02,0x01,0x0f,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED }, // Visually impaired narrative audio channel
+    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, 0, AV_AUDIO_SERVICE_TYPE_NB },
+};
+
 static MXFWrappingScheme mxf_get_wrapping_kind(UID *essence_container_ul)
 {
     int val;
@@ -2292,6 +2423,27 @@  static enum AVColorRange mxf_get_color_range(MXFContext *mxf, MXFDescriptor *des
     return AVCOL_RANGE_UNSPECIFIED;
 }
 
+static int is_pcm(enum AVCodecID codec_id)
+{
+    /* we only care about "normal" PCM codecs until we get samples */
+    return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
+}
+
+static int set_language(AVFormatContext *s, const char *rfc5646, AVDictionary **met)
+{
+    // language abbr should contain at least 2 chars
+    if (rfc5646 && strlen(rfc5646) > 1) {
+        char primary_tag[4] =
+            {rfc5646[0], rfc5646[1], rfc5646[2] != '-' ? rfc5646[2] : '\0', '\0'};
+
+        const char *iso6392       = ff_convert_lang_to(primary_tag,
+                                                       AV_LANG_ISO639_2_BIBL);
+        if (iso6392)
+            return(av_dict_set(met, "language", iso6392, 0));
+    }
+    return 0;
+}
+
 static int mxf_parse_structural_metadata(MXFContext *mxf)
 {
     MXFPackage *material_package = NULL;
@@ -2329,6 +2481,7 @@  static int mxf_parse_structural_metadata(MXFContext *mxf)
         FFStream *sti;
         AVTimecode tc;
         int flags;
+        int current_channel;
 
         if (!(material_track = mxf_resolve_strong_ref(mxf, &material_package->tracks_refs[i], Track))) {
             av_log(mxf->fc, AV_LOG_ERROR, "could not resolve material track strong ref\n");
@@ -2688,6 +2841,98 @@  static int mxf_parse_structural_metadata(MXFContext *mxf)
                 sti->need_parsing = AVSTREAM_PARSE_FULL;
             }
             st->codecpar->bits_per_coded_sample = av_get_bits_per_sample(st->codecpar->codec_id);
+
+            current_channel = 0;
+
+            if (descriptor->channels >= FF_SANE_NB_CHANNELS) {
+                av_log(mxf->fc, AV_LOG_ERROR, "max number of channels %d reached\n", FF_SANE_NB_CHANNELS);
+                return AVERROR_INVALIDDATA;
+            }
+
+            for (j = 0; j < descriptor->channels; ++j) {
+                source_track->channel_ordering[j] = j;
+            }
+
+            for (j = 0; j < descriptor->sub_descriptors_count; j++) {
+                MXFMCASubDescriptor *mca_sub_descriptor = mxf_resolve_strong_ref(mxf, &descriptor->sub_descriptors_refs[j], MCASubDescriptor);
+                if (mca_sub_descriptor == NULL) {
+                    continue;
+                }
+
+                // Soundfield group
+                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionary_id, mxf_soundfield_group)) {
+                    const MXFSoundfieldGroupUL* group_ptr = mxf_soundfield_groups;
+
+                    while (group_ptr->uid[0]) {
+                        if (IS_KLV_KEY(group_ptr->uid, mca_sub_descriptor->mca_label_dictionary_id)) {
+                            st->codecpar->channel_layout = group_ptr->id;
+                            break;
+                        }
+                        group_ptr++;
+                    }
+                }
+
+                // Audio channel
+                if (IS_KLV_KEY(mca_sub_descriptor->mca_label_dictionary_id, mxf_audio_channel)) {
+                    const MXFChannelOrderingUL* channel_ordering_ptr = mxf_channel_ordering;
+
+                    while (channel_ordering_ptr->uid[0]) {
+                        if (IS_KLV_KEY(channel_ordering_ptr->uid, mca_sub_descriptor->mca_label_dictionary_id)) {
+                            source_track->channel_ordering[current_channel] = channel_ordering_ptr->index;
+
+                            if(channel_ordering_ptr->service_type != AV_AUDIO_SERVICE_TYPE_NB) {
+                                enum AVAudioServiceType *ast;
+                                uint8_t* side_data = av_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE, sizeof(*ast));
+                                if (!side_data)
+                                    goto fail_and_free;
+                                ast = (enum AVAudioServiceType*)side_data;
+                                *ast = channel_ordering_ptr->service_type;
+                            }
+
+                            current_channel += 1;
+                            break;
+                        }
+                        channel_ordering_ptr++;
+                    }
+                }
+
+                // set language from MCA spoken language information
+                if (mca_sub_descriptor->language) {
+                    ret = set_language(mxf->fc, mca_sub_descriptor->language, &st->metadata);
+                    if (ret < 0) {
+                        return ret;
+                    }
+                }
+            }
+
+            // check if the mapping is not required
+            source_track->require_reordering = 0;
+            if (is_pcm(st->codecpar->codec_id)) {
+                for (j = 0; j < descriptor->channels; ++j) {
+                    if (source_track->channel_ordering[j] != j) {
+                        source_track->require_reordering = 1;
+                        break;
+                    }
+                }
+            } else {
+                st->codecpar->channel_layout = 0;
+            }
+
+            if (source_track->require_reordering) {
+                current_channel = 0;
+                av_log(mxf->fc, AV_LOG_DEBUG, "MCA Audio mapping (");
+                for(j = 0; j < descriptor->channels; ++j) {
+                    for(int k = 0; k < descriptor->channels; ++k) {
+                        if(source_track->channel_ordering[k] == current_channel) {
+                            av_log(mxf->fc, AV_LOG_DEBUG, "%d -> %d", source_track->channel_ordering[k], k);
+                            if (current_channel != descriptor->channels - 1)
+                                av_log(mxf->fc, AV_LOG_DEBUG, ", ");
+                            current_channel += 1;
+                        }
+                    }
+                }
+                av_log(mxf->fc, AV_LOG_DEBUG, ")\n");
+            }
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
             enum AVMediaType type;
             container_ul = mxf_get_codec_ul(mxf_data_essence_container_uls, essence_container_ul);
@@ -2888,6 +3133,8 @@  static const MXFMetadataReadTableEntry mxf_metadata_read_table[] = {
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5c,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* VANC/VBI - SMPTE 436M */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x5e,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2AudioDescriptor */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x64,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* DC Timed Text Descriptor */
+    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6c,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Soundfield Group Label Subdescriptor */
+    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x6b,0x00 }, mxf_read_mca_sub_descriptor, sizeof(MXFMCASubDescriptor), MCASubDescriptor }, /* Audio Channel Label Subdescriptor */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3A,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Static Track */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3B,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Generic Track */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x14,0x00 }, mxf_read_timecode_component, sizeof(MXFTimecodeComponent), TimecodeComponent },
@@ -3187,12 +3434,6 @@  static void mxf_compute_essence_containers(AVFormatContext *s)
     }
 }
 
-static int is_pcm(enum AVCodecID codec_id)
-{
-    /* we only care about "normal" PCM codecs until we get samples */
-    return codec_id >= AV_CODEC_ID_PCM_S16LE && codec_id < AV_CODEC_ID_PCM_S24DAUD;
-}
-
 static MXFIndexTable *mxf_find_index_table(MXFContext *mxf, int index_sid)
 {
     int i;
@@ -3619,6 +3860,25 @@  static int mxf_set_pts(MXFContext *mxf, AVStream *st, AVPacket *pkt)
     return 0;
 }
 
+static int mxf_audio_remapping(int* channel_ordering, uint8_t* data, int size, int sample_size, int channels)
+{
+    int sample_offset = channels * sample_size;
+    int number_of_samples = size / sample_offset;
+    uint8_t tmp[FF_SANE_NB_CHANNELS * 4];
+    uint8_t* data_ptr = data;
+
+    for (int sample = 0; sample < number_of_samples; ++sample) {
+        memcpy(tmp, data_ptr, sample_offset);
+
+        for (int channel = 0; channel < channels; ++channel) {
+            memcpy(&data_ptr[sample_size * channel_ordering[channel]], &tmp[sample_size * channel],  sample_size);
+        }
+
+        data_ptr += sample_offset;
+    }
+    return 0;
+}
+
 static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     KLVPacket klv;
@@ -3733,6 +3993,15 @@  static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return ret;
             }
 
+            // for audio, process audio remapping if MCA label requires it 
+            if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && track->require_reordering && !mxf->skip_audio_reordering) {
+                int byte_per_sample = st->codecpar->bits_per_coded_sample / 8;
+                ret = mxf_audio_remapping(track->channel_ordering, pkt->data, pkt->size, byte_per_sample, st->codecpar->channels);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+
             /* seek for truncated packets */
             avio_seek(s->pb, klv.next_klv, SEEK_SET);
 
@@ -3920,6 +4189,9 @@  static const AVOption options[] = {
     { "eia608_extract", "extract eia 608 captions from s436m track",
       offsetof(MXFContext, eia608_extract), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1,
       AV_OPT_FLAG_DECODING_PARAM },
+    { "skip_audio_reordering", "skip audio reordering based on Multi-Channel Audio labelling",
+      offsetof(MXFContext, skip_audio_reordering), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1,
+      AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 
diff --git a/libavformat/version.h b/libavformat/version.h
index 81ed517609..0f76f50892 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -33,7 +33,7 @@ 
 // Also please add any ticket numbers that you believe might be affected here
 #define LIBAVFORMAT_VERSION_MAJOR  59
 #define LIBAVFORMAT_VERSION_MINOR   8
-#define LIBAVFORMAT_VERSION_MICRO 100
+#define LIBAVFORMAT_VERSION_MICRO 101
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \