diff mbox

[FFmpeg-devel,16/21] libavcodec/avcodec, libavformat/movenc: support embedding channel layout to stream side data

Message ID 1471943019-14136-17-git-send-email-erkki.seppala.ext@nokia.com
State Superseded
Headers show

Commit Message

erkki.seppala.ext@nokia.com Aug. 23, 2016, 9:03 a.m. UTC
From: Erkki Seppälä <erkki.seppala.ext@nokia.com>

Added support for passing complex channel layout configuration as side
packet data (AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED) to ISO media files
as specified by ISO/IEC 14496-12.

This information isn't integrated into the existing channel layout
system though, which is much more restricted compared to what the
standard permits. However, the side packet data is structured so that
it does not require too much ISO base media file format knowledge in
client code.

This information ends up to an (optional) chnl box in the written file
in an isom track.

Signed-off-by: Erkki Seppälä <erkki.seppala.ext@nokia.com>
Signed-off-by: OZOPlayer <OZOPL@nokia.com>
---
 libavcodec/avcodec.h | 51 ++++++++++++++++++++++++++++++++-
 libavformat/movenc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 130 insertions(+), 2 deletions(-)

Comments

Nicolas George Aug. 23, 2016, 5:49 p.m. UTC | #1
Le septidi 7 fructidor, an CCXXIV, erkki.seppala.ext@nokia.com a écrit :
> From: Erkki Seppälä <erkki.seppala.ext@nokia.com>
> 
> Added support for passing complex channel layout configuration as side
> packet data (AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
> AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
> AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED) to ISO media files
> as specified by ISO/IEC 14496-12.
> 
> This information isn't integrated into the existing channel layout
> system though,

I think it needs to be.

>		 which is much more restricted compared to what the
> standard permits.

Certainly, but the bitfield channel layout system is enough for most cases
and well tested. Any new system must work with it, not around it.

> However, the side packet data is structured so that
> it does not require too much ISO base media file format knowledge in
> client code.
> 
> This information ends up to an (optional) chnl box in the written file
> in an isom track.
> 
> Signed-off-by: Erkki Seppälä <erkki.seppala.ext@nokia.com>
> Signed-off-by: OZOPlayer <OZOPL@nokia.com>
> ---
>  libavcodec/avcodec.h | 51 ++++++++++++++++++++++++++++++++-
>  libavformat/movenc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 130 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 36c85e9..6c64e6a 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -1365,6 +1365,35 @@ typedef struct AVTrackReferences {
>      /** followed by: int tracks[nb_tracks]; -- tracks this track refers to */
>  } AVTrackReferences;
>  

> +/** Describes the speaker position of a single audio channel of a single track */

Please follow the surrounding style for Doxygen comments: /** and */ on a
separate line, * at the beginning of each line.

> +typedef struct AVAudioTrackChannelPosition {

> +    int speaker_position; /** an OutputChannelPosition from ISO/IEC 23001-8 */

I think most our API users and even developers have better use for 158 Swiss
Francs than feeding the ISO. Please make the API self-sufficient; I suggest
an enum.

> +
> +    /** The following are used if speaker_position == 126 */
> +    int azimuth;
> +    int elevation;

Please document the units and references.

Also, since it is repeated and packed, using a small type would make sense
here if possible.

> +} AVAudioTrackChannelPosition;
> +
> +/** Describes the channel layout (ie. speaker position) of a single audio track */
> +typedef struct AVAudioTrackChannelLayout {
> +    int nb_positions;

> +    /** followed by: AVAudioTrackChannelPosition positions[nb_positions]; */

AVAudioTrackChannelPosition positions[0];

That way, computing the address is taken care of by the compiler without
tricky pointer arithmetic, including alignment requirements.

(We already use 0-sized final arrays once in vaapi_encode.h; since VA API is
for Linux and BSD, it does not probe the crappy proprietary compilers
support it. In that case, using 1 instead of 0 is fine.)

> +} AVAudioTrackChannelLayout;
> +

> +/** Describes the channel layout based on predefined layout of a single track
> +    by providing the layout and the list of channels are are omitted */

Could you make that clearer?

> +typedef struct AVAudioTrackChannelPredefinedLayout {
> +    int layout;        /** ChannelConfiguration from ISO/IEC 23001-8 */

> +    int nb_omitted_channels;
> +    /** followed by: char omitted_channels[nb_omitted_channels]; - non-zero indicates the channel is omitted */

Is there a reasonable upper bound to nb_omitted_channels?

Also, I think the naming is inconsistent: if nb_omitted_channels = 3 and
omitted_channels[] = { 1, 0, 1 }, there are only two omitted channels, not
three.

> +} AVAudioTrackChannelPredefinedLayout;
> +

> +/** Describes the channel layout to be object-sturctued with given
> +    number of objects */

Please make that clearer.

> +typedef struct AVAudioTrackChannelLayoutObjectStructured {
> +    int object_count;
> +} AVAudioTrackChannelLayoutObjectStructured;
> +
>  enum AVPacketSideDataType {
>      AV_PKT_DATA_PALETTE,
>  
> @@ -1556,7 +1585,27 @@ enum AVPacketSideDataType {
>       * Configured the timed metadata parameters, such as the uri and
>       * meta data configuration. The key is of type AVTimedMetadata.
>       */
> -    AV_PKT_DATA_TIMED_METADATA_INFO
> +    AV_PKT_DATA_TIMED_METADATA_INFO,
> +
> +    /**
> +     * Channel layout, describing the position of spakers for the
> +     * channels of a track, following the structure

> +     * AVAudioTrackChannelLayout.
> +     */
> +    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,

I think it would be a good idea if the name of the side data matches the
name of the structure as much as possible: Track -> _TRACK_. Ditto for the
others.

Would it make sense to have a single side data type and an integer field at
the beginning announcing the type of the rest:

typedef struct AVComplexChannelLayout {
    enum AVComplexChannelLayoutType type;
    union {
        uint64_t bitmask;
        AVAudioTrackChannelLayout complete;
        AVAudioTrackChannelPredefinedLayout predefined;
        AVAudioTrackChannelLayoutObjectStructured object;
    } layout;
};

> +
> +    /**
> +     * Predefined channel layout, describing the position of spakers
> +     * for the channels of a track, following the structure
> +     * AVAudioTrackChannelPredefinedLayout.
> +     */
> +    AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
> +
> +    /**
> +     * The channel layout is object structured with the number of objects in
> +     * AVAudioTrackChannelLayoutObjectStructured
> +     */

> +    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED

Missing final comma (ditto in the patch adding
AV_PKT_DATA_TIMED_METADATA_INFO; blame to Neil for not adding it to
AV_PKT_DATA_MASTERING_DISPLAY_METADATA).

>  };
>  
>  #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index ff4bf85..9606918 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c

I can not comment on that file.

Regards,
erkki.seppala.ext@nokia.com Aug. 24, 2016, 2:39 p.m. UTC | #2
On 08/23/2016 08:49 PM, Nicolas George wrote:
>> +    int speaker_position; /** an OutputChannelPosition from ISO/IEC 23001-8 */
>
> I think most our API users and even developers have better use for 158 Swiss
> Francs than feeding the ISO. Please make the API self-sufficient; I suggest
> an enum.

I'll introduce an enumeration AVSpeakerPosition. However, the spec is 
available publicly at 
http://standards.iso.org/ittf/PubliclyAvailableStandards/c062088_ISO_IEC_23001-8_2013.zip 
. (And the table of output channel positions starts at page 24.)

> Le septidi 7 fructidor, an CCXXIV, erkki.seppala.ext@nokia.com a écrit :
>> From: Erkki Seppälä <erkki.seppala.ext@nokia.com>
>>
>> Added support for passing complex channel layout configuration as side
>> packet data (AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
>> AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
>> AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED) to ISO media files
>> as specified by ISO/IEC 14496-12.
>>
>> This information isn't integrated into the existing channel layout
>> system though,
>
> I think it needs to be.

Unfortunately properly integrating this information into FFmpeg sounds 
larger than the scope of my project. I imagine this still would be very 
helpful base work should such information be actually supported by 
various parts of FFmpeg.

A special bit in the AV_CH_LAYOUT_* could be used for indicating that a 
special layout exists, but that information by itself is nowhere enough 
to work with this data with ie. in the functions available in 
channel_layout.c. And if codec or client code is able to deal with the 
advanced channel layout configurations, they also have access to this 
side data.

I also believe is is easier to study more deeper integration once a 
non-intrusive possibility to work with advanced ISOBMFF channel layouts 
exists.

Here is how the mapping on speaker position level might be done:

/** Speaker positions according to ISO/IEC 23001-8 */
enum AVSpeakerPosition {
!    AV_SPEAKER_POSITION_L      = 0,   /// Left front
!   AV_SPEAKER_POSITION_R      = 1,   /// Right front
     AV_SPEAKER_POSITION_C      = 2,   /// Centre front
     AV_SPEAKER_POSITION_LFE    = 3,   /// Low frequency enhancement
     AV_SPEAKER_POSITION_LS     = 4,   /// Left surround
     AV_SPEAKER_POSITION_RS     = 5,   /// Right surround
!   AV_SPEAKER_POSITION_LC     = 6,   /// Left front centre
!   AV_SPEAKER_POSITION_RC     = 7,   /// Right front centre
     AV_SPEAKER_POSITION_LSR    = 8,   /// Rear surround left
     AV_SPEAKER_POSITION_RSR    = 9,   /// Rear surround right
     AV_SPEAKER_POSITION_CS     = 10,  /// Rear centre
     AV_SPEAKER_POSITION_LSD    = 11,  /// Left surround direct
     AV_SPEAKER_POSITION_RSD    = 12,  /// Right surround direct
     AV_SPEAKER_POSITION_LSS    = 13,  /// Left side surround
     AV_SPEAKER_POSITION_RSS    = 14,  /// Right side surround
!    AV_SPEAKER_POSITION_LW     = 15,  /// Left wide front
!    AV_SPEAKER_POSITION_RW     = 16,  /// Right wide front
     AV_SPEAKER_POSITION_LV     = 17,  /// Left front vertical height
     AV_SPEAKER_POSITION_RV     = 18,  /// Right front vertical height
     AV_SPEAKER_POSITION_CV     = 19,  /// Centre front vertical height
     AV_SPEAKER_POSITION_LVR    = 20,  /// Left surround vertical height 
rear
     AV_SPEAKER_POSITION_RVR    = 21,  /// Right surround vertical 
height rear
     AV_SPEAKER_POSITION_CVR    = 22,  /// Centre vertical height rear
     AV_SPEAKER_POSITION_LVSS   = 23,  /// Left vertical height side 
surround
     AV_SPEAKER_POSITION_RVSS   = 24,  /// Right vertical height side 
surround
     AV_SPEAKER_POSITION_TS     = 25,  /// Top centre surround
     AV_SPEAKER_POSITION_LFE2   = 26,  /// E2 Low frequency enhancement 2
     AV_SPEAKER_POSITION_LB     = 27,  /// Left front vertical bottom
     AV_SPEAKER_POSITION_RB     = 28,  /// Right front vertical bottom
     AV_SPEAKER_POSITION_CB     = 29,  /// Centre front vertical bottom
     AV_SPEAKER_POSITION_LVS    = 30,  /// Left vertical height surround
     AV_SPEAKER_POSITION_RVS    = 31,  /// Right vertical height surround
                                       /// 32-45 Reserved
     AV_SPEAKER_POSITION_LFE3   = 36,  /// E3 Low frequency enhancement 3
     AV_SPEAKER_POSITION_LEOS   = 37,  /// Left edge of screen
     AV_SPEAKER_POSITION_REOS   = 38,  /// Right edge of screen
     AV_SPEAKER_POSITION_HWBCAL = 39,  /// half-way between centre of 
screen and left edge of screen
     AV_SPEAKER_POSITION_HWBCAR = 40,  /// half-way between centre of 
screen and right edge of screen
     AV_SPEAKER_POSITION_LBS    = 41,  /// Left back surround
     AV_SPEAKER_POSITION_RBS    = 42,  /// Right back surround
                                       /// 43–125 Reserved
     AV_SPEAKER_POSITION_EXPL   = 126, /// Explicit position (see text)
                                       /// 127 Unknown / undefined
};

I suppose if this is preferred, then one for 23001-8 channel layouts 
would be preferred as well..

It does seem confusing though that similar #defines are available in 
channel_layout.h and I am a bit uncertain of a 1:1 mapping between the 
two. I've marked in above with ! the ones that I found a match from  the 
defines:

AV_CH_FRONT_LEFT             AV_SPEAKER_POSITION_L
AV_CH_FRONT_RIGHT            AV_SPEAKER_POSITION_R
AV_CH_FRONT_CENTER           AV_SPEAKER_POSITION_LC
AV_CH_LOW_FREQUENCY          AV_SPEAKER_POSITION_LFE
AV_CH_BACK_LEFT              AV_SPEAKER_POSITION_LBS
AV_CH_BACK_RIGHT             AV_SPEAKER_POSITION_RBS
AV_CH_FRONT_LEFT_OF_CENTER   AV_SPEAKER_POSITION_LC
AV_CH_FRONT_RIGHT_OF_CENTER  AV_SPEAKER_POSITION_RC
AV_CH_BACK_CENTER
AV_CH_SIDE_LEFT              AV_SPEAKER_POSITION_LSS
AV_CH_SIDE_RIGHT             AV_SPEAKER_POSITION_RSS
AV_CH_TOP_CENTER             AV_SPEAKER_POSITION_TS
AV_CH_TOP_FRONT_LEFT
AV_CH_TOP_FRONT_CENTER
AV_CH_TOP_FRONT_RIGHT
AV_CH_TOP_BACK_LEFT
AV_CH_TOP_BACK_CENTER
AV_CH_TOP_BACK_RIGHT
AV_CH_STEREO_LEFT
AV_CH_STEREO_RIGHT
AV_CH_WIDE_LEFT              AV_SPEAKER_POSITION_LW
AV_CH_WIDE_RIGHT             AV_SPEAKER_POSITION_RW
AV_CH_SURROUND_DIRECT_LEFT   AV_SPEAKER_POSITION_LSD
AV_CH_SURROUND_DIRECT_RIGHT  AV_SPEAKER_POSITION_RSD
AV_CH_LOW_FREQUENCY_2        AV_SPEAKER_POSITION_LFE2

If this approach were taken, then the missing speakers positions 
(channels?) would need to be added, conversion maps for 23001-8 would 
need to be implemented (and probably refusing if there are non-mappable 
positions). Then layout bitmaps would be mapped into 23001-8 ones or if 
the layout cannot be found, a custom one generated.. Personally I don't 
mind how separate&simple this system still is :).

>> 		 which is much more restricted compared to what the
>> standard permits.
>
> Certainly, but the bitfield channel layout system is enough for most cases
> and well tested. Any new system must work with it, not around it.

But as you may agree, such a modification would not be small. Certainly 
not one I would try as my first feature to FFmpeg.

> Please follow the surrounding style for Doxygen comments: /** and */ on a
> separate line, * at the beginning of each line.

Oops. I'll fix those.

>> +    /** The following are used if speaker_position == 126 */
>> +    int azimuth;
>> +    int elevation;
>
> Please document the units and references.

Done.

> Also, since it is repeated and packed, using a small type would make sense
> here if possible.

I'll use the same types as in the headers: int16_t and int8_t.

>> +} AVAudioTrackChannelPosition;
>> +
>> +/** Describes the channel layout (ie. speaker position) of a single audio track */
>> +typedef struct AVAudioTrackChannelLayout {
>> +    int nb_positions;
>
>> +    /** followed by: AVAudioTrackChannelPosition positions[nb_positions]; */
>
> AVAudioTrackChannelPosition positions[0];
>
> That way, computing the address is taken care of by the compiler without
> tricky pointer arithmetic, including alignment requirements.

Actually this is the way I had it in the first place, but compiler 
warnings convinced me not to do it. If I'll use positions[1] the 
allocations will end up slightly too large unless without slightly 
convoluted code.

Is there a preferred way? It doesn't seem like foo[1] is used anywhere.

> (We already use 0-sized final arrays once in vaapi_encode.h; since VA API is
> for Linux and BSD, it does not probe the crappy proprietary compilers
> support it. In that case, using 1 instead of 0 is fine.)

In particular I don't want to do it conditionally :).

>
>> +} AVAudioTrackChannelLayout;
>> +
>
>> +/** Describes the channel layout based on predefined layout of a single track
>> +    by providing the layout and the list of channels are are omitted */
>
> Could you make that clearer?
>
>> +typedef struct AVAudioTrackChannelPredefinedLayout {
>> +    int layout;        /** ChannelConfiguration from ISO/IEC 23001-8 */
>
>> +    int nb_omitted_channels;
>> +    /** followed by: char omitted_channels[nb_omitted_channels]; - non-zero indicates the channel is omitted */
>
> Is there a reasonable upper bound to nb_omitted_channels?

Yes, ISOBMFF supports only 64 channels for this mask. But perhaps even a 
higher number is usable for some special case scenarios, not involving 
ISOBMFF?

> Also, I think the naming is inconsistent: if nb_omitted_channels = 3 and
> omitted_channels[] = { 1, 0, 1 }, there are only two omitted channels, not
> three.

I agree that it seems a bit inconsistent.. Would an integer array 
enumerating the omitted channels be better? Of course this whole problem 
goes away if the structure is fixed-size (ie. uint8_t 
omitted_channels_bitmask[64 / 8] or the uint64_t you suggested).

>>  enum AVPacketSideDataType {
>>      AV_PKT_DATA_PALETTE,
>>
>> @@ -1556,7 +1585,27 @@ enum AVPacketSideDataType {
>>       * Configured the timed metadata parameters, such as the uri and
>>       * meta data configuration. The key is of type AVTimedMetadata.
>>       */
>> -    AV_PKT_DATA_TIMED_METADATA_INFO
>> +    AV_PKT_DATA_TIMED_METADATA_INFO,
>> +
>> +    /**
>> +     * Channel layout, describing the position of spakers for the
>> +     * channels of a track, following the structure
>
>> +     * AVAudioTrackChannelLayout.
>> +     */
>> +    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
>
> I think it would be a good idea if the name of the side data matches the
> name of the structure as much as possible: Track -> _TRACK_. Ditto for the
> others.
>
> Would it make sense to have a single side data type and an integer field at
> the beginning announcing the type of the rest:
>
> typedef struct AVComplexChannelLayout {
>     enum AVComplexChannelLayoutType type;
>     union {
>         uint64_t bitmask;
>         AVAudioTrackChannelLayout complete;
>         AVAudioTrackChannelPredefinedLayout predefined;
>         AVAudioTrackChannelLayoutObjectStructured object;
>     } layout;
> };

It does seem nice. The actual structs/enums might look like this:

typedef struct AVAudioTrackChannelLayout {
     int nb_positions;

     /** Contains nb_positions entries*/
     AVAudioTrackChannelPosition positions[1];
} AVAudioTrackChannelLayout;

typedef struct AVAudioTrackChannelPredefinedLayout {
     int layout;        /** ChannelConfiguration from ISO/IEC 23001-8 */
     uint64_t omitted_channels;
} AVAudioTrackChannelPredefinedLayout;

typedef enum AVComplexAudioTrackChannelLayoutType {
     AV_COMPLEX_CHANNEL_LAYOUT_PREDEFINED,
     AV_COMPLEX_CHANNEL_LAYOUT_CUSTOM
} AVComplexAudioTrackChannelLayoutType;

typedef struct AVComplexAudioChannelLayout {
     enum AVComplexAudioTrackChannelLayoutType type;
     union {
         AVAudioTrackChannelPredefinedLayout predefined;
         AVAudioTrackChannelLayout complete;
     };
} AVComplexAudioChannelLayout;

enum AVPacketSideDataType {
...

     /**
      * Channel layout, describing the position of speakers for the
      * channels of a track, following the structure
      * AVComplexAudioChannelLayout.
      */
     AV_PKT_DATA_COMPLEX_AUDIO_CHANNEL_LAYOUT,

     /**
      * The channel layout is object structured with the number of 
objects in an int (may accompany AV_PKT_DATA_COMPLEX_AUDIO_CHANNEL_LAYOUT)
      */
     AV_PKT_DATA_COMPLEX_AUDIO_TRACK_CHANNEL_LAYOUT_OBJECT_STRUCTURED,
} AVPacketSideDataType;

I'm not super happy about the long names (though tempted to 
s/AVAudio/AVComplexAudio/), but what is the alternative :).

Thanks for reviewing and feedback!
diff mbox

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 36c85e9..6c64e6a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1365,6 +1365,35 @@  typedef struct AVTrackReferences {
     /** followed by: int tracks[nb_tracks]; -- tracks this track refers to */
 } AVTrackReferences;
 
+/** Describes the speaker position of a single audio channel of a single track */
+typedef struct AVAudioTrackChannelPosition {
+    int speaker_position; /** an OutputChannelPosition from ISO/IEC 23001-8 */
+
+    /** The following are used if speaker_position == 126 */
+    int azimuth;
+    int elevation;
+} AVAudioTrackChannelPosition;
+
+/** Describes the channel layout (ie. speaker position) of a single audio track */
+typedef struct AVAudioTrackChannelLayout {
+    int nb_positions;
+    /** followed by: AVAudioTrackChannelPosition positions[nb_positions]; */
+} AVAudioTrackChannelLayout;
+
+/** Describes the channel layout based on predefined layout of a single track
+    by providing the layout and the list of channels are are omitted */
+typedef struct AVAudioTrackChannelPredefinedLayout {
+    int layout;        /** ChannelConfiguration from ISO/IEC 23001-8 */
+    int nb_omitted_channels;
+    /** followed by: char omitted_channels[nb_omitted_channels]; - non-zero indicates the channel is omitted */
+} AVAudioTrackChannelPredefinedLayout;
+
+/** Describes the channel layout to be object-sturctued with given
+    number of objects */
+typedef struct AVAudioTrackChannelLayoutObjectStructured {
+    int object_count;
+} AVAudioTrackChannelLayoutObjectStructured;
+
 enum AVPacketSideDataType {
     AV_PKT_DATA_PALETTE,
 
@@ -1556,7 +1585,27 @@  enum AVPacketSideDataType {
      * Configured the timed metadata parameters, such as the uri and
      * meta data configuration. The key is of type AVTimedMetadata.
      */
-    AV_PKT_DATA_TIMED_METADATA_INFO
+    AV_PKT_DATA_TIMED_METADATA_INFO,
+
+    /**
+     * Channel layout, describing the position of spakers for the
+     * channels of a track, following the structure
+     * AVAudioTrackChannelLayout.
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
+
+    /**
+     * Predefined channel layout, describing the position of spakers
+     * for the channels of a track, following the structure
+     * AVAudioTrackChannelPredefinedLayout.
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
+
+    /**
+     * The channel layout is object structured with the number of objects in
+     * AVAudioTrackChannelLayoutObjectStructured
+     */
+    AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED
 };
 
 #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index ff4bf85..9606918 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -557,6 +557,83 @@  static unsigned compute_avg_bitrate(MOVTrack *track)
     return size * 8 * track->timescale / track->track_duration;
 }
 
+static int mov_write_chnl_tag(AVIOContext *pb, MOVTrack *track)
+{
+    AVAudioTrackChannelLayout *layout =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT,
+                                        NULL);
+
+    AVAudioTrackChannelPredefinedLayout *predefLayout =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_PREDEFINED_LAYOUT,
+                                        NULL);
+
+    AVAudioTrackChannelLayoutObjectStructured *objectStructured =
+        (void*) av_stream_get_side_data(track->st, AV_PKT_DATA_AUDIO_CHANNEL_LAYOUT_OBJECT_STRUCTURED,
+                                        NULL);
+
+    if (!layout && !predefLayout && !objectStructured) {
+        return 0;
+    } else {
+        int64_t pos = avio_tell(pb);
+
+        int channel_structured           = !!layout || !!predefLayout;
+
+        // ChannelConfiguration from ISO/IEC 23001-8
+        int defined_layout               = predefLayout ? predefLayout->layout : 0;
+        int channel_count                = track->par->channels;
+
+        int object_structured            = !!objectStructured;
+        int object_count                 = objectStructured ? objectStructured->object_count : 0;
+
+        int stream_structure             = (channel_structured << 0) | (object_structured << 1);
+
+        avio_wb32(pb, 0); // size
+        ffio_wfourcc(pb, "chnl");
+        avio_wb32(pb, 0); // Version
+
+        avio_w8(pb, stream_structure);
+
+        if (channel_structured) {
+            avio_w8(pb, defined_layout);
+            if (defined_layout == 0) {
+                AVAudioTrackChannelPosition* positions;
+                int i;
+                av_assert0(layout);
+                av_assert0(layout->nb_positions >= channel_count);
+
+                positions = (void*) (layout + 1);
+
+                for (i = 0; i < channel_count; ++i) {
+                    AVAudioTrackChannelPosition *pos = &positions[i];
+                    avio_w8(pb, pos->speaker_position);
+                    if (pos->speaker_position == 126) {
+                        avio_wb16(pb, pos->azimuth);
+                        avio_w8(pb, pos->elevation);
+                    }
+                }
+            } else {
+                int omitted_channels_map[64 / 8] = { 0 };
+                char* omitted_channels;
+                int i;
+                av_assert0(predefLayout);
+
+                omitted_channels = (void*) (predefLayout + 1);
+
+                for (i = 0; i < FFMIN(64, predefLayout->nb_omitted_channels); ++i) {
+                    omitted_channels_map[i / 8] |=
+                        ((!!omitted_channels[i]) << (i % 8));
+                }
+                for (i = 0; i < 64 / 8; ++i)
+                    avio_w8(pb, omitted_channels_map[i]);
+            }
+        }
+        if (object_structured)
+            avio_w8(pb, object_count);
+
+        return update_size(pb, pos);
+    }
+}
+
 static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
 {
     AVCPBProperties *props;
@@ -996,8 +1073,10 @@  static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
          (mov_pcm_le_gt16(track->par->codec_id) && version==1) ||
          (mov_pcm_be_gt16(track->par->codec_id) && version==1)))
         mov_write_wave_tag(s, pb, track);
-    else if (track->tag == MKTAG('m','p','4','a'))
+    else if (track->tag == MKTAG('m','p','4','a')) {
+        mov_write_chnl_tag(pb, track);
         mov_write_esds_tag(pb, track);
+    }
     else if (track->par->codec_id == AV_CODEC_ID_AMR_NB)
         mov_write_amr_tag(pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_AC3)