diff mbox

[FFmpeg-devel,v7,2/3] lavf/isom: support for demuxing and remuxing of MPEG-H 3D Audio in MP4

Message ID 20191212113840.6174-2-Yuki.Tsuchiya@sony.com
State New
Headers show

Commit Message

Yuki.Tsuchiya Dec. 12, 2019, 11:38 a.m. UTC
Implemented according to the specification at https://www.iso.org/standard/69561.html
The 'mhm1' sample entry, registered with MP4RA, is defined as MHAS-encapsulated single-stream MPEG-H 3D Audio.
'MHAS' stands for MPEG-H Audio Stream, which contains the encoded audio data and the corresponding metadata for decoding.
This patch enables extracting the MHAS bitstream from MP4 and remuxing into MP4.

Signed-off-by: Yuki Tsuchiya <Yuki.Tsuchiya@sony.com>
---
 libavcodec/avcodec.h    | 1 +
 libavcodec/codec_desc.c | 7 +++++++
 libavcodec/version.h    | 2 +-
 libavformat/isom.c      | 1 +
 libavformat/movenc.c    | 2 ++
 5 files changed, 12 insertions(+), 1 deletion(-)

Comments

James Almer Dec. 12, 2019, 12:43 p.m. UTC | #1
On 12/12/2019 8:38 AM, Yuki Tsuchiya wrote:
> Implemented according to the specification at https://www.iso.org/standard/69561.html
> The 'mhm1' sample entry is registered with MP4RA, which is defined as MHAS encapsulated single stream MPEG-H 3D Audio.
> 'MHAS' stands for MPEG-H audio stream, which contains encoded audio data and corresponds metadata for decoding.
> This patch enables extracting the MHAS bitstream from MP4 and remuxing into MP4.
> 
> Signed-off-by: Yuki Tsuchiya <Yuki.Tsuchiya@sony.com>
> ---
>  libavcodec/avcodec.h    | 1 +
>  libavcodec/codec_desc.c | 7 +++++++
>  libavcodec/version.h    | 2 +-
>  libavformat/isom.c      | 1 +
>  libavformat/movenc.c    | 2 ++
>  5 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 735a3c2d76..119b32dc1f 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -656,6 +656,7 @@ enum AVCodecID {
>      AV_CODEC_ID_ATRAC9,
>      AV_CODEC_ID_HCOM,
>      AV_CODEC_ID_ACELP_KELVIN,
> +    AV_CODEC_ID_MPEGH_3D_AUDIO,
>  
>      /* subtitle codecs */
>      AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index 98b6348c59..0cae61bce0 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -3016,6 +3016,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
>          .long_name = NULL_IF_CONFIG_SMALL("Sipro ACELP.KELVIN"),
>          .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
>      },
> +    {
> +        .id        = AV_CODEC_ID_MPEGH_3D_AUDIO,
> +        .type      = AVMEDIA_TYPE_AUDIO,
> +        .name      = "mpegh_3d_audio",
> +        .long_name = NULL_IF_CONFIG_SMALL("MPEG-H 3D Audio"),
> +        .props     = AV_CODEC_PROP_LOSSY,
> +    },
>  
>      /* subtitle codecs */
>      {
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 8b9c27378c..4c4027d709 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  64
> +#define LIBAVCODEC_VERSION_MINOR  65
>  #define LIBAVCODEC_VERSION_MICRO 101
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> diff --git a/libavformat/isom.c b/libavformat/isom.c
> index edd0d81063..824e811177 100644
> --- a/libavformat/isom.c
> +++ b/libavformat/isom.c
> @@ -371,6 +371,7 @@ const AVCodecTag ff_codec_movaudio_tags[] = {
>      { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') }, /* nonstandard */
>      { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') }, /* mp4ra.org */
>      { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') }, /* mp4ra.org */
> +    { AV_CODEC_ID_MPEGH_3D_AUDIO,  MKTAG('m', 'h', 'm', '1') }, /* MPEG-H 3D Audio bitstream */
>      { AV_CODEC_ID_NONE, 0 },
>  };
>  
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index dd144ae20a..8456bfac6d 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -2493,6 +2493,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>      mov_write_stts_tag(pb, track);
>      if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>           track->par->codec_id == AV_CODEC_ID_TRUEHD ||
> +         track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||

This still depends on packets being flagged as key. One would hope they
were correctly flagged by a parser or demuxer, but there's always a risk
they were not, so if it's possible to do some basic parsing of the
bitstream to look for a sync code or similar, that would be ideal. See
the custom mov_parse_vc1_frame() and mov_parse_truehd_frame() functions
used for this purpose for an example.

If not possible, then this patch should be ok as is.

>           track->par->codec_tag == MKTAG('r','t','p',' ')) &&
>          track->has_keyframes && track->has_keyframes < track->entry)
>          mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
> @@ -6875,6 +6876,7 @@ const AVCodecTag codec_mp4_tags[] = {
>      { AV_CODEC_ID_DVD_SUBTITLE, MKTAG('m', 'p', '4', 's') },
>      { AV_CODEC_ID_MOV_TEXT    , MKTAG('t', 'x', '3', 'g') },
>      { AV_CODEC_ID_BIN_DATA    , MKTAG('g', 'p', 'm', 'd') },
> +    { AV_CODEC_ID_MPEGH_3D_AUDIO, MKTAG('m', 'h', 'm', '1') },
>      { AV_CODEC_ID_NONE        ,    0 },
>  };
>  
>
Yuki.Tsuchiya Dec. 12, 2019, 12:57 p.m. UTC | #2
Hi James,

On 2019/12/12 21:45, "ffmpeg-devel on behalf of James Almer" <ffmpeg-devel-bounces@ffmpeg.org on behalf of jamrial@gmail.com> wrote:

    >> diff --git a/libavformat/movenc.c b/libavformat/movenc.c

    >> index dd144ae20a..8456bfac6d 100644

    >> --- a/libavformat/movenc.c

    >> +++ b/libavformat/movenc.c

    >> @@ -2493,6 +2493,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext

    >>      mov_write_stts_tag(pb, track);

    >>      if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||

    >>           track->par->codec_id == AV_CODEC_ID_TRUEHD ||

    >> +         track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||

    >

    > This still depends on packets being flagged as key. One would hope they

    > were correctly flagged by a parser or demuxer, but there's always a risk

    > they were not, so if it's possible to do some basic parsing of the

    > bitstream to look for a sync code or similar, that would be ideal. See

    > the custom mov_parse_vc1_frame() and mov_parse_truehd_frame() functions

    > used for this purpose for an example.

    >

    > If not possible, then this patch should be ok as is.

    
OK. I will contribute a bitstream (MHAS) parser, such as mov_parse_mhas_frame(), that adds sync frame information, as a separate patch.
For this patch, please proceed as is.
Moritz Barsnick Dec. 12, 2019, 1:50 p.m. UTC | #3
On Thu, Dec 12, 2019 at 20:38:39 +0900, Yuki Tsuchiya wrote:
>  #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  64
> +#define LIBAVCODEC_VERSION_MINOR  65
>  #define LIBAVCODEC_VERSION_MICRO 101

Nit: reset MICRO to 100 when bumping MINOR (or MAJOR).

Moritz
Yuki.Tsuchiya Dec. 12, 2019, 1:57 p.m. UTC | #4
Thank you for pointing it out. I'll fix it.

On 2019/12/12 22:51, "ffmpeg-devel on behalf of Moritz Barsnick" <ffmpeg-devel-bounces@ffmpeg.org on behalf of barsnick@gmx.net> wrote:

    On Thu, Dec 12, 2019 at 20:38:39 +0900, Yuki Tsuchiya wrote:
    >  #define LIBAVCODEC_VERSION_MAJOR  58

    > -#define LIBAVCODEC_VERSION_MINOR  64

    > +#define LIBAVCODEC_VERSION_MINOR  65

    >  #define LIBAVCODEC_VERSION_MICRO 101

    
    Nit: reset MICRO to 100 when bumping MINOR (or MAJOR).
James Almer Dec. 12, 2019, 1:58 p.m. UTC | #5
On 12/12/2019 9:57 AM, Tsuchiya, Yuki (SHES) wrote:
> Hi James,
> 
> On 2019/12/12 21:45, "ffmpeg-devel on behalf of James Almer" <ffmpeg-devel-bounces@ffmpeg.org on behalf of jamrial@gmail.com> wrote:
> 
>     >> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>     >> index dd144ae20a..8456bfac6d 100644
>     >> --- a/libavformat/movenc.c
>     >> +++ b/libavformat/movenc.c
>     >> @@ -2493,6 +2493,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>     >>      mov_write_stts_tag(pb, track);
>     >>      if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>     >>           track->par->codec_id == AV_CODEC_ID_TRUEHD ||
>     >> +         track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
>     >
>     > This still depends on packets being flagged as key. One would hope they
>     > were correctly flagged by a parser or demuxer, but there's always a risk
>     > they were not, so if it's possible to do some basic parsing of the
>     > bitstream to look for a sync code or similar, that would be ideal. See
>     > the custom mov_parse_vc1_frame() and mov_parse_truehd_frame() functions
>     > used for this purpose for an example.
>     >
>     > If not possible, then this patch should be ok as is.
>     
> OK. I will contribute the bitstream(mhas) parser like mov_parse_mhas_frame() for adding sync frame information as separate patch.
> In this patch, please proceed as is.

Sounds good. Thanks.
diff mbox

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 735a3c2d76..119b32dc1f 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -656,6 +656,7 @@  enum AVCodecID {
     AV_CODEC_ID_ATRAC9,
     AV_CODEC_ID_HCOM,
     AV_CODEC_ID_ACELP_KELVIN,
+    AV_CODEC_ID_MPEGH_3D_AUDIO,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 98b6348c59..0cae61bce0 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3016,6 +3016,13 @@  static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Sipro ACELP.KELVIN"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_MPEGH_3D_AUDIO,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "mpegh_3d_audio",
+        .long_name = NULL_IF_CONFIG_SMALL("MPEG-H 3D Audio"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 8b9c27378c..4c4027d709 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  64
+#define LIBAVCODEC_VERSION_MINOR  65
 #define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavformat/isom.c b/libavformat/isom.c
index edd0d81063..824e811177 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -371,6 +371,7 @@  const AVCodecTag ff_codec_movaudio_tags[] = {
     { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') }, /* nonstandard */
     { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') }, /* mp4ra.org */
     { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') }, /* mp4ra.org */
+    { AV_CODEC_ID_MPEGH_3D_AUDIO,  MKTAG('m', 'h', 'm', '1') }, /* MPEG-H 3D Audio bitstream */
     { AV_CODEC_ID_NONE, 0 },
 };
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index dd144ae20a..8456bfac6d 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -2493,6 +2493,7 @@  static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     mov_write_stts_tag(pb, track);
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_id == AV_CODEC_ID_TRUEHD ||
+         track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -6875,6 +6876,7 @@  const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_DVD_SUBTITLE, MKTAG('m', 'p', '4', 's') },
     { AV_CODEC_ID_MOV_TEXT    , MKTAG('t', 'x', '3', 'g') },
     { AV_CODEC_ID_BIN_DATA    , MKTAG('g', 'p', 'm', 'd') },
+    { AV_CODEC_ID_MPEGH_3D_AUDIO, MKTAG('m', 'h', 'm', '1') },
     { AV_CODEC_ID_NONE        ,    0 },
 };