diff mbox series

[FFmpeg-devel] avformat/avcodec: Add DTS-UHD demuxer and parser, movenc support.

Message ID 61fca341-57cc-abe5-225e-561f5e2a9b4b@funderburk.us
State New
Headers show
Series [FFmpeg-devel] avformat/avcodec: Add DTS-UHD demuxer and parser, movenc support. | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Roy Funderburk April 14, 2023, 3:39 p.m. UTC
Parsing and demuxing of DTS-UHD input files per ETSI TS 103 491 is added
as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
  Changelog                  |   1 +
  configure                  |   1 +
  doc/general_contents.texi  |   1 +
  libavcodec/Makefile        |   1 +
  libavcodec/codec_desc.c    |   7 +
  libavcodec/codec_id.h      |   1 +
  libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++
  libavcodec/dtsuhd_common.h |  84 ++++
  libavcodec/dtsuhd_parser.c | 141 ++++++
  libavcodec/parsers.c       |   1 +
  libavformat/Makefile       |   1 +
  libavformat/allformats.c   |   1 +
  libavformat/dtshddec.c     |   2 +-
  libavformat/dtsuhddec.c    | 214 ++++++++
  libavformat/movenc.c       |  32 ++
  libavformat/version.h      |   2 +-
  16 files changed, 1479 insertions(+), 2 deletions(-)
  create mode 100644 libavcodec/dtsuhd_common.c
  create mode 100644 libavcodec/dtsuhd_common.h
  create mode 100644 libavcodec/dtsuhd_parser.c
  create mode 100644 libavformat/dtsuhddec.c

Comments

Hendrik Leppkes April 14, 2023, 4:40 p.m. UTC | #1
On Fri, Apr 14, 2023 at 6:01 PM Roy Funderburk <royffmpeg@funderburk.us> wrote:
>
> Parsing and demuxing of DTS-UHD input files per ETSI TS 102 114 is added
> as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.
>

Can you give a quick summary of how this format relates to DTS HD, and
the DTS:X extension? If at all?
It uses "dtsx" as extension and FourCC in places, so I reckon it does
somehow relate?

- Hendrik
Roy Funderburk April 14, 2023, 4:48 p.m. UTC | #2
On 4/14/23 9:40 AM, Hendrik Leppkes wrote:
 > Can you give a quick summary how this formats relates to DTS HD, and
 > the DTS:X extension? If at all?

The DTS-UHD format is a new format, not compatible with or recognizable by DTS Coherent Acoustics
decoders/parsers.  Both are stored in a DTSHDHDR file, which starts with the 8 byte DTSHDHDR
signature, but when DTS-UHD is stored in the DTSHDHDR file, the extension used is .dtsx instead
of .dtshd.

-Roy
Michael Niedermayer April 15, 2023, 2:56 p.m. UTC | #3
On Fri, Apr 14, 2023 at 08:39:41AM -0700, Roy Funderburk wrote:
> Parsing and demuxing of DTS-UHD input files per ETSI TS 102 114 is added
> as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.
> 
> Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
> ---
>  Changelog                  |   1 +
>  configure                  |   1 +
>  doc/general_contents.texi  |   1 +
>  libavcodec/Makefile        |   1 +
>  libavcodec/codec_desc.c    |   7 +
>  libavcodec/codec_id.h      |   1 +
>  libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++
>  libavcodec/dtsuhd_common.h |  84 ++++
>  libavcodec/dtsuhd_parser.c | 141 ++++++
>  libavcodec/parsers.c       |   1 +
>  libavformat/Makefile       |   1 +
>  libavformat/allformats.c   |   1 +
>  libavformat/dtshddec.c     |   2 +-
>  libavformat/dtsuhddec.c    | 214 ++++++++
>  libavformat/movenc.c       |  32 ++
>  libavformat/version.h      |   2 +-
>  16 files changed, 1479 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/dtsuhd_common.c
>  create mode 100644 libavcodec/dtsuhd_common.h
>  create mode 100644 libavcodec/dtsuhd_parser.c
>  create mode 100644 libavformat/dtsuhddec.c
> 
> diff --git a/Changelog b/Changelog
> index a40f32c23f..f683b49bb2 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
>   version <next>:
>  - libaribcaption decoder
> +- DTS-UHD demuxer
>   version 6.0:
>  - Radiance HDR image support
> diff --git a/configure b/configure

checking file Changelog
patch: **** malformed patch at line 147: diff --git a/configure b/configure



[...]
Roy Funderburk April 15, 2023, 8:04 p.m. UTC | #4
Parsing and demuxing of DTS-UHD input files per ETSI TS 103 491 is added
as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 Changelog                  |   1 +
 configure                  |   1 +
 doc/general_contents.texi  |   1 +
 libavcodec/Makefile        |   1 +
 libavcodec/codec_desc.c    |   7 +
 libavcodec/codec_id.h      |   1 +
 libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |  84 ++++
 libavcodec/dtsuhd_parser.c | 141 ++++++
 libavcodec/parsers.c       |   1 +
 libavformat/Makefile       |   1 +
 libavformat/allformats.c   |   1 +
 libavformat/dtshddec.c     |   2 +-
 libavformat/dtsuhddec.c    | 214 ++++++++
 libavformat/movenc.c       |  32 ++
 libavformat/version.h      |   2 +-
 16 files changed, 1479 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c
 create mode 100644 libavformat/dtsuhddec.c

diff --git a/Changelog b/Changelog
index a40f32c23f..f683b49bb2 100644
--- a/Changelog
+++ b/Changelog
@@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
 
 version <next>:
 - libaribcaption decoder
+- DTS-UHD demuxer
 
 version 6.0:
 - Radiance HDR image support
diff --git a/configure b/configure
index 033db7442d..557821ceef 100755
--- a/configure
+++ b/configure
@@ -3425,6 +3425,7 @@ dash_demuxer_deps="libxml2"
 dirac_demuxer_select="dirac_parser"
 dts_demuxer_select="dca_parser"
 dtshd_demuxer_select="dca_parser"
+dtsuhd_demuxer_select="dtsuhd_parser"
 dv_demuxer_select="dvprofile"
 dv_muxer_select="dvprofile"
 dxa_demuxer_select="riffdec"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 2eeebd847d..e1ba9c4597 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -597,6 +597,7 @@ library:
 @item raw DNxHD                 @tab X @tab X
 @item raw DTS                   @tab X @tab X
 @item raw DTS-HD                @tab   @tab X
+@item raw DTS-UHD               @tab   @tab X
 @item raw E-AC-3                @tab X @tab X
 @item raw FLAC                  @tab X @tab X
 @item raw GSM                   @tab   @tab X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index aa10fbfcf8..f57564e9eb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1155,6 +1155,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index efdcb59bc9..a58315f46b 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3369,6 +3369,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 64df9699f4..6d8b145ee3 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -538,6 +538,7 @@ enum AVCodecID {
     AV_CODEC_ID_FTR,
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..110cb0c371
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,991 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "libavutil/channel_layout.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+
+/* Object representation types.  The enumerator values (0..7) are compared
+   against the 3-bit field read in parse_object_metadata(). */
+enum RepType {
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+/* State kept for one object of an MD01 chunk. */
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index;       /* Presentation index stored by parse_md01(). */
+    int rep_type;         /* enum RepType value read in parse_object_metadata(). */
+    int ch_activity_mask; /* Channel activity mask stored by parse_ch_mask_params(). */
+} MDObject;
+
+/* Parser state for one MD01 (metadata) chunk, identified by chunk_id. */
+typedef struct MD01 {
+    GetBitContext gb;     /* Reader over buf, set up in parse_multi_frame_md(). */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id;         /* Id this entry was created for (chunk_append_md01()). */
+    int object_list[256]; int object_list_count; /* Object ids filled by parse_md_chunk_list(). */
+    int packets_acquired;      /* Static metadata packets copied into buf so far. */
+    int static_md_extracted;   /* Set once parse_static_md_params() has run to completion. */
+    int static_md_packets;     /* Number of packets the static metadata is split into. */
+    int static_md_packet_size; /* Size of one static metadata packet, in bytes. */
+    int static_md_update_flag; /* When set, static metadata is re-parsed after acquisition. */
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+/* One entry of the audio chunk navigation list maintained by the navi_*
+   functions. */
+typedef struct NAVI {
+    int bytes;   /* Audio chunk size read in parse_chunk_navi(); 0 marks a free slot. */
+    int id;      /* Audio chunk id; navi_find_index() initialises new entries to 256. */
+    int index;   /* Index value the entry is looked up by. */
+    int present; /* Non-zero if the entry was referenced while parsing the current frame. */
+} NAVI;
+
+/* Per audio presentation state filled in by parse_aud_pres_params(). */
+typedef struct UHDAudio {
+    int mask;       /* Bit mask over lower-numbered presentations, read on sync frames. */
+    int selectable; /* Non-zero if the presentation is flagged selectable. */
+} UHDAudio;
+
+/* Size and CRC presence of one metadata chunk, read in parse_chunk_navi(). */
+typedef struct UHDChunk {
+    int crc_flag; /* Non-zero if the chunk is checked with check_crc(). */
+    int bytes;    /* Chunk size in bytes. */
+} UHDChunk;
+
+/* Parser context.  State read on a sync frame persists across the non-sync
+   frames that follow it. */
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    GetBitContext gb;                     /* Bit reader used by the parse_* functions. */
+    MD01 *md01; int md01_count;           /* MD01 chunk states, grown by chunk_append_md01(). */
+    NAVI *navi; int navi_alloc, navi_count; /* Audio chunk navigation list. */
+    UHDAudio audio[256];                  /* Indexed by audio presentation index. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count; /* Metadata chunk table for the current frame. */
+    int chunk_bytes;         /* Sum of all chunk sizes accumulated in parse_chunk_navi(). */
+    int clock_rate;          /* Looked up from a 2-bit code in parse_stream_params(). */
+    int frame_bytes;
+    int frame_duration;      /* Base duration times (frame_duration_code + 1). */
+    int frame_duration_code; /* 3-bit code from the stream parameters. */
+    int ftoc_bytes;          /* Byte count passed to check_crc() for the FTOC CRC. */
+    int major_version;       /* 2 for full channel mix streams, otherwise decoded value + 2. */
+    int num_audio_pres;      /* Number of audio presentations. */
+    int sample_rate;         /* clock_rate << sample_rate_mod. */
+    int sample_rate_mod;     /* 2-bit sample rate multiplier exponent. */
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1;
+    unsigned saw_sync:1;
+};
+
+/* Pull 'bits' bits from the chunk's own static metadata reader when its
+   buffer has been allocated; otherwise read from the frame reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Variable length field reader, based on the code in Table 5-2.
+
+   A prefix of one to three bits selects one of the four widths in 'table';
+   that many bits are then read as the value.  When 'add' is set, the value
+   is offset by the sum of (1 << width) over all shorter table entries.  A
+   selected width of zero yields 0.
+
+   The specification's pseudo code defaults 'add' to true; Table 7-30 passes
+   an explicit false, most other calls leave it at the default.
+*/
+static int get_bits_var(GetBitContext *gb, const uint8_t table[], int add)
+{
+    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 };
+    const int prefix = show_bits(gb, 3); /* value range is [0, 7] */
+    const int sel = index_table[prefix];
+    int result = 0;
+    int k;
+
+    skip_bits(gb, bits_used[prefix]);
+    if (table[sel] == 0)
+        return 0;
+
+    if (add)
+        for (k = 0; k < sel; k++)
+            result += 1 << table[k];
+
+    return result + get_bits_long(gb, table[sel]);
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+
+   Grows h->md01 by one zero-initialised entry tagged with chunk id 'id' and
+   returns it, or NULL if the array cannot be grown.
+
+   av_realloc_array() is used rather than av_reallocp_array() so that on
+   failure the existing array stays valid: h->md01_count keeps matching
+   h->md01 and chunk_reset() can still free every entry's buffer.
+*/
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    MD01 *entry;
+
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    h->md01_count++;
+
+    return entry;
+}
+
+/* Look up the MD01 entry whose chunk_id equals 'id'; NULL when none exists. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int n = 0;
+
+    while (n < h->md01_count) {
+        MD01 *entry = &h->md01[n];
+
+        if (entry->chunk_id == id)
+            return entry;
+        n++;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3.  Release every MD01 entry's static metadata buffer, then the
+   entry array itself, leaving the list empty. */
+static void chunk_reset(DTSUHD *h)
+{
+    int n;
+
+    for (n = 0; n < h->md01_count; n++) {
+        MD01 *entry = &h->md01[n];
+
+        av_freep(&entry->buf);
+    }
+
+    av_freep(&h->md01);
+    h->md01_count = 0;
+}
+
+/* Pick the default object: scanning the MD01 chunks in order, return from
+   the first chunk that has any started object belonging to a selectable
+   presentation the one with the lowest presentation index (the first such
+   object on ties).  Returns NULL when no chunk has a candidate. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    int chunk, slot;
+
+    for (chunk = 0; chunk < h->md01_count; chunk++) {
+        MDObject *objs = h->md01[chunk].object;
+        int best = -1;
+
+        for (slot = 0; slot < 257; slot++) {
+            const MDObject *cand = &objs[slot];
+
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (best < 0 || cand->pres_index < objs[best].pres_index)
+                best = slot;
+        }
+
+        if (best >= 0)
+            return &objs[best];
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   For every table row whose activity bits intersect object->ch_activity_mask,
+   the row's channel masks are OR-ed into 'info'.  channel_count is the number
+   of bits set in info->channel_mask.  A NULL 'object' leaves 'info' untouched.
+*/
+static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        // AV_CH_* are bit masks; AV_CHAN_* are channel indices and must not be OR-ed into a mask.
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        // NOTE(review): unlike its neighbours this row matches two activity bits
+        // (0x100000 | 0x040000); confirm against Table 7-28.
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (!object)
+        return;
+
+    for (i = 0; activity_map[i].activity_mask; i++) {
+        if (activity_map[i].activity_mask & object->ch_activity_mask) {
+            info->channel_mask |= activity_map[i].channel_mask;
+            info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+        }
+    }
+    info->channel_count = av_popcount(info->channel_mask);
+    info->rep_type = object->rep_type;
+}
+
+/* Fill 'info' with the values needed for the MP4 Sample Entry box.
+   The sample size is always 16 bits.  The coding name (the name of the
+   SampleEntry sub-box) is 'dtsx' unless the bitstream version is > 2.
+   If DecoderProfile == 2, then MaxPayloadCode will be zero.
+*/
+static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
+{
+    static const char *coding_name[] = { "dtsx", "dtsy" };
+    const int newer_than_v2 = h->major_version > 2;
+
+    memset(info, 0, sizeof(*info));
+    memcpy(info->coding_name, coding_name[newer_than_v2], 5);
+
+    /* Channel mask, channel count and representation type. */
+    extract_object_info(find_default_audio(h), info);
+
+    /* Codes derived from the stream parameters. */
+    info->base_sample_freq_code = h->sample_rate == 48000;
+    info->decoder_profile_code  = h->major_version - 2;
+    info->frame_duration_code   = h->frame_duration_code;
+    info->max_payload_code      = 0 + newer_than_v2;
+    info->num_pres_code         = h->num_audio_pres - 1;
+
+    info->sample_rate     = h->sample_rate;
+    info->sample_rate_mod = h->sample_rate_mod;
+    info->sample_size     = 16;
+    info->valid           = 1;
+}
+
+/* Table 6-17 p47.  For each of the low 'index' bits set in 'mask', skip one
+   variable length field; on non-sync frames the field is preceded by a
+   one-bit presence flag.  Always returns 0. */
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const uint8_t table[4] = { 4, 8, 16, 32 };
+    GetBitContext *gb = &h->gb;
+    int bit;
+
+    for (bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+        if (!h->is_sync_frame && !get_bits1(gb))
+            continue;
+        get_bits_var(gb, table, 1);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45
+
+   Reads the audio presentation parameters.  On sync frames the presentation
+   count, the per-presentation selectable flags and dependency masks are read
+   and stored in h->audio[]; on other frames the stored values are reused.
+   Returns 0 on success, 1 on error.
+
+   The dependency mask of presentation N is N bits wide and is kept in an
+   int, so more than 32 presentations cannot be represented.  Such streams
+   are rejected instead of requesting more bits than the bit reader can
+   deliver or shifting past the width of an int.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    int num_audio_pres;
+    unsigned read_mask;
+    static const uint8_t table[4] = { 0, 2, 4, 5 };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            num_audio_pres = 1;
+        else
+            num_audio_pres = get_bits_var(gb, table, 1) + 1;
+
+        /* Validate before storing so later frames never see a bad count. */
+        if (num_audio_pres > 32)
+            return 1;
+        h->num_audio_pres = num_audio_pres;
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* audio is at most 31 here; get_bits() is limited to 25 bits. */
+                read_mask = (audio > 0) ? get_bits_long(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-9 p 38.  Table driven 16-bit CRC, seeded with 0xFFFF and updated
+   four bits at a time, over 'bytes' bytes of the frame buffer starting
+   'bit' bits from its beginning.  Returns 0 when the final CRC is zero,
+   1 otherwise. */
+static int check_crc(DTSUHD *h, int bit, int bytes)
+{
+    static const uint16_t lookup[16] = {
+        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
+        0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF
+    };
+    GetBitContext reader;
+    uint16_t remainder = 0xFFFF;
+    int n;
+
+    init_get_bits(&reader, h->data, h->data_bytes * 8);
+    skip_bits(&reader, bit);
+
+    /* Two 4-bit steps per byte. */
+    for (n = 0; n < bytes; n++) {
+        remainder = (remainder << 4) ^ lookup[(remainder >> 12) ^ get_bits(&reader, 4)];
+        remainder = (remainder << 4) ^ lookup[(remainder >> 12) ^ get_bits(&reader, 4)];
+    }
+
+    return remainder != 0;
+}
+
+/* Table 6-12 p 40.  A one-bit selector picks 3-bit or 6-bit version fields;
+   the first field plus 2 becomes the major version, the second is skipped. */
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int width = 6;
+
+    if (get_bits1(gb))
+        width = 3;
+
+    h->major_version = get_bits(gb, width) + 2;
+    skip_bits(gb, width);
+}
+
+/* Table 6-12 p 40
+
+   Parse the stream parameters at the current position of h->gb.  The full
+   channel mix flag, version, frame duration, clock rate and sample rate
+   are only present on sync frames; on other frames the values stored from
+   the last sync frame are left untouched.
+
+   Returns 0 on success, 1 on CRC mismatch or when a reserved duration or
+   clock rate code is seen.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    /* Index 3 maps to 0, which is rejected below as a bitstream error. */
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    /* The CRC is checked on every frame except non-sync full channel mix frames. */
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    if (has_ftoc_crc && check_crc(h, 0, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        /* Frame duration = base duration * (3-bit code + 1). */
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        /* A 36-bit time stamp follows when the flag is set; it is skipped. */
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the size of every navigation entry that is not
+   marked present, which makes its slot reusable by navi_find_index(). */
+static void navi_purge(DTSUHD *h)
+{
+    int n;
+
+    for (n = 0; n < h->navi_count; n++) {
+        NAVI *entry = &h->navi[n];
+
+        if (entry->present)
+            continue;
+        entry->bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.  Wipe all entries currently in use and empty the list;
+   the allocation itself is kept. */
+static void navi_clear(DTSUHD *h)
+{
+    NAVI *list = h->navi;
+
+    if (list)
+        memset(list, 0, sizeof(*list) * h->navi_count);
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Drop the 'present' mark from every navigation entry. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int n = 0;
+
+    while (n < h->navi_count) {
+        h->navi[n].present = 0;
+        n++;
+    }
+}
+
+/* Table 6-23 p51.  Return 0 on success, and the index is returned in
+   the *list_index parameter.  Return 1 if the list cannot be grown.
+
+   An existing entry whose 'index' equals desired_index is marked present and
+   reused.  Otherwise the first free slot (not present, zero bytes) is taken,
+   or the list is extended by one entry, and the slot is initialised with
+   zero bytes, id 256 and the desired index.
+
+   av_realloc_array() is used rather than av_reallocp_array() so that a
+   failed allocation keeps h->navi valid and in step with h->navi_count.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+    NAVI *grown;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+            if (!grown)
+                return 1;
+            h->navi = grown;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48
+
+   Parse the chunk navigation data: the size and CRC flag of every metadata
+   chunk, followed by the index, optional id and size of every audio chunk.
+   h->chunk_bytes ends up holding the sum of all sizes read.
+   Returns 0 on success, 1 on allocation failure.
+
+   The chunk table is grown with av_realloc_array() so that a failed
+   allocation leaves h->chunk and h->chunk_alloc consistent; h->chunk_count
+   is reset in that case so no later loop walks past the table.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const uint8_t table2468[4] = { 2, 4, 6, 8 };
+    static const uint8_t table_audio_chunk_sizes[4] = { 9, 11, 13, 16 };
+    static const uint8_t table_chunk_sizes[4] = { 6, 9, 12, 15 };
+
+    h->chunk_bytes = 0;
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468, 1);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        UHDChunk *grown = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
+
+        if (!grown) {
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk = grown;
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes, 1);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468, 1);
+
+    /* Sync frames rebuild the navigation list, other frames update it. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468, 1);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        /* The id is always sent on sync frames, never on non-sync full
+           channel mix frames, and is flagged otherwise. */
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, table2468, 1);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes, 1);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not referenced by this frame lose their size. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6
+
+   Read the list of object ids carried by an MD01 chunk into
+   md01->object_list.  Full channel mix streams carry the single object
+   id 256.  Returns 0 on success, 1 if the coded object count does not fit
+   into md01->object_list.
+
+   The variable length count can encode values up to 343, more than the
+   list can hold, so it is validated before any entry is written.
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    static const uint8_t table1[4] = { 3, 4, 6, 8 };
+    const int capacity = sizeof(md01->object_list) / sizeof(md01->object_list[0]);
+    int count;
+    int i;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        count = get_bits_var(gb, table1, 1);
+        if (count < 0 || count > capacity) {
+            md01->object_list_count = 0;
+            return 1;
+        }
+        md01->object_list_count = count;
+        /* Each id is preceded by a flag selecting an 8-bit or 4-bit field. */
+        for (i = 0; i < md01->object_list_count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Consume one loudness parameter set without storing it: a
+   6-bit loudness value followed by either a 2-bit field (nominal case) or
+   a 5-bit and a 4-bit field. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    get_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        get_bits_md01(h, md01, 2);
+    } else {
+        get_bits_md01(h, md01, 5);
+        get_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8
+
+   Walk the static metadata parameters, reading through get_bits_md01().
+   Nothing is stored apart from md01->static_md_extracted.  With only_first
+   set, parsing stops after the loudness sets, which is all that is
+   guaranteed to be inside the first packet.  Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        get_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                get_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) { /* smooth md present */
+            /* 6 * 6 = 36 bits in total; get_bits() delivers at most 25 bits
+               per call, so the field is consumed in two halves. */
+            get_bits_md01(h, md01, 18);
+            get_bits_md01(h, md01, 18);
+        }
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* Skip to the end of the accumulated static metadata.  The packet
+           size is in bytes while get_bits_count() counts bits. */
+        i = md01->static_md_packets * md01->static_md_packet_size * 8 - get_bits_count(&md01->gb);
+        if (i > 0)
+            skip_bits_long(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7
+
+   Static metadata may be spread over several frames, one packet per frame.
+   On a sync frame the packet count and packet size are read and the
+   accumulation buffer is (re)sized; on every frame one more packet is
+   copied from the frame into the buffer until all have been acquired.  The
+   parameters are parsed once the first packet is in (loudness only) and
+   again once the last one is in.  Returns 0 on success, 1 on error.
+
+   The buffer is read through a GetBitContext, so it is allocated with
+   AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes after the payload.  If the
+   allocation fails, the size and packet count are reset so that later
+   frames do not write through the NULL buffer.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const uint8_t table1[4] = { 0, 6, 9, 12 };
+    static const uint8_t table2[4] = { 5, 7, 9, 11 };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1, 1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2, 1) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n + AV_INPUT_BUFFER_PADDING_SIZE)) {
+                /* av_reallocp() has freed the buffer and cleared the pointer. */
+                md01->buf_bytes = 0;
+                md01->static_md_packets = 0;
+                return 1;
+            }
+            memset(md01->buf + n, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already acquired. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Returns 1 if the object is to be rendered, 0 if not.  Ids of 224 and
+   above are always accepted without reading anything; for lower ids a
+   one-bit flag decides, and a rejected object has its render data skipped. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const uint8_t table[4] = { 8, 10, 12, 14 };
+    GetBitContext *gb = &h->gb;
+
+    if (object_id < 224 && !get_bits1(gb)) {
+        /*  Reject the render and skip the render data. */
+        skip_bits1(gb);
+        skip_bits(gb, get_bits_var(gb, table, 1));
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Table 7-26
+
+   Determine object->ch_activity_mask.  Binaural objects use table entry 1
+   without reading an index.  Otherwise a 4-bit index is read: 0..13 select a
+   predefined mask from Table 7-27, 14 is followed by an explicit 16-bit
+   mask and 15 by an explicit 32-bit mask.
+*/
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    GetBitContext *gb = &h->gb;
+    const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 1 : get_bits(gb, 4);
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+
+    if (ch_index == 14)
+        object->ch_activity_mask = get_bits(gb, 16);
+    else if (ch_index == 15)
+        /* get_bits() is limited to 25 bits; 32 bits need get_bits_long(). */
+        object->ch_activity_mask = get_bits_long(gb, 32);
+    else
+        object->ch_activity_mask = mask_table[ch_index];
+}
+
+/* Table 7-22
+
+   Parse the leading part of one object's metadata.  Only the representation
+   type and, for channel mask based types, the channel activity mask are
+   stored in 'object'; everything else is skipped.  Fields are only present
+   when start_frame_flag is set.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const uint8_t table2[4] = { 1, 4, 4, 8 };
+    static const uint8_t table3[4] = { 3, 3, 4, 8 };
+
+    /* One leading bit is present for every object id except 256. */
+    skip_bits(gb, object_id != 256);
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        /* REP_TYPE_AMBISONIC and REP_TYPE_AUDIO_TRACKS leave both flags at 0. */
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                get_bits_var(gb, table2, 1);
+                get_bits_var(gb, table3, 1);
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                /* object_3d_metadata_flag is always 0 in this branch, so 5 bits are skipped. */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag);
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4
+ *
+ * Parse an MD01 metadata chunk for presentation pres_index from h->gb.
+ * Returns 1 when parse_multi_frame_md() or parse_object_metadata() reports
+ * failure, 0 otherwise.
+ */
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        /* Multi-frame metadata is present only when the flag bit is set. */
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    /* All per-object state, including 'started', is cleared on every call. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            /* One bit is skipped for every object except ID 256. */
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* Object IDs 224..255 are not passed to parse_object_metadata(). */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* Only the first object that is suitable for render is examined. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2
+ *
+ * Walk every chunk announced by the chunk navigation data.  Chunks with ID 1
+ * are parsed as MD01 metadata; the reader is then moved to the end of the
+ * chunk regardless of how much of it was consumed.
+ * Returns 0 on success, 1 on any failure.
+ */
+static int parse_chunks(DTSUHD *h)
+{
+    static const uint8_t table_aud_pres[4] = { 0, 2, 4, 4 };
+    GetBitContext *gb = &h->gb;
+    int i;
+
+    for (i = 0; i < h->chunk_count; i++) {
+        /* Bit position of the first bit after this chunk. */
+        const int chunk_end = get_bits_count(gb) + h->chunk[i].bytes * 8;
+        uint32_t id;
+
+        if (h->chunk[i].crc_flag &&
+            check_crc(h, get_bits_count(gb), h->chunk[i].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            MD01 *md01;
+            int pres_index = get_bits_var(gb, table_aud_pres, 1);
+
+            if (pres_index > 255)
+                return 1;
+
+            /* Reuse the MD01 state for this ID, creating it on first use. */
+            md01 = chunk_find_md01(h, id);
+            if (!md01)
+                md01 = chunk_append_md01(h, id);
+            if (!md01)
+                return 1;
+
+            if (parse_md_chunk_list(h, md01))
+                return 1;
+            if (parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        skip_bits(gb, chunk_end - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/** Allocate a zero-initialized parsing handle.  Use one handle per
+    DTS:X Profile 2 audio stream and release it with dtsuhd_destroy().
+    Do not share a handle between audio streams.
+
+  @return Parsing handle for use with other functions, or NULL on failure.
+*/
+DTSUHD *dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(*handle));
+
+    return handle;
+}
+
+/** Release the parsing handle together with its chunk and navigation tables.
+    Passing NULL is allowed and does nothing.
+
+  @param[in] h Handle allocated by dtsuhd_create
+*/
+void dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_freep(&h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+  @param[in] h Handle allocated by dtsuhd_create
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @param[out] di Results of descriptor parsing, may be NULL; it is only
+                 updated when the parsed frame is a sync frame
+  @return DTSUHD_OK (0) on success, another DTSUHDStatus value on error
+*/
+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                 DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
+{
+    GetBitContext *gb;
+    int fraction = 1;
+    int i;
+    int syncword;
+    static const uint8_t table_payload[4] = { 5, 8, 10, 12 };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    /* init_get_bits() refuses over-large buffers and then leaves the reader
+       without a usable buffer, so its result must not be ignored. */
+    if (init_get_bits(gb, data, data_bytes * 8) < 0)
+        return DTSUHD_INVALID_FRAME;
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload, 1) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    if (di && h->is_sync_frame) {
+        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+           following the FTOC CRC.
+        */
+        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
+        if (parse_chunks(h))
+            return DTSUHD_INVALID_FRAME;
+        update_descriptor(h, di);
+    }
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].present) {
+            if (h->navi[i].id == 3)
+                fraction = 2;
+            else if (h->navi[i].id == 4)
+                fraction = 4;
+        }
+    }
+
+    if (fi) {
+        fi->sync = h->is_sync_frame;
+        fi->frame_bytes = h->frame_bytes;
+        fi->sample_rate = h->sample_rate;
+        /* clock_rate and sample_rate are presumably derived from the
+           bitstream (TODO confirm in parse_stream_params); never divide by
+           a zero value, report 0 instead. */
+        fi->sample_count = h->clock_rate ?
+            (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction) : 0;
+        fi->duration = fi->sample_rate ?
+            (double)fi->sample_count / fi->sample_rate : 0.0;
+    }
+
+    return DTSUHD_OK;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that payload.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload as stored in the chunk
+              header, may be NULL; left untouched when 0 is returned
+  @return STRMDATA payload offset or 0 if not a valid DTS:X Profile 2 file
+*/
+int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    int offset = 0;
+
+    if (data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+
+    while (data_size - offset >= DTSUHD_CHUNK_HEADER + 4) {
+        const uint8_t *chunk = data_start + offset;
+        const uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return offset + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* chunk_size comes straight from the file.  Stop before stepping
+           past the buffer: an unchecked 64-bit advance would overflow the
+           pointer/offset and could wrap back into (or out of) the buffer. */
+        if (chunk_size > (uint64_t)(data_size - offset - DTSUHD_CHUNK_HEADER))
+            break;
+
+        offset += (int)chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..8b4e8ce2aa
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,84 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from dtsuhd_frame.  DTSUHD_OK is 0; every other value
+   reports a reason why no frame was parsed. */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Stream information returned from an audio frame parsed by dtsuhd_frame.
+   dtsuhd_frame only fills this in when the parsed frame is a sync frame.
+   The bit widths noted below are the ones the dtsuhd demuxer uses when it
+   serializes these fields into its 'udts' extradata box. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code; /* Stored as 1 bit. */
+    int channel_count;         /* Used by the demuxer as the channel count. */
+    int decoder_profile_code;  /* Stored as 6 bits. */
+    int frame_duration_code;   /* Stored as 2 bits; demuxer frame size is 512 << code. */
+    int max_payload_code;      /* Stored as 3 bits. */
+    int num_pres_code;         /* Stored as 5 bits. */
+    int rep_type;              /* Stored as 3 bits. */
+    int sample_rate;           /* Used by the demuxer as the stream sample rate. */
+    int sample_rate_mod;       /* Stored as 2 bits. */
+    int sample_size;
+    int channel_mask;          /* Stored as 32 bits. */
+    uint64_t ffmpeg_channel_mask; /* Used by the demuxer as the native channel layout mask. */
+} DTSUHDDescriptorInfo;
+
+/* Return frame information from an audio frame parsed by dtsuhd_frame.
+   Filled in for every successfully parsed frame when a non-NULL pointer
+   is supplied. */
+typedef struct DTSUHDFrameInfo {
+    double duration;  /* Duration of frame in seconds (sample_count / sample_rate). */
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+/* Opaque parsing handle; the structure is defined in dtsuhd_common.c. */
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+/* Allocate a parsing handle; returns NULL on failure. */
+struct DTSUHD *dtsuhd_create(void);
+/* Free a handle returned by dtsuhd_create; NULL is accepted. */
+void dtsuhd_destroy(DTSUHD*);
+/* Parse the frame at 'data' (nData valid bytes); returns a DTSUHDStatus.
+   Either output pointer may be NULL. */
+int dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                 DTSUHDFrameInfo*, DTSUHDDescriptorInfo*);
+/* Locate the STRMDATA payload in DTSHDHDR file content; returns its offset,
+   or 0 when the buffer is not such a file.  strmdata_size may be NULL. */
+int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                            size_t *strmdata_size);
+
+/* Return non-zero when the 32-bit value is either DTS-UHD frame signature. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    if (syncword == DTSUHD_SYNCWORD)
+        return 1;
+
+    return syncword == DTSUHD_NONSYNCWORD;
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..4c553b8e4f
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Parser private state: input is accumulated in 'buf' so that each frame
+   handed to dtsuhd_frame() is contiguous. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;   /* Frame parsing handle from dtsuhd_create(). */
+    int buf_offset;   /* Offset in buf of the data currently being parsed. */
+    int buf_bytes;    /* Number of bytes stored in buf. */
+    int frame_bytes;  /* Bytes to skip at buf_offset on the next call. */
+    uint8_t *buf;     /* DTSUHD_BUFFER_SIZE bytes plus input padding. */
+} DTSUHDParseContext;
+
+/* Allocate the frame parsing handle and the contiguous staging buffer.
+ * Returns 0 on success or AVERROR(ENOMEM).
+ *
+ * av_parser_init() does not call parser_close() when parser_init fails, so
+ * anything allocated here has to be released before reporting the error. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        dtsuhd_destroy(pc->dtsuhd); /* accepts NULL */
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Release everything allocated by parser_init().
+ *
+ * ff_parse_close() is deliberately not called: it interprets priv_data as a
+ * ParseContext and frees its leading buffer pointer, but DTSUHDParseContext
+ * does not embed a ParseContext. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+}
+
+// Keep data in contiguous buffer as required by dtsuhd_frame.
+//
+// Drops the bytes consumed by the previous call, appends as much of the new
+// input as fits, and advances to the next syncword.  On return *buf/*buf_size
+// describe the buffered data to parse and *input_consumed is the number of
+// input bytes copied.  Returns non-zero when input was copied but less than
+// DTSUHD_MAX_FRAME_SIZE bytes are buffered, i.e. the caller should wait for
+// more input before parsing.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    pc->buf_offset += pc->frame_bytes;
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the unparsed tail is moved; copying buf_bytes from buf_offset
+        // would read past the end of the allocation.
+        const int remaining = FFMAX(0, pc->buf_bytes - pc->buf_offset);
+
+        memmove(pc->buf, pc->buf + pc->buf_offset, remaining);
+        pc->buf_bytes = remaining;
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: buffer the input and emit at most one complete frame.
+ *
+ * The return value is always the number of input bytes consumed.  A parser
+ * callback must not return an AVERROR code: av_parser_parse2() treats the
+ * result as a byte index and asserts that it is greater than -0x20000000.
+ * Unparsable data is therefore reported through the log, one byte is dropped
+ * so the next call resynchronizes on a later syncword, and no output is
+ * produced for this call. */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        /* Reached only once no further input is being buffered; the
+           incomplete remainder is discarded on the next call. */
+        pc->frame_bytes = buf_size;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* Step over the first byte of the rejected syncword. */
+        pc->frame_bytes = buf_size > 0;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* Parser registration.  Must be const to match the
+   'extern const AVCodecParser ff_dtsuhd_parser' declaration in parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 048649689b..42cf19348f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -186,6 +186,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
 OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
 OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
 OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
+OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
 OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
 OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index cb5b69e9cd..1b48ce6073 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -144,6 +144,7 @@ extern const AVInputFormat  ff_dss_demuxer;
 extern const AVInputFormat  ff_dts_demuxer;
 extern const FFOutputFormat ff_dts_muxer;
 extern const AVInputFormat  ff_dtshd_demuxer;
+extern const AVInputFormat  ff_dtsuhd_demuxer;
 extern const AVInputFormat  ff_dv_demuxer;
 extern const FFOutputFormat ff_dv_muxer;
 extern const AVInputFormat  ff_dvbsub_demuxer;
diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
index a3dea0668f..6e9e78a335 100644
--- a/libavformat/dtshddec.c
+++ b/libavformat/dtshddec.c
@@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
 static int dtshd_probe(const AVProbeData *p)
 {
     if (AV_RB64(p->buf) == DTSHDHDR)
-        return AVPROBE_SCORE_MAX;
+        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature.
     return 0;
 }
 
diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
new file mode 100644
index 0000000000..e15176382d
--- /dev/null
+++ b/libavformat/dtsuhddec.c
@@ -0,0 +1,214 @@
+/*
+ * DTS-UHD audio demuxer
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Report DTS-UHD audio stream configuration and extract raw packet data.
+ */
+
+#include "internal.h"
+#include "libavcodec/dtsuhd_common.h"
+#include "libavcodec/put_bits.h"
+#include "libavutil/intreadwrite.h"
+
+#define DTSUHD_BUFFER_SIZE (1024 * 1024)
+
+/* Demuxer private state. */
+typedef struct DTSUHDDemuxContext {
+    size_t data_end;       /* File offset at which read_packet stops reading. */
+    struct DTSUHD *dtsuhd; /* Frame parsing handle used by read_header. */
+    uint8_t *buf;          /* DTSUHD_BUFFER_SIZE byte buffer holding the start of the file. */
+} DTSUHDDemuxContext;
+
+/* Probe callback: starting at the STRMDATA payload (or at offset 0 when the
+   buffer is not a DTSHDHDR file), look for a syncword at which a frame
+   parses successfully.  Scores AVPROBE_SCORE_MAX - 3 on the first hit. */
+static int probe(const AVProbeData *p)
+{
+    int pos = dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
+    struct DTSUHD *handle = dtsuhd_create();
+    int result = 0;
+
+    while (pos + 4 < p->buf_size) {
+        const uint8_t *candidate = p->buf + pos;
+
+        if (dtsuhd_is_syncword(AV_RB32(candidate)) &&
+            dtsuhd_frame(handle, candidate, p->buf_size - pos, NULL, NULL) == DTSUHD_OK) {
+            result = AVPROBE_SCORE_MAX - 3;
+            break;
+        }
+
+        pos++;
+    }
+
+    dtsuhd_destroy(handle);
+    return result;
+}
+
+/* Release the parsing handle and the header buffer.  Always returns 0. */
+static av_cold int read_close(AVFormatContext *s)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+
+    dtsuhd_destroy(ctx->dtsuhd);
+    ctx->dtsuhd = NULL;
+    av_freep(&ctx->buf);
+
+    return 0;
+}
+
+/* Return the offset of the first syncword at or after data_start.
+   The scan is bounded by the buffer capacity (DTSUHD_BUFFER_SIZE), not by
+   the number of bytes actually read; when nothing matches, the offset at
+   which the scan stopped is returned. */
+static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
+{
+    int pos;
+
+    for (pos = data_start; pos + 4 < DTSUHD_BUFFER_SIZE; pos++) {
+        if (dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + pos)))
+            break;
+    }
+
+    return pos;
+}
+
+/* Serialize the descriptor into a 'udts' box and store it as the codec
+   extradata.  Returns 0 on success or the error from ff_alloc_extradata(). */
+static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di)
+{
+    static const char box_tag[4] = { 'u', 'd', 't', 's' };
+    uint8_t box[32];
+    PutBitContext pb;
+    int box_bytes;
+    int err;
+    int i;
+
+    init_put_bits(&pb, box, sizeof(box));
+
+    /* Box header: size placeholder (patched below) followed by the tag. */
+    put_bits32(&pb, 0);
+    for (i = 0; i < 4; i++)
+        put_bits(&pb, 8, box_tag[i]);
+
+    /* Box payload, in bitstream order. */
+    put_bits(&pb, 6, di->decoder_profile_code);
+    put_bits(&pb, 2, di->frame_duration_code);
+    put_bits(&pb, 3, di->max_payload_code);
+    put_bits(&pb, 5, di->num_pres_code);
+    put_bits32(&pb,  di->channel_mask);
+    put_bits(&pb, 1, di->base_sample_freq_code);
+    put_bits(&pb, 2, di->sample_rate_mod);
+    put_bits(&pb, 3, di->rep_type);
+    put_bits(&pb, 3, 0);
+    put_bits(&pb, 1, 0);
+    /* ID Tag present flag for each presentation, all written as zero. */
+    put_bits64(&pb, di->num_pres_code + 1, 0);
+
+    /* Pad to a byte boundary, then record the final size in the header. */
+    flush_put_bits(&pb);
+    box_bytes = put_bits_count(&pb) >> 3;
+    AV_WB32(box, box_bytes);
+
+    err = ff_alloc_extradata(par, box_bytes);
+    if (err < 0)
+        return err;
+
+    memcpy(par->extradata, box, box_bytes);
+
+    return 0;
+}
+
+/* Read the start of the file, locate the first DTS-UHD frame and configure
+ * the single audio stream from it.  Returns 0 on success or a negative
+ * AVERROR code. */
+static int read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    DTSUHDDescriptorInfo di = { 0 };
+    DTSUHDFrameInfo fi;
+    int buf_bytes;
+    int ret;
+    int data_start;
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
+        return AVERROR(EIO);
+
+    /* Zero-filled: find_first_syncword() scans the whole buffer capacity, so
+       the bytes beyond what avio_read() delivers must have defined contents
+       that can never look like a syncword. */
+    dtsuhd->buf = av_mallocz(DTSUHD_BUFFER_SIZE);
+    dtsuhd->dtsuhd = dtsuhd_create();
+    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st)
+        return AVERROR(ENOMEM);
+
+    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
+    if (buf_bytes < 0)
+        return buf_bytes;
+
+    data_start = dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end);
+    dtsuhd->data_end += data_start;
+    if (data_start == 0)
+        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file.
+
+    data_start = find_first_syncword(dtsuhd, data_start);
+    /* No syncword inside the bytes actually read.  Without this check
+       buf_bytes - data_start below could become negative and be passed to
+       dtsuhd_frame() as a huge size_t. */
+    if (data_start + 4 > buf_bytes) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (avio_seek(pb, data_start, SEEK_SET) < 0)
+        return AVERROR(EINVAL);
+
+    ret = dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
+        buf_bytes - data_start, &fi, &di);
+    if (ret != DTSUHD_OK || !di.valid) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id = s->iformat->raw_codec_id;
+    st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
+    st->codecpar->ch_layout.nb_channels = di.channel_count;
+    st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask;
+    st->codecpar->codec_tag = AV_RL32(di.coding_name);
+    st->codecpar->frame_size = 512 << di.frame_duration_code;
+    st->codecpar->sample_rate = di.sample_rate;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    st->codecpar->channels = di.channel_count;
+    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ret = write_extradata(st->codecpar, &di);
+    if (ret < 0)
+        return ret;
+
+    if (st->codecpar->sample_rate)
+        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+}
+
+/* Read up to DTSUHD_MAX_FRAME_SIZE bytes of raw stream data, never reading
+   past data_end.  Returns the av_get_packet() result, or AVERROR_EOF once
+   data_end has been reached. */
+static int read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+    int64_t remaining = ctx->data_end - avio_tell(s->pb);
+    int64_t want = FFMIN(remaining, DTSUHD_MAX_FRAME_SIZE);
+    int got;
+
+    if (want <= 0)
+        return AVERROR_EOF;
+
+    got = av_get_packet(s->pb, pkt, want);
+    if (got >= 0)
+        pkt->stream_index = 0;
+
+    return got;
+}
+
+/* Demuxer registration.  Must be const to match the
+   'extern const AVInputFormat ff_dtsuhd_demuxer' declaration in allformats.c.
+   FF_FMT_INIT_CLEANUP makes libavformat call read_close() when read_header()
+   fails, so the buffer and parsing handle allocated there are not leaked. */
+const AVInputFormat ff_dtsuhd_demuxer = {
+    .name           = "dtsuhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
+    .priv_data_size = sizeof(DTSUHDDemuxContext),
+    .flags_internal = FF_FMT_INIT_CLEANUP,
+    .read_probe     = probe,
+    .read_header    = read_header,
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "dtsx",
+    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
+};
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c370922c7d..e727407694 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -762,6 +762,24 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
     return update_size(pb, pos);
 }
 
+/* Write the track's stored 'udts' box.  Returns the number of bytes
+   written, 0 when vos_data does not hold a udts box, or AVERROR(EINVAL)
+   when fewer than 12 bytes of vos_data are available. */
+static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
+{
+    const int have_box = track->vos_len >= 12;
+
+    if (!have_box) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before DTS-UHD packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* vos_data is emitted verbatim, but only when it already is a udts box. */
+    if (memcmp(track->vos_data + 4, "udts", 4))
+        return 0;
+
+    avio_write(pb, track->vos_data, track->vos_len);
+    return track->vos_len;
+}
+
 static int mov_pcm_le_gt16(enum AVCodecID codec_id)
 {
     return codec_id == AV_CODEC_ID_PCM_S24LE ||
@@ -1367,6 +1385,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         ret = mov_write_dops_tag(s, pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
         ret = mov_write_dmlp_tag(s, pb, track);
+    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
+        ret = mov_write_udts_tag(pb, track);
     else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
         if (track->par->ch_layout.nb_channels > 1)
             ret = mov_write_chnl_tag(s, pb, track);
@@ -2781,6 +2801,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_id == AV_CODEC_ID_TRUEHD ||
          track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
+         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -5673,6 +5694,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
     }
 }
 
+/* Mark packets that begin with the DTS-UHD sync frame signature
+   (0x40411BF2) as sync samples. */
+static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    if (pkt->size <= 4 || AV_RB32(pkt->data) != 0x40411BF2)
+        return;
+
+    trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+    trk->has_keyframes++;
+}
+
 static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
 {
     int length;
@@ -6343,6 +6372,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         mov_parse_vc1_frame(pkt, trk);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
+        mov_parse_dtsuhd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
         if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
             trk->entry > 0) { // force sync sample for the first key frame
@@ -7800,6 +7831,7 @@ static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
     { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
     { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
+    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
     { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
     { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
     { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
diff --git a/libavformat/version.h b/libavformat/version.h
index cc56b7cf5c..384cbd49cc 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #include "version_major.h"
 
-#define LIBAVFORMAT_VERSION_MINOR   4
+#define LIBAVFORMAT_VERSION_MINOR   5
 #define LIBAVFORMAT_VERSION_MICRO 101
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
Roy Funderburk April 15, 2023, 8:20 p.m. UTC | #5
On 4/15/23 7:56 AM, Michael Niedermayer wrote:
> checking file Changelog
> patch: **** malformed patch at line 147: diff --git a/configure b/configure

Apologies for that, I sorted out the issues and replied with a new patch.

Regards,
-Roy Funderburk
Michael Niedermayer April 16, 2023, 7:55 p.m. UTC | #6
Hi

On Sat, Apr 15, 2023 at 01:04:42PM -0700, Roy Funderburk wrote:
> 
> Parsing and demuxing of DTS-UHD input files per ETSI TS 102 114 is added
> as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.
> 
> Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
> ---
>  Changelog                  |   1 +
>  configure                  |   1 +
>  doc/general_contents.texi  |   1 +
>  libavcodec/Makefile        |   1 +
>  libavcodec/codec_desc.c    |   7 +
>  libavcodec/codec_id.h      |   1 +
>  libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++
>  libavcodec/dtsuhd_common.h |  84 ++++
>  libavcodec/dtsuhd_parser.c | 141 ++++++
>  libavcodec/parsers.c       |   1 +
>  libavformat/Makefile       |   1 +
>  libavformat/allformats.c   |   1 +
>  libavformat/dtshddec.c     |   2 +-
>  libavformat/dtsuhddec.c    | 214 ++++++++
>  libavformat/movenc.c       |  32 ++
>  libavformat/version.h      |   2 +-
>  16 files changed, 1479 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/dtsuhd_common.c
>  create mode 100644 libavcodec/dtsuhd_common.h
>  create mode 100644 libavcodec/dtsuhd_parser.c
>  create mode 100644 libavformat/dtsuhddec.c
> 
[...]

> +/* In the specification, the pseudo code defaults the 'add' parameter to true.
> +   Table 7-30 shows passing an explicit false, most other calls do not
> +   pass the extractAndAdd parameter.
> +
> +   Function based on code in Table 5-2
> +*/
> +static int get_bits_var(GetBitContext *gb, const uint8_t table[], int add)
> +{
> +    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
> +    static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 };
> +    int code = show_bits(gb, 3); /* value range is [0, 7] */
> +    int i;
> +    int index = index_table[code];
> +    int value = 0;
> +
> +    skip_bits(gb, bits_used[code]);
> +    if (table[index] > 0) {
> +        if (add) {
> +            for (i = 0; i < index; i++)
> +                value += 1 << table[i];
> +        }
> +        value += get_bits_long(gb, table[index]);
> +    }

If the speed of this matters,
you could remove the indirection by index_table and remove the add code; that
would add 12 entries to some of these tables

something like:

int code = show_bits(gb, 3);
skip_bits(gb, bits_used[code]);
if (table[code][0] == 0)
    return 0;
return get_bits_long(gb, table[code][0]) + table[code][1];
    
OTOH if speed doesn't matter then this can probably be left as is


[...]
> +
> +/* Table 6-9 p 38 */
> +static int check_crc(DTSUHD *h, int bit, int bytes)
> +{
> +    GetBitContext gb;
> +    int i;
> +    static const uint16_t lookup[16] = {
> +        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
> +        0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF
> +    };
> +    uint16_t crc = 0xFFFF;
> +
> +    init_get_bits(&gb, h->data, h->data_bytes * 8);
> +    skip_bits(&gb, bit);
> +    for (i = -bytes; i < bytes; i++)
> +        crc = (crc << 4) ^ lookup[(crc >> 12) ^ get_bits(&gb, 4)];
> +
> +    return crc != 0;
> +}

likely should use libavutil/crc.h


[...]

> +/* Table 7-26 */
> +static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
> +{
> +    GetBitContext *gb = &h->gb;
> +    const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 1 : get_bits(gb, 4);
> +    static const int mask_table[14] = { /* Table 7-27 */
> +        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
> +        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
> +    };
> +
> +    if (ch_index == 14)
> +        object->ch_activity_mask = get_bits(gb, 16);
> +    else if (ch_index == 15)

> +        object->ch_activity_mask = get_bits(gb, 32);

get_bits_long()

[...]

> +/** Allocate parsing handle.  The parsing handle should be used to parse
> +    one DTS:X Profile 2 Audio stream, then freed by calling DTSUHD_destroy().
> +    Do not use the same parsing handle to parse multiple audio streams.
> +
> +  @return Parsing handle for use with other functions, or NULL on failure.
> +*/
> +DTSUHD *dtsuhd_create(void)

stuff needs av / avpriv prefixes when shared between libraries; other symbols aren't
exported and will break the build depending on build options

also minor libavcodec version needs to be +1 when adding av* symbols
and libavcodec and libavformat changes should be in 2 seperate patches



[...]
> +    if (fi) {
> +        fi->sync = h->is_sync_frame;
> +        fi->frame_bytes = h->frame_bytes;
> +        fi->sample_rate = h->sample_rate;
> +        fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);


> +        fi->duration = (double)fi->sample_count / fi->sample_rate;

it feels as if double is not needed here
Either AVRational or a simple integer type int / int64_t in samples instead of seconds
seem better as it would be exact and no odd platform rounding difference
could happen


[...]
> +
> +/** Return the offset of the first UHD audio frame.
> +    When supplied a buffer containing DTSHDHDR file content, the DTSHD
> +    headers are skipped and the offset to the first byte of the STRMDATA
> +    chunk is returned, along with the size of that chunk.
> +
> +  @param[in] dataStart DTS:X Profile 2 file content to parse
> +  @param[in] dataSize Number of valid bytes in 'dataStart'
> +  @param[out] Number of leading DTS:X Profile 2 audio frames to discard,
> +              may be NULL
> +  @param[out] Size of STRMDATA payload, may be NULL
> +  @return STRMDATA payload offset or 0 if not a valid DTS:X Profile 2 file
> +*/
> +int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
> +{
> +    const uint8_t *data = data_start;
> +    const uint8_t *data_end = data + data_size;
> +    uint64_t chunk_size = 0;
> +
> +    if (data + DTSUHD_CHUNK_HEADER >= data_end || memcmp(data, "DTSHDHDR", 8))
> +        return 0;
> +

> +    for (; data + DTSUHD_CHUNK_HEADER + 4 <= data_end; data += chunk_size + DTSUHD_CHUNK_HEADER) {
> +        chunk_size = AV_RB64(data + 8);
> +
> +        if (!memcmp(data, "STRMDATA", 8)) {
> +            if (strmdata_size)
> +                *strmdata_size = chunk_size;
> +            return (int)(data - data_start) + DTSUHD_CHUNK_HEADER;
> +        }
> +    }

this can loop infinitely
there is undefined behavior from the out-of-array pointers that can occur with the
"right" chunk_size
also, data can decrease, if one ignores that this is already undefined before


[...]
> +
> +    ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> +    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> +    st->codecpar->codec_id = s->iformat->raw_codec_id;
> +    st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
> +    st->codecpar->ch_layout.nb_channels = di.channel_count;
> +    st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask;
> +    st->codecpar->codec_tag = AV_RL32(di.coding_name);
> +    st->codecpar->frame_size = 512 << di.frame_duration_code;
> +    st->codecpar->sample_rate = di.sample_rate;

you could align all the "=" below each other, that would make this look
more pretty

thx

[...]
Roy Funderburk April 16, 2023, 9:52 p.m. UTC | #7
On 4/16/23 12:55 PM, Michael Niedermayer wrote:
> also minor libavcodec version needs to be +1 when adding av* symbols
> and libavcodec and libavformat changes should be in 2 seperate patches

 
Would it be best to submit the separate patches as PATCH 1/2 and PATCH 2/2 in this email thread or is starting a new thread preferred?

Thanks for reviewing,
-Roy
Roy Funderburk April 17, 2023, 4:12 a.m. UTC | #8
Parsing of DTS-UHD input files per ETSI TS 103 491 is added
as parser for codec id AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |    1 +
 libavcodec/codec_desc.c    |    7 +
 libavcodec/codec_id.h      |    1 +
 libavcodec/dtsuhd_common.c | 1010 ++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |   83 +++
 libavcodec/dtsuhd_parser.c |  141 +++++
 libavcodec/parsers.c       |    1 +
 libavcodec/version.h       |    2 +-
 8 files changed, 1245 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index aa10fbfcf8..f57564e9eb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1155,6 +1155,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index efdcb59bc9..a58315f46b 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3369,6 +3369,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 64df9699f4..6d8b145ee3 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -538,6 +538,7 @@ enum AVCodecID {
     AV_CODEC_ID_FTR,
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..e9937bc105
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,1010 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED 0xFFFF
+
+enum RepType { /* values of MDObject.rep_type, read as a 3-bit field in parse_object_metadata() */
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index; /* presentation index passed to parse_md01(); indexes DTSUHD.audio[] */
+    int rep_type; /* one of enum RepType */
+    int ch_activity_mask; /* filled in by parse_ch_mask_params() */
+} MDObject;
+
+typedef struct MD01 {
+    GetBitContext gb; /* bit reader over 'buf', set up in parse_multi_frame_md() */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id; /* id this entry was created with in chunk_append_md01() */
+    int object_list[256]; int object_list_count; /* object ids read by parse_md_chunk_list() */
+    int packets_acquired; /* static metadata packets copied into 'buf' so far */
+    int static_md_extracted; /* set once parse_static_md_params() has parsed the full set */
+    int static_md_packets; /* number of packets that make up the static metadata */
+    int static_md_packet_size; /* bytes copied into 'buf' per packet */
+    int static_md_update_flag;
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+typedef struct NAVI { /* one entry of the audio chunk navigation list, see navi_find_index() */
+    int bytes;
+    int id;
+    int index;
+    int present; /* cleared by navi_clear_present(), set again when the index is seen */
+} NAVI;
+
+typedef struct UHDAudio { /* per-presentation state filled in by parse_aud_pres_params() */
+    int mask;
+    int selectable;
+} UHDAudio;
+
+typedef struct UHDChunk { /* per-chunk size and CRC flag read by parse_chunk_navi() */
+    int crc_flag;
+    int bytes;
+} UHDChunk;
+
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc; /* CRC table handed to av_crc() */
+    GetBitContext gb; /* bit reader used by the frame parsing functions */
+    MD01 *md01; int md01_count; /* array grown by chunk_append_md01(), freed by chunk_reset() */
+    NAVI *navi; int navi_alloc, navi_count; /* allocated / used entries of the NAVI list */
+    UHDAudio audio[256];
+    UHDChunk *chunk; int chunk_alloc, chunk_count; /* allocated / used entries of the chunk list */
+    int chunk_bytes; /* sum of all chunk and audio chunk sizes of the current frame */
+    int clock_rate;
+    int frame_bytes;
+    int frame_duration;
+    int frame_duration_code;
+    int ftoc_bytes; /* number of bytes covered by the FTOC CRC in parse_stream_params() */
+    int major_version;
+    int num_audio_pres;
+    int sample_rate; /* clock_rate << sample_rate_mod */
+    int sample_rate_mod;
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1;
+    unsigned saw_sync:1;
+};
+
+/* Fetch 'bits' bits of static metadata: from the MD01 accumulation buffer
+   when one has been allocated, otherwise straight from the frame reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+#define TABLE_BITS 0
+#define TABLE_ADD  1
+/* In the specification Table 5-2, the pseudo code defaults the 'add'
+   parameter to true. Table 7-30 shows passing an explicit false, most
+   other calls do not pass the extractAndAdd parameter.  In the function
+   below, the indirection index and add parameter are pre-applied to the
+   input table itself.  The original table from the specification is
+   converted to the table used by get_bits_var as follows:
+
+    int original_table[4] = { 1, 2, 3, 4 }; // From specification pseudo code
+    int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; // Indirection index
+    for (i = 0; i < 8; i++) {
+        table[TABLE_BITS][i] = original_table[index_table[i]];
+        table[TABLE_ADD][i] = 0;
+        for (index = 0; index < index_table[i]; index++)
+            table[TABLE_ADD][i] += 1 << original_table[index];
+    }
+*/
+
+static int get_bits_var(GetBitContext *gb, const int table[2][8])
+{
+    /* Length of the prefix code selected by the next three bits. */
+    static const int prefix_len[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    const int code = show_bits(gb, 3); /* value range is [0, 7] */
+    const int nbits = table[TABLE_BITS][code];
+
+    /* Only the prefix is consumed here; the payload (if any) follows it. */
+    skip_bits(gb, prefix_len[code]);
+
+    /* A zero-width payload decodes to 0, without the additive offset. */
+    return nbits ? get_bits_long(gb, nbits) + table[TABLE_ADD][code] : 0;
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+   Grows h->md01 by one zero-initialised entry tagged with chunk id 'id'.
+   Returns the new entry, or NULL if the array could not be grown; in that
+   case the existing entries and h->md01_count are left untouched. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *grown;
+    MD01 *entry;
+
+    /* av_reallocp_array() would free the array and NULL h->md01 on failure
+       while md01_count still claimed entries (and their buffers would leak),
+       so grow through a temporary and keep the old array if it fails. */
+    grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count++;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    return entry;
+}
+
+/* Look up a previously appended MD01 chunk by its chunk id; the first match
+   in list order is returned, NULL when no chunk carries that id. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int pos = 0;
+
+    while (pos < h->md01_count) {
+        MD01 *candidate = &h->md01[pos++];
+
+        if (candidate->chunk_id == id)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3.  Release every MD01 chunk together with its static metadata
+   buffer and leave the list empty. */
+static void chunk_reset(DTSUHD *h)
+{
+    while (h->md01_count > 0)
+        av_freep(&h->md01[--h->md01_count].buf);
+
+    av_freep(&h->md01);
+    h->md01_count = 0;
+}
+
+/* Pick the object describing the default audio presentation: within the
+   first MD01 chunk that holds any started object of a selectable
+   presentation, the object with the lowest presentation index (the first
+   one wins on ties).  Returns NULL when no such object exists. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    int chunk, slot;
+
+    for (chunk = 0; chunk < h->md01_count; chunk++) {
+        MDObject *objects = h->md01[chunk].object;
+        MDObject *best = NULL;
+
+        for (slot = 0; slot < 257; slot++) {
+            MDObject *cand = &objects[slot];
+
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (!best || cand->pres_index < best->pres_index)
+                best = cand;
+        }
+
+        if (best)
+            return best;
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   For every activity_map row whose activity bits intersect
+   object->ch_activity_mask, the row's ETSI and FFmpeg channel masks are OR-ed
+   into 'info'.  channel_count is the population count of the ETSI mask.
+   Nothing is written when 'object' is NULL.
+*/
+static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        // AV_CH_* are bit masks; the AV_CHAN_* names are enum indices and
+        // must not be OR-ed into a channel mask.
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        // NOTE(review): 0x140000 breaks the one-bit-per-row pattern (0x040000
+        // would be next) -- confirm against Table 7-28.  This row and the next
+        // reuse FFmpeg channels already mapped above, so the FFmpeg mask can
+        // hold fewer bits than channel_count.
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        for (i = 0; activity_map[i].activity_mask; i++) {
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                info->channel_mask |= activity_map[i].channel_mask;
+                info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        info->channel_count = av_popcount(info->channel_mask);
+        info->rep_type = object->rep_type;
+    }
+}
+
+/* Fill 'info' with the values needed for the MP4 Sample Entry box.  The
+   structure is cleared first.  Sample size is fixed at 16 bits.  The coding
+   name (the SampleEntry sub-box name) is 'dtsx' unless the bitstream major
+   version is above 2, in which case it is 'dtsy' and the max payload code
+   becomes 1 instead of 0.
+*/
+static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
+{
+    static const char *coding_name[] = { "dtsx", "dtsy" };
+    const int beyond_v2 = h->major_version > 2;
+
+    memset(info, 0, sizeof(*info));
+
+    /* Four characters plus the terminating NUL. */
+    memcpy(info->coding_name, coding_name[beyond_v2], 5);
+
+    /* Channel masks, channel count and rep type of the default object. */
+    extract_object_info(find_default_audio(h), info);
+
+    info->sample_rate           = h->sample_rate;
+    info->sample_rate_mod       = h->sample_rate_mod;
+    info->sample_size           = 16;
+    info->base_sample_freq_code = h->sample_rate == 48000;
+    info->frame_duration_code   = h->frame_duration_code;
+    info->decoder_profile_code  = h->major_version - 2;
+    info->max_payload_code      = beyond_v2;
+    info->num_pres_code         = h->num_audio_pres - 1;
+    info->valid                 = 1;
+}
+
+/* Table 6-17 p47.
+   For each of the low 'index' bits set in 'mask', skip one explicit object
+   list entry: on sync frames the variable-length value is always present,
+   otherwise a one-bit flag says whether it follows.  The values themselves
+   are discarded.  Always returns 0.
+*/
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    GetBitContext *gb = &h->gb;
+    int i;
+    /* Widths { 4, 8, 16, 32 } expanded through the 3-bit prefix code; the
+       TABLE_BITS row needs all eight entries, otherwise the missing last one
+       is zero-initialised and every later width is off by one slot. */
+    static const int table[2][8] = {
+        { 4, 4, 4, 4, 8, 8, 16, 32 }, { 0, 0, 0, 0, 16, 16, 272, 65808 }
+    };
+
+    for (i = 0; i < index; i++) {
+        if ((mask >> i) & 0x01) {
+            if (h->is_sync_frame || get_bits1(gb))
+                get_bits_var(gb, table);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.
+   On sync frames, read the number of audio presentations and, for each one,
+   its selectable flag and dependency mask; on every frame, skip the explicit
+   object lists of the selectable presentations.
+   Returns 0 on success, 1 on error (including a presentation count this
+   parser cannot represent).
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    int read_mask;
+    static const int table[2][8] = {
+        { 0, 0, 0, 0, 2, 2, 4, 5 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->num_audio_pres = 1;
+        else
+            h->num_audio_pres = get_bits_var(gb, table) + 1;
+
+        /* The table above can yield up to 53 presentations, but the
+           per-presentation mask is read as one 'audio'-bit field into an int
+           and later shifted by up to 'audio' - 1.  Past 32 presentations this
+           exceeds both the bit reader's limit and the width of the mask, so
+           treat such streams as unsupported instead of invoking undefined
+           behaviour.  The count is cleared so that later non-sync frames do
+           not iterate over stale state. */
+        if (h->num_audio_pres > 32) {
+            h->num_audio_pres = 0;
+            return 1;
+        }
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* get_bits() is limited to 25 bits; 'audio' may be up to 31. */
+                read_mask = (audio > 0) ? get_bits_long(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40.  Read the version fields: a one-bit selector chooses
+   3-bit or 6-bit fields, the first field plus 2 is stored as the major
+   version and a second field of the same width is skipped. */
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int field_bits;
+
+    if (get_bits1(gb))
+        field_bits = 3;
+    else
+        field_bits = 6;
+
+    h->major_version = 2 + get_bits(gb, field_bits);
+    skip_bits(gb, field_bits);
+}
+
+/* Table 6-12 p 40.
+   Parse the stream-level parameters at the current position of h->gb.
+   On sync frames this reads the full-channel-mix flag, the version, the
+   frame duration, the clock rate, the sample rate modifier and the
+   interactive-object-limits flag; on other frames only the FTOC CRC is
+   checked (and only when the stream is not a full channel mix).
+   Returns 0 on success, 1 when the CRC over the first h->ftoc_bytes bytes of
+   h->data is non-zero or when the duration / clock rate code selects one of
+   the zero table entries.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1; /* non-zero CRC remainder */
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2; /* no version fields in this mode */
+        else
+            decode_version(h);
+
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1); /* base duration times 1..8 */
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod); /* clock rate times 1, 2, 4 or 8 */
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the byte count of every NAVI entry that is not
+   marked present. */
+static void navi_purge(DTSUHD *h)
+{
+    NAVI *entry = h->navi;
+    int remaining;
+
+    for (remaining = h->navi_count; remaining > 0; remaining--, entry++) {
+        if (entry->present)
+            continue;
+        entry->bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.  Wipe the used part of the NAVI list and mark it empty;
+   the allocation itself is kept. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL)
+        memset(h->navi, 0, h->navi_count * sizeof(*h->navi));
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Drop the 'present' mark from every NAVI entry. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int pos = 0;
+
+    while (pos < h->navi_count) {
+        h->navi[pos].present = 0;
+        pos++;
+    }
+}
+
+/* Table 6-23 p51.  Return 0 on success, and the index is returned in
+   the *list_index parameter.
+
+   If an entry with 'desired_index' exists it is marked present and reused.
+   Otherwise the first free entry (not present and zero bytes) is claimed, or
+   the list is extended by one, and the entry is initialised with id 256 and
+   zero bytes.  Returns 1 if the list could not be grown; the list is left
+   unchanged in that case.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+    NAVI *grown;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            /* av_reallocp_array() would free the list and NULL h->navi on
+               failure while navi_count / navi_alloc still described it, so
+               grow through a temporary and keep the old list if it fails. */
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+            if (!grown)
+                return 1;
+            h->navi = grown;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.
+   Read the chunk list (size and CRC flag of each chunk) and the audio chunk
+   navigation entries of the current frame, accumulating the total payload
+   size in h->chunk_bytes.  Returns 0 on success, 1 when a list could not be
+   grown.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const int table2468[2][8] = {
+        { 2, 2, 2, 2, 4, 4, 6, 8 }, { 0, 0, 0, 0, 4, 4, 20, 84 }
+    };
+    static const int table_audio_chunk_sizes[2][8] = {
+        { 9, 9, 9, 9, 11, 11, 13, 16 }, { 0, 0, 0, 0, 512, 512, 2560, 10752 }
+    };
+    static const int table_chunk_sizes[2][8] = {
+        { 6, 6, 6, 6, 9, 9, 12, 15 }, { 0, 0, 0, 0, 64, 64, 576, 4672 }
+    };
+
+    h->chunk_bytes = 0;
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        UHDChunk *grown = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
+
+        /* Keep the old array on failure (av_reallocp_array() would free it
+           and leave chunk_alloc stale) and make sure nobody iterates over
+           more chunks than are allocated. */
+        if (!grown) {
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk = grown;
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468);
+
+    /* A sync frame starts a fresh NAVI list; other frames only re-mark the
+       entries they mention. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, table2468);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not mentioned by this frame lose their size. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.
+   Read the list of object ids carried by an MD01 chunk into
+   md01->object_list.  A full channel mix stream has exactly one object with
+   id 256.  Returns 0 on success, 1 when the coded list length does not fit
+   md01->object_list (the list is left empty in that case).
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    static const int table1[2][8] = {
+        { 3, 3, 3, 3, 4, 4, 6, 8 }, { 0, 0, 0, 0, 8, 8, 24, 88 }
+    };
+    const int capacity = sizeof(md01->object_list) / sizeof(md01->object_list[0]);
+    int i;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        md01->object_list_count = get_bits_var(gb, table1);
+
+        /* The widest code yields 255 + 88 = 343 entries, more than the array
+           holds; reject such input rather than write past the end. */
+        if (md01->object_list_count > capacity) {
+            md01->object_list_count = 0;
+            return 1;
+        }
+
+        /* Each id is preceded by a flag selecting an 8-bit or 4-bit field. */
+        for (i = 0; i < md01->object_list_count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Discard one loudness parameter set: 6 bits of rLoudness,
+   followed by 2 bits when nominal_flag is set, or by 5 + 4 bits otherwise. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    get_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        get_bits_md01(h, md01, 2);
+    } else {
+        get_bits_md01(h, md01, 5);
+        get_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8.
+   Walk over the static metadata (read through get_bits_md01()), discarding
+   the values.  With 'only_first' set, parsing stops after the loudness
+   parameter sets; otherwise the remaining fields are skipped as well and
+   md01->static_md_extracted is set.  Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        get_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                get_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) { /* smooth md present */
+            /* 6 * 6 = 36 bits, consumed as two 18-bit reads because
+               get_bits() handles at most 25 bits per call. */
+            get_bits_md01(h, md01, 3 * 6);
+            get_bits_md01(h, md01, 3 * 6);
+        }
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* NOTE(review): packets * packet_size is the buffer size in bytes
+           (see parse_multi_frame_md()), while get_bits_count() is in bits --
+           confirm whether a factor of 8 is missing here. */
+        i = md01->static_md_packets * md01->static_md_packet_size - get_bits_count(&md01->gb);
+        skip_bits(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.
+   Collect static metadata that is spread over several frames.  A sync frame
+   (re)reads the packet count and packet size, sizes md01->buf accordingly
+   and restarts acquisition; every frame then appends one packet to the
+   buffer until all packets are present.  The static parameters are parsed
+   after the first packet (loudness only) and again once the set is complete.
+   Returns 0 on success, 1 on allocation or parse failure.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const int table1[2][8] = {
+        { 0, 0, 0, 0, 6, 6, 9, 12 }, { 0, 0, 0, 0, 1, 1, 65, 577 }
+    };
+    static const int table2[2][8] = {
+        { 5, 5, 5, 5, 7, 7, 9, 11 }, { 0, 0, 0, 0, 32, 32, 160, 672 }
+    };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n)) {
+                /* av_reallocp() has freed the buffer and set it to NULL:
+                   forget its size and the pending packets so that later
+                   frames do not write through the NULL pointer. */
+                md01->buf_bytes = 0;
+                md01->static_md_packets = 0;
+                return 1;
+            }
+            md01->buf_bytes = n;
+        }
+
+        /* NOTE(review): the buffer carries no trailing padding for the bit
+           reader -- confirm whether AV_INPUT_BUFFER_PADDING_SIZE extra bytes
+           are required here. */
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already collected. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Returns 1 if the object may be rendered, 0 if not.  Ids of 224 and above
+   are accepted without reading anything; for lower ids a one-bit flag
+   decides, and on rejection the render data is skipped. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    static const int table[2][8] = {
+        { 8, 8, 8, 8, 10, 10, 12, 14 }, { 0, 0, 0, 0, 256, 256, 1280, 5376 }
+    };
+    int skip_len;
+
+    if (object_id >= 224)
+        return 1;
+    if (get_bits1(gb))
+        return 1;
+
+    /* Rejected: one flag bit, then a variable-length count of bits to skip. */
+    skip_bits1(gb);
+    skip_len = get_bits_var(gb, table);
+    skip_bits(gb, skip_len);
+
+    return 0;
+}
+
+/* Table 7-26.  Determine the channel activity mask of 'object'.  Binaural
+   objects always use table entry 1; otherwise a 4-bit index is read, where
+   14 and 15 announce an explicit 16-bit or 32-bit mask and anything else
+   selects a preset from Table 7-27. */
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    GetBitContext *gb = &h->gb;
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    int ch_index;
+
+    if (object->rep_type == REP_TYPE_BINAURAL)
+        ch_index = 1;
+    else
+        ch_index = get_bits(gb, 4);
+
+    switch (ch_index) {
+    case 14:
+        object->ch_activity_mask = get_bits(gb, 16);
+        break;
+    case 15:
+        object->ch_activity_mask = get_bits_long(gb, 32);
+        break;
+    default:
+        object->ch_activity_mask = mask_table[ch_index];
+        break;
+    }
+}
+
+/* Table 7-22.
+   Parse the part of an object's metadata needed to learn its representation
+   type and channel activity mask.  Only when 'start_frame_flag' is set is
+   the representation type read; for the channel-mask based and binaural
+   types the optional fields in front of the mask are skipped and the mask is
+   stored through parse_ch_mask_params().  The remainder of the object is
+   not parsed.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const int table2[2][8] = {
+        { 1, 1, 1, 1, 4, 4, 4, 8 }, { 0, 0, 0, 0, 2, 2, 18, 34 }
+    };
+    static const int table3[2][8] = {
+        { 3, 3, 3, 3, 3, 3, 4, 8 }, { 0, 0, 0, 0, 8, 8, 16, 32 }
+    };
+
+    skip_bits(gb, object_id != 256); /* one leading bit for every id except 256 */
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                get_bits_var(gb, table2); /* value discarded */
+                get_bits_var(gb, table3); /* value discarded */
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag); /* the two flags are mutually exclusive, so this skips 5 bits here */
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4
+ *
+ * Parse an MD01 metadata chunk for the audio presentation 'pres_index'.
+ * Clears md01->object, then walks md01->object_list and parses the metadata
+ * of the first object accepted by is_suitable_for_render().
+ *
+ * Returns 0 on success, 1 if a nested parser reports an error.
+ */
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        /* Each of the four entries is a flag bit optionally followed by 5 bits. */
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        /* Multi-frame metadata is present only when the flag bit is set. */
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        /* NOTE(review): if md01->object is an array member, the memset above
+           has just cleared 'started', so this branch is always taken. */
+        if (!md01->object[id].started) {
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* Object ids 224..255 carry no object metadata to parse here. */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* Only the first suitable object is examined. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2
+ *
+ * Walk every chunk listed in h->chunk[], verifying its CRC when the chunk
+ * carries one, and parse the metadata of chunks whose id is 1 (MD01).
+ * After each chunk the bit reader is repositioned to the start of the next
+ * chunk, regardless of how much of the current one was consumed.
+ *
+ * Returns 0 on success, 1 on CRC mismatch, allocation failure or a metadata
+ * parsing error.
+ */
+static int parse_chunks(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    MD01 *md01;
+    const uint8_t *byte_start;
+    int bit_next;
+    int i;
+    static const int table_aud_pres[2][8] = {
+        { 0, 0, 0, 0, 2, 2, 4, 4 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+    int pres_index;
+    uint32_t id;
+
+    for (i = 0; i < h->chunk_count; i++) {
+        /* Bit position of the first bit following this chunk. */
+        bit_next = get_bits_count(gb) + h->chunk[i].bytes * 8;
+        byte_start = h->data + get_bits_count(gb) / 8;
+        /* A non-zero CRC over the whole chunk means the chunk is corrupt. */
+        if (h->chunk[i].crc_flag && av_crc(h->crc, DTSUHD_CRC_SEED, byte_start, h->chunk[i].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            pres_index = get_bits_var(gb, table_aud_pres);
+            if (pres_index > 255)
+                return 1;
+            /* Reuse the MD01 state for this id, creating it on first use. */
+            md01 = chunk_find_md01(h, id);
+            if (md01 == NULL)
+                md01 = chunk_append_md01(h, id);
+            if (md01 == NULL)
+                return 1;
+            if (parse_md_chunk_list(h, md01))
+                return 1;
+            if (parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        /* Realign to the next chunk; unparsed chunk content is skipped. */
+        skip_bits(gb, bit_next - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/** Allocate a parsing handle.  One handle parses exactly one DTS:X Profile 2
+    audio stream and must be released with av_dtsuhd_destroy() afterwards.
+    Do not share a handle between audio streams.
+
+  @return Zero-initialised parsing handle with its CRC table set, or NULL if
+          the allocation failed.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(*handle));
+
+    if (!handle)
+        return NULL;
+
+    handle->crc = av_crc_get_table(AV_CRC_16_CCITT);
+    return handle;
+}
+
+/** Release a parsing handle and everything it owns.
+
+  @param[in] h Handle allocated by av_dtsuhd_create, may be NULL (no-op)
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_free(h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+    The handle keeps a pointer to 'data' (h->data); the chunk payloads are
+    only parsed when 'di' is non-NULL and the frame is a sync frame.
+
+  @param[in] h Handle allocated by av_dtsuhd_create
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @param[out] di Results of descriptor parsing, may be NULL
+  @return DTSUHD_OK (0) on success, another DTSUHDStatus value on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
+{
+    GetBitContext *gb;
+    int fraction = 1;
+    int i;
+    int syncword;
+    static const int table_payload[2][8] = {
+        { 5, 5, 5, 5, 8, 8, 10, 12 }, { 0, 0, 0, 0, 32, 32, 288, 1312 }
+    };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    init_get_bits(gb, data, data_bytes * 8);
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    /* saw_sync is sticky: once a sync frame was seen, non-sync frames are accepted. */
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    if (di && h->is_sync_frame) {
+        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+           following the FTOC CRC.
+        */
+        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
+        if (parse_chunks(h))
+            return DTSUHD_INVALID_FRAME;
+        update_descriptor(h, di);
+    }
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    /* A present navi entry with id 3 halves, with id 4 quarters the duration;
+       the last matching entry wins. */
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].present) {
+            if (h->navi[i].id == 3)
+                fraction = 2;
+            else if (h->navi[i].id == 4)
+                fraction = 4;
+        }
+    }
+
+    if (fi) {
+        fi->sync = h->is_sync_frame;
+        fi->frame_bytes = h->frame_bytes;
+        fi->sample_rate = h->sample_rate;
+        /* NOTE(review): presumes parse_stream_params() guarantees a non-zero
+           clock_rate; confirm, otherwise this divides by zero. */
+        fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);
+    }
+
+    return DTSUHD_OK;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that chunk.
+
+    Chunks are walked only while they lie inside the supplied buffer; a chunk
+    that claims to extend past the end of the buffer ends the search instead
+    of advancing the read pointer out of bounds.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL; written
+              only when the STRMDATA chunk is found
+  @return STRMDATA payload offset, 0 if the buffer does not start with
+          DTSHDHDR or no STRMDATA chunk was found in it, or
+          AVERROR_INVALIDDATA if a chunk header carries an implausible size
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    const int header = DTSUHD_CHUNK_HEADER;
+    const uint8_t *data = data_start;
+    const uint8_t *data_end;
+    uint64_t chunk_size = 0;
+
+    /* Compare sizes rather than pointers so no out-of-range pointer is formed. */
+    if (data_size <= header || memcmp(data, "DTSHDHDR", 8))
+        return 0;
+
+    data_end = data + data_size;
+
+    while (data_end - data >= header) {
+        chunk_size = AV_RB64(data + 8);
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+
+        /* STRMDATA usually extends beyond the buffer, so test it first. */
+        if (!memcmp(data, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)(data - data_start) + header;
+        }
+
+        /* The next chunk header would start outside the buffer: stop. */
+        if (chunk_size > (uint64_t)(data_end - data) - header)
+            break;
+
+        data += chunk_size + header;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..10280cd203
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,83 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from av_dtsuhd_frame().  DTSUHD_OK is 0, every other value
+   reports why the frame could not be parsed. */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Stream information returned from an audio frame parsed by av_dtsuhd_frame().
+   The *_code fields, channel_mask, sample_rate_mod and rep_type are the raw
+   values the demuxer packs into the 'udts' box it stores as extradata. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code; /* Written as 1 bit in the udts box. */
+    int channel_count;
+    int decoder_profile_code;  /* Written as 6 bits in the udts box. */
+    int frame_duration_code;   /* 2 bits; demuxer frame size is 512 << code. */
+    int max_payload_code;      /* Written as 3 bits in the udts box. */
+    int num_pres_code;         /* Written as 5 bits in the udts box. */
+    int rep_type;              /* Written as 3 bits in the udts box. */
+    int sample_rate;
+    int sample_rate_mod;       /* Written as 2 bits in the udts box. */
+    int sample_size;
+    int channel_mask;          /* Written as 32 bits in the udts box. */
+    uint64_t ffmpeg_channel_mask; /* Used by the demuxer as the native ch_layout mask. */
+} DTSUHDDescriptorInfo;
+
+/* Frame information returned from an audio frame parsed by av_dtsuhd_frame().
+   Filled in only when the call returns DTSUHD_OK. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+/* Opaque parsing handle; the definition lives in dtsuhd_common.c. */
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+/* Allocate a parsing handle; returns NULL on allocation failure. */
+struct DTSUHD *av_dtsuhd_create(void);
+/* Free a handle returned by av_dtsuhd_create(); NULL is accepted. */
+void av_dtsuhd_destroy(DTSUHD*);
+/* Parse one frame starting at 'data'; returns a DTSUHDStatus value.
+   Either output structure may be NULL. */
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    DTSUHDFrameInfo*, DTSUHDDescriptorInfo*);
+/* Locate the STRMDATA payload inside DTSHDHDR file content; returns its
+   offset, 0 if not found, or a negative AVERROR code on a bad chunk size. */
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size);
+
+/* Return 1 if 'syncword' is either DTS-UHD frame signature (sync or
+   non-sync), 0 otherwise. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    switch (syncword) {
+    case DTSUHD_SYNCWORD:
+    case DTSUHD_NONSYNCWORD:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..e8058e1701
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Parser state: input is accumulated in one contiguous buffer because
+   av_dtsuhd_frame() needs the whole frame in consecutive memory. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;   /* Frame parsing handle from av_dtsuhd_create(). */
+    int buf_offset;   /* Offset in buf of the first byte not yet output. */
+    int buf_bytes;    /* Number of bytes stored in buf. */
+    int frame_bytes;  /* Bytes handed out by the previous parse call; skipped on the next one. */
+    uint8_t *buf;     /* DTSUHD_BUFFER_SIZE bytes plus input padding. */
+} DTSUHDParseContext;
+
+/* Allocate the frame parsing handle and the accumulation buffer.
+   Returns 0 on success or AVERROR(ENOMEM); on failure nothing stays
+   allocated, since the generic parser code does not call parser_close()
+   after a failed init. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = av_dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        /* Release whichever of the two allocations succeeded. */
+        av_dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Free the frame parsing handle and the accumulation buffer.
+   ff_parse_close() is deliberately not called: it interprets priv_data as a
+   ParseContext, which DTSUHDParseContext does not embed. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+}
+
+// Keep data in contiguous buffer as required by av_dtsuhd_frame.
+//
+// Drops the frame returned by the previous call, appends as much of the new
+// input as fits, and advances to the next syncword.  On return *buf and
+// *buf_size describe the buffered data starting at that syncword and
+// *input_consumed is the number of input bytes copied.
+//
+// Returns non-zero when input was copied but less than one maximum-size
+// frame is buffered, i.e. the caller should wait for more data.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+    int remaining;
+
+    pc->buf_offset += pc->frame_bytes;
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes after buf_offset are still valid; moving buf_bytes
+        // from that offset would read past the stored data.
+        remaining = FFMAX(0, pc->buf_bytes - pc->buf_offset);
+        memmove(pc->buf, pc->buf + pc->buf_offset, remaining);
+        pc->buf_bytes = remaining;
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: buffer the input and output at most one complete frame.
+   Returns the number of input bytes consumed.  The parser API does not allow
+   returning AVERROR codes, so an unparsable frame is logged, produces no
+   output, and one buffered byte is dropped so the next call resynchronises
+   on the following syncword. */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    /* Not enough data buffered for a frame yet: output nothing. */
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (av_dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        /* Remember the frame size so the next call skips past this frame. */
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        pc->frame_bytes = buf_size;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* Step over the rejected syncword (if any data is buffered). */
+        pc->frame_bytes = buf_size > 0;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* Parser registration for AV_CODEC_ID_DTSUHD.  Declared const to match the
+   extern declaration in libavcodec/parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 230d5fa13e..80e2ae630d 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR   9
+#define LIBAVCODEC_VERSION_MINOR  10
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Roy Funderburk April 17, 2023, 4:13 a.m. UTC | #9
Demuxing of DTS-UHD input files per ETSI TS 102 114 is added as
demuxer "dtsuhd".  movenc supports DTS-UHD audio track.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 Changelog                 |   1 +
 configure                 |   1 +
 doc/general_contents.texi |   1 +
 libavformat/Makefile      |   1 +
 libavformat/allformats.c  |   1 +
 libavformat/dtshddec.c    |   2 +-
 libavformat/dtsuhddec.c   | 216 ++++++++++++++++++++++++++++++++++++++
 libavformat/movenc.c      |  32 ++++++
 libavformat/version.h     |   2 +-
 9 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100644 libavformat/dtsuhddec.c

diff --git a/Changelog b/Changelog
index a40f32c23f..f683b49bb2 100644
--- a/Changelog
+++ b/Changelog
@@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
 
 version <next>:
 - libaribcaption decoder
+- DTS-UHD demuxer
 
 version 6.0:
 - Radiance HDR image support
diff --git a/configure b/configure
index 033db7442d..557821ceef 100755
--- a/configure
+++ b/configure
@@ -3425,6 +3425,7 @@ dash_demuxer_deps="libxml2"
 dirac_demuxer_select="dirac_parser"
 dts_demuxer_select="dca_parser"
 dtshd_demuxer_select="dca_parser"
+dtsuhd_demuxer_select="dtsuhd_parser"
 dv_demuxer_select="dvprofile"
 dv_muxer_select="dvprofile"
 dxa_demuxer_select="riffdec"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 2eeebd847d..e1ba9c4597 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -597,6 +597,7 @@ library:
 @item raw DNxHD                 @tab X @tab X
 @item raw DTS                   @tab X @tab X
 @item raw DTS-HD                @tab   @tab X
+@item raw DTS-UHD               @tab   @tab X
 @item raw E-AC-3                @tab X @tab X
 @item raw FLAC                  @tab X @tab X
 @item raw GSM                   @tab   @tab X
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 048649689b..42cf19348f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -186,6 +186,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
 OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
 OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
 OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
+OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
 OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
 OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index cb5b69e9cd..1b48ce6073 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -144,6 +144,7 @@ extern const AVInputFormat  ff_dss_demuxer;
 extern const AVInputFormat  ff_dts_demuxer;
 extern const FFOutputFormat ff_dts_muxer;
 extern const AVInputFormat  ff_dtshd_demuxer;
+extern const AVInputFormat  ff_dtsuhd_demuxer;
 extern const AVInputFormat  ff_dv_demuxer;
 extern const FFOutputFormat ff_dv_muxer;
 extern const AVInputFormat  ff_dvbsub_demuxer;
diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
index a3dea0668f..6e9e78a335 100644
--- a/libavformat/dtshddec.c
+++ b/libavformat/dtshddec.c
@@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
 static int dtshd_probe(const AVProbeData *p)
 {
     if (AV_RB64(p->buf) == DTSHDHDR)
-        return AVPROBE_SCORE_MAX;
+        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature.
     return 0;
 }
 
diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
new file mode 100644
index 0000000000..d840c0a033
--- /dev/null
+++ b/libavformat/dtsuhddec.c
@@ -0,0 +1,216 @@
+/*
+ * DTS-UHD audio demuxer
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Report DTS-UHD audio stream configuration and extract raw packet data.
+ */
+
+#include "internal.h"
+#include "libavcodec/dtsuhd_common.h"
+#include "libavcodec/put_bits.h"
+#include "libavutil/intreadwrite.h"
+
+#define DTSUHD_BUFFER_SIZE (1024 * 1024)
+
+/* Demuxer private state. */
+typedef struct DTSUHDDemuxContext {
+    size_t data_end;       /* File offset at which read_packet() reports EOF. */
+    struct DTSUHD *dtsuhd; /* Frame parsing handle from av_dtsuhd_create(). */
+    uint8_t *buf;          /* DTSUHD_BUFFER_SIZE bytes holding the start of the file. */
+} DTSUHDDemuxContext;
+
+/* Probe callback: skip any DTSHDHDR chunk headers, then look for the first
+   syncword at which av_dtsuhd_frame() succeeds.  Returns
+   AVPROBE_SCORE_MAX - 3 when such a frame is found, 0 otherwise. */
+static int probe(const AVProbeData *p)
+{
+    int pos = av_dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
+    struct DTSUHD *parser = av_dtsuhd_create();
+    int result = 0;
+
+    if (!parser || pos < 0)
+        goto done;
+
+    while (pos + 4 < p->buf_size) {
+        const uint8_t *candidate = p->buf + pos;
+
+        if (dtsuhd_is_syncword(AV_RB32(candidate)) &&
+            av_dtsuhd_frame(parser, candidate, p->buf_size - pos, NULL, NULL) == DTSUHD_OK) {
+            result = AVPROBE_SCORE_MAX - 3;
+            break;
+        }
+        pos++;
+    }
+
+done:
+    av_dtsuhd_destroy(parser);
+    return result;
+}
+
+/* Release the parsing handle and the header buffer.  Always returns 0. */
+static av_cold int read_close(AVFormatContext *s)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+
+    av_dtsuhd_destroy(ctx->dtsuhd);
+    ctx->dtsuhd = NULL;
+    av_freep(&ctx->buf);
+
+    return 0;
+}
+
+/* Return the offset of the first DTS-UHD syncword in dtsuhd->buf at or after
+   data_start, or the offset at which the scan stopped when none was found.
+   NOTE(review): the scan is bounded by the buffer capacity
+   (DTSUHD_BUFFER_SIZE), not by the number of bytes read into the buffer;
+   callers should verify the result against the amount of valid data. */
+static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
+{
+    int pos;
+
+    for (pos = data_start; pos + 4 < DTSUHD_BUFFER_SIZE; pos++) {
+        if (dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + pos)))
+            break;
+    }
+
+    return pos;
+}
+
+/* Build a 'udts' box from the descriptor info and store it as the codec
+   parameters' extradata.  The box is assembled in a stack buffer, its size
+   field is patched once the length is known, and the result is copied into
+   freshly allocated extradata.
+   Returns 0 on success or the negative error from ff_alloc_extradata(). */
+static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di)
+{
+    PutBitContext pbc;
+    int ret;
+    int size;
+    uint8_t udts[32];
+
+    init_put_bits(&pbc, udts, sizeof(udts));
+    put_bits32(&pbc, 0); // udts box size
+    put_bits32(&pbc, AV_RB32("udts")); // udts box signature
+    put_bits(&pbc, 6, di->decoder_profile_code);
+    put_bits(&pbc, 2, di->frame_duration_code);
+    put_bits(&pbc, 3, di->max_payload_code);
+    put_bits(&pbc, 5, di->num_pres_code);
+    put_bits32(&pbc,  di->channel_mask);
+    put_bits(&pbc, 1, di->base_sample_freq_code);
+    put_bits(&pbc, 2, di->sample_rate_mod);
+    put_bits(&pbc, 3, di->rep_type);
+    put_bits(&pbc, 3, 0); // two fields always written as zero
+    put_bits(&pbc, 1, 0);
+    put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for each presentation.
+    flush_put_bits(&pbc); // byte align
+    size = put_bytes_output(&pbc);
+    AV_WB32(udts, size); // patch the box size placeholder written first
+
+    ret = ff_alloc_extradata(par, size);
+    if (ret < 0)
+        return ret;
+
+    memcpy(par->extradata, udts, size);
+
+    return 0;
+}
+
+/* Read the start of the file, locate the first DTS-UHD frame (skipping any
+   DTSHDHDR chunk headers), parse it and set up the single audio stream from
+   the resulting descriptor.  The I/O position is left at the first frame.
+   Returns 0 on success or a negative AVERROR code. */
+static int read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    /* Zero-initialised so di.valid is well defined whatever the parser did. */
+    DTSUHDDescriptorInfo di = { 0 };
+    DTSUHDFrameInfo fi = { 0 };
+    int buf_bytes;
+    int ret = DTSUHD_INVALID_FRAME;
+    int data_start;
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
+        return AVERROR(EIO);
+
+    /* Zeroed so the syncword scan never inspects uninitialised memory when
+       fewer than DTSUHD_BUFFER_SIZE bytes are read. */
+    dtsuhd->buf = av_mallocz(DTSUHD_BUFFER_SIZE);
+    dtsuhd->dtsuhd = av_dtsuhd_create();
+    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st)
+        return AVERROR(ENOMEM);
+
+    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
+    if (buf_bytes < 0)
+        return buf_bytes;
+
+    data_start = av_dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end);
+    if (data_start < 0)
+        return data_start;
+
+    dtsuhd->data_end += data_start;
+    if (data_start == 0)
+        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file.
+
+    data_start = find_first_syncword(dtsuhd, data_start);
+    /* The scan is bounded by the buffer capacity; reject a position beyond
+       the bytes actually read, which would otherwise yield a negative
+       (hence huge unsigned) length below. */
+    if (data_start + 4 > buf_bytes) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (avio_seek(pb, data_start, SEEK_SET) < 0)
+        return AVERROR(EINVAL);
+
+    ret = av_dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
+        buf_bytes - data_start, &fi, &di);
+    if (ret != DTSUHD_OK || !di.valid) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ffstream(st)->need_parsing          = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id              = s->iformat->raw_codec_id;
+    st->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
+    st->codecpar->ch_layout.nb_channels = di.channel_count;
+    st->codecpar->ch_layout.u.mask      = di.ffmpeg_channel_mask;
+    st->codecpar->codec_tag             = AV_RL32(di.coding_name);
+    st->codecpar->frame_size            = 512 << di.frame_duration_code;
+    st->codecpar->sample_rate           = di.sample_rate;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    st->codecpar->channels       = di.channel_count;
+    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ret = write_extradata(st->codecpar, &di);
+    if (ret < 0)
+        return ret;
+
+    if (st->codecpar->sample_rate)
+        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+}
+
+/* Read the next raw slice of audio data: at most DTSUHD_MAX_FRAME_SIZE bytes
+   and never past data_end.  Frame boundaries are left to the parser.
+   Returns the av_get_packet() result, or AVERROR_EOF once data_end is
+   reached. */
+static int read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+    int64_t remaining = ctx->data_end - avio_tell(s->pb);
+    int64_t want = FFMIN(remaining, DTSUHD_MAX_FRAME_SIZE);
+    int ret;
+
+    if (want <= 0)
+        return AVERROR_EOF;
+
+    ret = av_get_packet(s->pb, pkt, want);
+    if (ret >= 0)
+        pkt->stream_index = 0;
+
+    return ret;
+}
+
+/* Demuxer registration.  Declared const to match the extern declaration in
+   libavformat/allformats.c.  FF_FMT_INIT_CLEANUP makes the generic code call
+   read_close() when read_header() fails, so the buffer and parsing handle
+   allocated there are not leaked on error paths. */
+const AVInputFormat ff_dtsuhd_demuxer = {
+    .name           = "dtsuhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
+    .priv_data_size = sizeof(DTSUHDDemuxContext),
+    .read_probe     = probe,
+    .read_header    = read_header,
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .flags_internal = FF_FMT_INIT_CLEANUP,
+    .extensions     = "dtsx",
+    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
+};
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c370922c7d..e727407694 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -762,6 +762,24 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
     return update_size(pb, pos);
 }
 
+/* Write the DTS-UHD 'udts' box held in the track's vos_data.
+   Returns the number of bytes written, 0 when vos_data does not hold a udts
+   box, or AVERROR(EINVAL) when vos_data is too short to contain one. */
+static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
+{
+    if (track->vos_len < 12) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before DTS-UHD packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Write vos_data only if it is a udts box (signature follows the size). */
+    if (memcmp(track->vos_data + 4, "udts", 4) != 0)
+        return 0;
+
+    avio_write(pb, track->vos_data, track->vos_len);
+    return track->vos_len;
+}
+
 static int mov_pcm_le_gt16(enum AVCodecID codec_id)
 {
     return codec_id == AV_CODEC_ID_PCM_S24LE ||
@@ -1367,6 +1385,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         ret = mov_write_dops_tag(s, pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
         ret = mov_write_dmlp_tag(s, pb, track);
+    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
+        ret = mov_write_udts_tag(pb, track);
     else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
         if (track->par->ch_layout.nb_channels > 1)
             ret = mov_write_chnl_tag(s, pb, track);
@@ -2781,6 +2801,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_id == AV_CODEC_ID_TRUEHD ||
          track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
+         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -5673,6 +5694,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
     }
 }
 
+/* Flag the sample being written as a sync sample when the packet starts with
+   the DTS-UHD sync frame signature (0x40411BF2, DTSUHD_SYNCWORD in
+   libavcodec/dtsuhd_common.h). */
+static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) {
+        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+        trk->has_keyframes++;
+    }
+}
+
 static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
 {
     int length;
@@ -6343,6 +6372,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         mov_parse_vc1_frame(pkt, trk);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
+        mov_parse_dtsuhd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
         if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
             trk->entry > 0) { // force sync sample for the first key frame
@@ -7800,6 +7831,7 @@ static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
     { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
     { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
+    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
     { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
     { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
     { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
diff --git a/libavformat/version.h b/libavformat/version.h
index cc56b7cf5c..384cbd49cc 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #include "version_major.h"
 
-#define LIBAVFORMAT_VERSION_MINOR   4
+#define LIBAVFORMAT_VERSION_MINOR   5
 #define LIBAVFORMAT_VERSION_MICRO 101
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
Roy Funderburk May 8, 2023, 5:49 p.m. UTC | #10
Hi,

If there is anything I could do to improve this submission, please tell me.

Thanks,
-Roy

On 4/16/23 9:12 PM, Roy Funderburk wrote:
> Parsing of DTS-UHD input files per ETSI TS 102 114 is added
> as parser for codec id AV_CODEC_ID_DTSUHD.
> 
> Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
Paul B Mahol May 13, 2023, 7:28 a.m. UTC | #11
On Mon, May 8, 2023 at 7:49 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

> Hi,
>
> If there is anything I could do to improve this submission, please tell me.
>
>
How can these additions be tested? Are any samples available?


> Thanks,
> -Roy
>
> On 4/16/23 9:12 PM, Roy Funderburk wrote:
> > Parsing of DTS-UHD input files per ETSI TS 102 114 is added
> > as parser for codec id AV_CODEC_ID_DTSUHD.
> >
> > Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Roy Funderburk May 15, 2023, 2:50 p.m. UTC | #12
On 5/13/23 12:28 AM, Paul B Mahol wrote:
> How to test this additions, any samples available?


Hi,

I have uploaded a DTS:X audio sample to google drive:

drive. google. com /drive/folders/ 1jbfsZvnrBDmgQS0VLKBAnsZaxgf28vYh?usp=sharing

Thanks for looking this over,
-Roy
Michael Niedermayer May 15, 2023, 8:35 p.m. UTC | #13
On Mon, May 15, 2023 at 07:50:27AM -0700, Roy Funderburk wrote:
> On 5/13/23 12:28 AM, Paul B Mahol wrote:
> > How to test this additions, any samples available?
> 
> 
> Hi,
> 
> I have uploaded a DTS:X audio sample to google drive:
> 
> drive. google. com /drive/folders/ 1jbfsZvnrBDmgQS0VLKBAnsZaxgf28vYh?usp=sharing

It seems I am unable to download this. Google complains about some third-party
cookies being disabled when I try to download. This seems new,
which is kind of odd and lame of Google.
I think getting Google's surveillance machinery working in my browser is too
much work, so I'll hope this file finds its way to samples or something.

thx

[...]
Roy Funderburk May 15, 2023, 9:14 p.m. UTC | #14
On 5/15/23 1:35 PM, Michael Niedermayer wrote:
> Seems iam unable to download this. google complains about some 3rd party cookies
> being disabled when i try to download. This seems new


Hi,

Since google drive is not working out, I am uploading a small 78k (under the 100k limit) audio file here.  The following ffmpeg command will invoke the code in the patches.

ffmpeg -i short.dtsx -c copy -y out.mp4

Regards,
-Roy
Paul B Mahol June 13, 2023, 2:26 p.m. UTC | #15
On Mon, May 15, 2023 at 11:15 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

> On 5/15/23 1:35 PM, Michael Niedermayer wrote:
> > Seems iam unable to download this. google complains about some 3rd party
> cookies
> > being disabled when i try to download. This seems new
>
>
> Hi,
>
> Since google drive is not working out, I am uploading a small 78k (under
> the 100k limit) audio file here.  The following ffmpeg command will invoke
> the code in the patches.
>
> ffmpeg -i short.dtsx -c copy -y out.mp4
>
>
Why do we need new av_* calls? Can you elaborate on the logic behind this
approach to implementing the parser?


> Regards,
> -Roy_______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Roy Funderburk June 13, 2023, 5:43 p.m. UTC | #16
On 6/13/23 7:26 AM, Paul B Mahol wrote:
> Why we need new av_* calls, can you elaborate logic behind such approach to
> implement parser?


There is common code for dtsuhd audio frame parsing (dtsuhd_common.c) used by the libavcodec and libavformat DTS-UHD modules.  It is complex enough that we do not want to duplicate it.

If you refer to the naming of av_*, would changing the names to ff_dtsuhd_* as in libavcodec/aac_ac3_parser.c be more appropriate?

Thanks,
-Roy
Paul B Mahol June 13, 2023, 6:09 p.m. UTC | #17
On Tue, Jun 13, 2023 at 7:43 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

>
>
> On 6/13/23 7:26 AM, Paul B Mahol wrote:
> > Why we need new av_* calls, can you elaborate logic behind such approach
> to
> > implement parser?
>
>
> There is common code for dtsuhd audio frame parsing (dtsuhd_common.c) used
> by the libavcodec and libavformat DTS-UHD modules.  It is complex enough
> that we do not want to duplicate it.
>

A parser just splits the bitstream into packets, and the packets are then passed
to the decoder. In that case the demuxer passes fixed-size packets to the parser,
minus optional header/trailer bytes.
There should be no reason for such complexity in the parser and/or demuxer if
the bitstream contains valid markers for the start/end of each packet that is
given to the decoder.

The current complexity is valid only if this format uses packets that may not be
byte-aligned, and/or the markers are useless, and/or there is no size info for
each packet fed to the decoder.


>
> If you refer to the naming of av_*, would changing the names to
> ff_dtsuhd_* as in libavcodec/aac_ac3_parser.c be more appropriate?
>

That is purely for visuals, and not relevant in discussing issues.


>
> Thanks,
> -Roy
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Roy Funderburk June 13, 2023, 6:20 p.m. UTC | #18
On 6/13/23 11:09 AM, Paul B Mahol wrote:
> Parser just splits bitstream into packets and packets are then passed to
> decoders. Demuxer in such case pass fixed packet sizes to parser minus
> optional header/trailer bytes.
> There should be no reason for such complexity in parser and/or demuxer if
> there are in bitstream valid markers for start/end of packet that is given
> to decoder.
> 
> Unless this format uses packets that may be not byte aligned than and/or
> markers are useless and/or there are no size info of each packet feed to
> decoder in such case and only in such case current complexity is valid.


The dtsuhd audio frames are variable length.  The length of the audio frames cannot be determined by reading a field in the frame header, instead the frame must be parsed and only then can the frame length be determined by adding the sizes of the elements parsed.  So we do have the complexity you refer to.  While the frame signature (first four bytes) is unlikely to occur within a frame, such a false positive is possible, so we cannot rely on the signature alone.
Paul B Mahol June 13, 2023, 6:32 p.m. UTC | #19
On Mon, Apr 17, 2023 at 6:14 AM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

> Demuxing of DTS-UHD input files per ETSI TS 102 114 is added as
> demuxer "dtsuhd".  movenc supports DTS-UHD audio track.
>
> Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
> ---
>  Changelog                 |   1 +
>  configure                 |   1 +
>  doc/general_contents.texi |   1 +
>  libavformat/Makefile      |   1 +
>  libavformat/allformats.c  |   1 +
>  libavformat/dtshddec.c    |   2 +-
>  libavformat/dtsuhddec.c   | 216 ++++++++++++++++++++++++++++++++++++++
>  libavformat/movenc.c      |  32 ++++++
>  libavformat/version.h     |   2 +-
>  9 files changed, 255 insertions(+), 2 deletions(-)
>  create mode 100644 libavformat/dtsuhddec.c
>
> diff --git a/Changelog b/Changelog
> index a40f32c23f..f683b49bb2 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
>
>  version <next>:
>  - libaribcaption decoder
> +- DTS-UHD demuxer
>
>  version 6.0:
>  - Radiance HDR image support
> diff --git a/configure b/configure
> index 033db7442d..557821ceef 100755
> --- a/configure
> +++ b/configure
> @@ -3425,6 +3425,7 @@ dash_demuxer_deps="libxml2"
>  dirac_demuxer_select="dirac_parser"
>  dts_demuxer_select="dca_parser"
>  dtshd_demuxer_select="dca_parser"
> +dtsuhd_demuxer_select="dtsuhd_parser"
>  dv_demuxer_select="dvprofile"
>  dv_muxer_select="dvprofile"
>  dxa_demuxer_select="riffdec"
> diff --git a/doc/general_contents.texi b/doc/general_contents.texi
> index 2eeebd847d..e1ba9c4597 100644
> --- a/doc/general_contents.texi
> +++ b/doc/general_contents.texi
> @@ -597,6 +597,7 @@ library:
>  @item raw DNxHD                 @tab X @tab X
>  @item raw DTS                   @tab X @tab X
>  @item raw DTS-HD                @tab   @tab X
> +@item raw DTS-UHD               @tab   @tab
>  @item raw E-AC-3                @tab X @tab X
>  @item raw FLAC                  @tab X @tab X
>  @item raw GSM                   @tab   @tab X
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 048649689b..42cf19348f 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -186,6 +186,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
>  OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
>  OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
>  OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
> +OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
>  OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
>  OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
>  OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index cb5b69e9cd..1b48ce6073 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -144,6 +144,7 @@ extern const AVInputFormat  ff_dss_demuxer;
>  extern const AVInputFormat  ff_dts_demuxer;
>  extern const FFOutputFormat ff_dts_muxer;
>  extern const AVInputFormat  ff_dtshd_demuxer;
> +extern const AVInputFormat  ff_dtsuhd_demuxer;
>  extern const AVInputFormat  ff_dv_demuxer;
>  extern const FFOutputFormat ff_dv_muxer;
>  extern const AVInputFormat  ff_dvbsub_demuxer;
> diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
> index a3dea0668f..6e9e78a335 100644
> --- a/libavformat/dtshddec.c
> +++ b/libavformat/dtshddec.c
> @@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
>  static int dtshd_probe(const AVProbeData *p)
>  {
>      if (AV_RB64(p->buf) == DTSHDHDR)
> -        return AVPROBE_SCORE_MAX;
> +        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have
> this signature.
>      return 0;
>  }
>
> diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
> new file mode 100644
> index 0000000000..d840c0a033
> --- /dev/null
> +++ b/libavformat/dtsuhddec.c
> @@ -0,0 +1,216 @@
> +/*
> + * DTS-UHD audio demuxer
> + * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Report DTS-UHD audio stream configuration and extract raw packet data.
> + */
> +
> +#include "internal.h"
> +#include "libavcodec/dtsuhd_common.h"
> +#include "libavcodec/put_bits.h"
> +#include "libavutil/intreadwrite.h"
> +
> +#define DTSUHD_BUFFER_SIZE (1024 * 1024)
> +
> +typedef struct DTSUHDDemuxContext {
> +    size_t data_end;
> +    struct DTSUHD *dtsuhd;
> +    uint8_t *buf;
> +} DTSUHDDemuxContext;
> +
> +static int probe(const AVProbeData *p)
> +{
> +    int offset = av_dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
> +    int score = 0;
> +    struct DTSUHD *h = av_dtsuhd_create();
> +
> +    if (h && offset >= 0) {
> +        for (; offset + 4 < p->buf_size; offset++) {
> +            if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) {
> +                if (DTSUHD_OK == av_dtsuhd_frame(h, p->buf + offset,
> p->buf_size - offset, NULL, NULL)) {
> +                    score = AVPROBE_SCORE_MAX - 3;
> +                    break;
> +                }
> +            }
> +        }
> +    }
> +
> +    av_dtsuhd_destroy(h);
>

Doing allocation in probe?
How costly is this?
Probing should be very fast.


> +    return score;
> +}
> +
> +static av_cold int read_close(AVFormatContext *s)
> +{
> +    DTSUHDDemuxContext *dtsxs = s->priv_data;
> +
> +    av_freep(&dtsxs->buf);
> +    av_dtsuhd_destroy(dtsxs->dtsuhd);
> +    dtsxs->dtsuhd = NULL;
> +
> +    return 0;
> +}
> +
> +static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
> +{
> +    while (data_start + 4 < DTSUHD_BUFFER_SIZE &&
> +        !dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + data_start))) {
> +        data_start++;
> +    }
> +
> +    return data_start;
> +}
> +
> +static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo
> *di)
> +{
> +    PutBitContext pbc;
> +    int ret;
> +    int size;
> +    uint8_t udts[32];
> +
> +    init_put_bits(&pbc, udts, sizeof(udts));
> +    put_bits32(&pbc, 0); // udts box size
> +    put_bits32(&pbc, AV_RB32("udts")); // udts box signature
> +    put_bits(&pbc, 6, di->decoder_profile_code);
> +    put_bits(&pbc, 2, di->frame_duration_code);
> +    put_bits(&pbc, 3, di->max_payload_code);
> +    put_bits(&pbc, 5, di->num_pres_code);
> +    put_bits32(&pbc,  di->channel_mask);
> +    put_bits(&pbc, 1, di->base_sample_freq_code);
> +    put_bits(&pbc, 2, di->sample_rate_mod);
> +    put_bits(&pbc, 3, di->rep_type);
> +    put_bits(&pbc, 3, 0);
> +    put_bits(&pbc, 1, 0);
> +    put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for
> each presentation.
> +    flush_put_bits(&pbc); // byte align
> +    size = put_bytes_output(&pbc);
> +    AV_WB32(udts, size);
> +
> +    ret = ff_alloc_extradata(par, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    memcpy(par->extradata, udts, size);
> +
> +    return 0;
> +}
> +
> +static int read_header(AVFormatContext *s)
> +{
> +    AVIOContext *pb = s->pb;
> +    AVStream *st = avformat_new_stream(s, NULL);
> +    DTSUHDDemuxContext *dtsuhd = s->priv_data;
> +    DTSUHDDescriptorInfo di;
> +    DTSUHDFrameInfo fi;
> +    int buf_bytes;
> +    int ret = DTSUHD_INVALID_FRAME;
> +    int data_start;
> +
> +    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
> +        return AVERROR(EIO);
> +
> +    dtsuhd->buf = av_malloc(DTSUHD_BUFFER_SIZE);
> +    dtsuhd->dtsuhd = av_dtsuhd_create();
> +    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st)
> +        return AVERROR(ENOMEM);
> +
> +    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
> +    if (buf_bytes < 0)
> +        return buf_bytes;
> +
> +    data_start = av_dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes,
> &dtsuhd->data_end);
> +    if (data_start < 0)
> +        return data_start;
> +
> +    dtsuhd->data_end += data_start;
> +    if (data_start == 0)
> +        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file,
> decode frames to end of file.
> +
> +    data_start = find_first_syncword(dtsuhd, data_start);
> +    if (avio_seek(pb, data_start, SEEK_SET) < 0)
> +        return AVERROR(EINVAL);
> +
> +    ret = av_dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
> +        buf_bytes - data_start, &fi, &di);
> +    if (ret != DTSUHD_OK || !di.valid) {
> +        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may
> be invalid.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    ffstream(st)->need_parsing          = AVSTREAM_PARSE_FULL_RAW;
> +    st->codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
> +    st->codecpar->codec_id              = s->iformat->raw_codec_id;
> +    st->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
> +    st->codecpar->ch_layout.nb_channels = di.channel_count;
> +    st->codecpar->ch_layout.u.mask      = di.ffmpeg_channel_mask;
> +    st->codecpar->codec_tag             = AV_RL32(di.coding_name);
> +    st->codecpar->frame_size            = 512 << di.frame_duration_code;
> +    st->codecpar->sample_rate           = di.sample_rate;
> +
> +#if FF_API_OLD_CHANNEL_LAYOUT
> +FF_DISABLE_DEPRECATION_WARNINGS
> +    st->codecpar->channels       = di.channel_count;
> +    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
> +FF_ENABLE_DEPRECATION_WARNINGS
> +#endif
> +
> +    ret = write_extradata(st->codecpar, &di);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (st->codecpar->sample_rate)
> +        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> +
> +    return 0;
> +}
> +
> +static int read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    DTSUHDDemuxContext *dtsuhd = s->priv_data;
> +    int64_t size, left;
> +    int ret;
> +
> +    left = dtsuhd->data_end - avio_tell(s->pb);
> +    size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE);
> +    if (size <= 0)
> +        return AVERROR_EOF;
> +
> +    ret = av_get_packet(s->pb, pkt, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    pkt->stream_index = 0;
> +
> +    return ret;
> +}
> +
> +AVInputFormat ff_dtsuhd_demuxer = {
> +    .name           = "dtsuhd",
> +    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
> +    .priv_data_size = sizeof(DTSUHDDemuxContext),
> +    .read_probe     = probe,
> +    .read_header    = read_header,
> +    .read_packet    = read_packet,
> +    .read_close     = read_close,
> +    .flags          = AVFMT_GENERIC_INDEX,
> +    .extensions     = "dtsx",
> +    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
> +};
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index c370922c7d..e727407694 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -762,6 +762,24 @@ static int mov_write_esds_tag(AVIOContext *pb,
> MOVTrack *track) // Basic
>      return update_size(pb, pos);
>  }
>
> +static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
> +{
> +    if (track->vos_len < 12) {
> +        av_log(pb, AV_LOG_ERROR,
> +               "Cannot write moov atom before DTS-UHD packets."
> +               " Set the delay_moov flag to fix this.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Write vos_data is udts box. */
> +    if (memcmp(track->vos_data + 4, "udts", 4) == 0) {
> +        avio_write(pb, track->vos_data, track->vos_len);
> +        return track->vos_len;
> +    }
> +
> +    return 0;
> +}
> +
>  static int mov_pcm_le_gt16(enum AVCodecID codec_id)
>  {
>      return codec_id == AV_CODEC_ID_PCM_S24LE ||
> @@ -1367,6 +1385,8 @@ static int mov_write_audio_tag(AVFormatContext *s,
> AVIOContext *pb, MOVMuxContex
>          ret = mov_write_dops_tag(s, pb, track);
>      else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
>          ret = mov_write_dmlp_tag(s, pb, track);
> +    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
> +        ret = mov_write_udts_tag(pb, track);
>      else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
>          if (track->par->ch_layout.nb_channels > 1)
>              ret = mov_write_chnl_tag(s, pb, track);
> @@ -2781,6 +2801,7 @@ static int mov_write_stbl_tag(AVFormatContext *s,
> AVIOContext *pb, MOVMuxContext
>      if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>           track->par->codec_id == AV_CODEC_ID_TRUEHD ||
>           track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
> +         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
>           track->par->codec_tag == MKTAG('r','t','p',' ')) &&
>          track->has_keyframes && track->has_keyframes < track->entry)
>          mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
> @@ -5673,6 +5694,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt,
> MOVTrack *trk)
>      }
>  }
>
> +static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
> +{
> +    if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) {
> +        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
> +        trk->has_keyframes++;
> +    }
> + }
> +
>  static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
>  {
>      int length;
> @@ -6343,6 +6372,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket
> *pkt)
>          mov_parse_vc1_frame(pkt, trk);
>      } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
>          mov_parse_truehd_frame(pkt, trk);
> +    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
> +        mov_parse_dtsuhd_frame(pkt, trk);
>      } else if (pkt->flags & AV_PKT_FLAG_KEY) {
>          if (mov->mode == MODE_MOV && par->codec_id ==
> AV_CODEC_ID_MPEG2VIDEO &&
>              trk->entry > 0) { // force sync sample for the first key frame
> @@ -7800,6 +7831,7 @@ static const AVCodecTag codec_mp4_tags[] = {
>      { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
>      { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
>      { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
> +    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
>      { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
>      { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
>      { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
> diff --git a/libavformat/version.h b/libavformat/version.h
> index cc56b7cf5c..384cbd49cc 100644
> --- a/libavformat/version.h
> +++ b/libavformat/version.h
> @@ -31,7 +31,7 @@
>
>  #include "version_major.h"
>
> -#define LIBAVFORMAT_VERSION_MINOR   4
> +#define LIBAVFORMAT_VERSION_MINOR   5
>  #define LIBAVFORMAT_VERSION_MICRO 101
>
>  #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR,
> \
> --
> 2.17.1
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Paul B Mahol June 13, 2023, 6:35 p.m. UTC | #20
On Fri, Apr 14, 2023 at 6:01 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

> Parsing and demuxing of DTS-UHD input files per ETSI TS 102 114 is added
> as demuxer "dtsuhd".  movenc supports DTS-UHD audio track.
>
> Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
> ---
>   Changelog                  |   1 +
>   configure                  |   1 +
>   doc/general_contents.texi  |   1 +
>   libavcodec/Makefile        |   1 +
>   libavcodec/codec_desc.c    |   7 +
>   libavcodec/codec_id.h      |   1 +
>   libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++
>   libavcodec/dtsuhd_common.h |  84 ++++
>   libavcodec/dtsuhd_parser.c | 141 ++++++
>   libavcodec/parsers.c       |   1 +
>   libavformat/Makefile       |   1 +
>   libavformat/allformats.c   |   1 +
>   libavformat/dtshddec.c     |   2 +-
>   libavformat/dtsuhddec.c    | 214 ++++++++
>   libavformat/movenc.c       |  32 ++
>   libavformat/version.h      |   2 +-
>   16 files changed, 1479 insertions(+), 2 deletions(-)
>   create mode 100644 libavcodec/dtsuhd_common.c
>   create mode 100644 libavcodec/dtsuhd_common.h
>   create mode 100644 libavcodec/dtsuhd_parser.c
>   create mode 100644 libavformat/dtsuhddec.c
>
> diff --git a/Changelog b/Changelog
> index a40f32c23f..f683b49bb2 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -3,6 +3,7 @@ releases are sorted from youngest to oldest.
>    version <next>:
>   - libaribcaption decoder
> +- DTS-UHD demuxer
>    version 6.0:
>   - Radiance HDR image support
> diff --git a/configure b/configure
> index 033db7442d..557821ceef 100755
> --- a/configure
> +++ b/configure
> @@ -3425,6 +3425,7 @@ dash_demuxer_deps="libxml2"
>   dirac_demuxer_select="dirac_parser"
>   dts_demuxer_select="dca_parser"
>   dtshd_demuxer_select="dca_parser"
> +dtsuhd_demuxer_select="dtsuhd_parser"
>   dv_demuxer_select="dvprofile"
>   dv_muxer_select="dvprofile"
>   dxa_demuxer_select="riffdec"
> diff --git a/doc/general_contents.texi b/doc/general_contents.texi
> index 2eeebd847d..e1ba9c4597 100644
> --- a/doc/general_contents.texi
> +++ b/doc/general_contents.texi
> @@ -597,6 +597,7 @@ library:
>   @item raw DNxHD                 @tab X @tab X
>   @item raw DTS                   @tab X @tab X
>   @item raw DTS-HD                @tab   @tab X
> +@item raw DTS-UHD               @tab   @tab
>   @item raw E-AC-3                @tab X @tab X
>   @item raw FLAC                  @tab X @tab X
>   @item raw GSM                   @tab   @tab X
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index aa10fbfcf8..f57564e9eb 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1155,6 +1155,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            +=
> dirac_parser.o
>   OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
>   OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o
> dolby_e_parse.o
>   OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
> +OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
>   OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
>   OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
>   OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
> diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
> index efdcb59bc9..a58315f46b 100644
> --- a/libavcodec/codec_desc.c
> +++ b/libavcodec/codec_desc.c
> @@ -3369,6 +3369,13 @@ static const AVCodecDescriptor codec_descriptors[]
> = {
>           .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
>           .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
> AV_CODEC_PROP_LOSSLESS,
>       },
> +    {
> +        .id        = AV_CODEC_ID_DTSUHD,
> +        .type      = AVMEDIA_TYPE_AUDIO,
> +        .name      = "dtsuhd",
> +        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio
> Format)"),
> +        .props     = AV_CODEC_PROP_LOSSY,
> +    },
>        /* subtitle codecs */
>       {
> diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
> index 64df9699f4..6d8b145ee3 100644
> --- a/libavcodec/codec_id.h
> +++ b/libavcodec/codec_id.h
> @@ -538,6 +538,7 @@ enum AVCodecID {
>       AV_CODEC_ID_FTR,
>       AV_CODEC_ID_WAVARC,
>       AV_CODEC_ID_RKA,
> +    AV_CODEC_ID_DTSUHD,
>        /* subtitle codecs */
>       AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID
> pointing at the start of subtitle codecs.
> diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
> new file mode 100644
> index 0000000000..110cb0c371
> --- /dev/null
> +++ b/libavcodec/dtsuhd_common.c
> @@ -0,0 +1,991 @@
> +/*
> + * DTS-UHD common audio frame parsing code
> + * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Parse DTS-UHD audio frame headers, report frame sizes and
> configuration.
> + * Specification: ETSI TS 103 491 V1.2.1
> + */
> +
> +#include <string.h>
> +
> +#include "dtsuhd_common.h"
> +#include "get_bits.h"
> +#include "libavutil/channel_layout.h"
> +
> +#define DTSUHD_ALLOC_INCREMENT 16
> +#define DTSUHD_CHUNK_HEADER    16
> +
> +enum RepType {
> +    REP_TYPE_CH_MASK_BASED,
> +    REP_TYPE_MTRX2D_CH_MASK_BASED,
> +    REP_TYPE_MTRX3D_CH_MASK_BASED,
> +    REP_TYPE_BINAURAL,
> +    REP_TYPE_AMBISONIC,
> +    REP_TYPE_AUDIO_TRACKS,
> +    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
> +    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
> +};
> +
> +typedef struct MDObject {
> +    int started;  /* Object seen since last reset. */
> +    int pres_index;
> +    int rep_type;
> +    int ch_activity_mask;
> +} MDObject;
> +
> +typedef struct MD01 {
> +    GetBitContext gb;
> +    MDObject object[257]; /* object id max value is 256 */
> +    int chunk_id;
> +    int object_list[256]; int object_list_count;
> +    int packets_acquired;
> +    int static_md_extracted;
> +    int static_md_packets;
> +    int static_md_packet_size;
> +    int static_md_update_flag;
> +    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static
> data */
> +} MD01;
> +
> +typedef struct NAVI {
> +    int bytes;
> +    int id;
> +    int index;
> +    int present;
> +} NAVI;
> +
> +typedef struct UHDAudio {
> +    int mask;
> +    int selectable;
> +} UHDAudio;
> +
> +typedef struct UHDChunk {
> +    int crc_flag;
> +    int bytes;
> +} UHDChunk;
> +
> +struct DTSUHD {
> +    const uint8_t *data; int data_bytes;  /* Original audio frame buffer.
> */
> +    GetBitContext gb;
> +    MD01 *md01; int md01_count;
> +    NAVI *navi; int navi_alloc, navi_count;
> +    UHDAudio audio[256];
> +    UHDChunk *chunk; int chunk_alloc, chunk_count;
> +    int chunk_bytes;
> +    int clock_rate;
> +    int frame_bytes;
> +    int frame_duration;
> +    int frame_duration_code;
> +    int ftoc_bytes;
> +    int major_version;
> +    int num_audio_pres;
> +    int sample_rate;
> +    int sample_rate_mod;
> +    unsigned full_channel_mix_flag:1;
> +    unsigned interactive_obj_limits_present:1;
> +    unsigned is_sync_frame:1;
> +    unsigned saw_sync:1;
> +};
> +
> +/* Read from the MD01 buffer (if present), falling back to the frame
> buffer */
> +static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
> +{
> +    if (md01->buf)
> +        return get_bits(&md01->gb, bits);
> +    return get_bits(&h->gb, bits);
> +}
> +
> +/* In the specification, the pseudo code defaults the 'add' parameter to
> true.
> +   Table 7-30 shows passing an explicit false, most other calls do not
> +   pass the extractAndAdd parameter.
> +
> +   Function based on code in Table 5-2
> +*/
> +static int get_bits_var(GetBitContext *gb, const uint8_t table[], int add)
> +{
> +    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
> +    static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 };
> +    int code = show_bits(gb, 3); /* value range is [0, 7] */
> +    int i;
> +    int index = index_table[code];
> +    int value = 0;
> +
> +    skip_bits(gb, bits_used[code]);
> +    if (table[index] > 0) {
> +        if (add) {
> +            for (i = 0; i < index; i++)
> +                value += 1 << table[i];
> +        }
> +        value += get_bits_long(gb, table[index]);
> +    }
> +
> +    return value;
> +}
> +
> +/* Implied by Table 6-2, MD01 chunk objects appended in for loop */
> +static MD01 *chunk_append_md01(DTSUHD *h, int id)
> +{
> +    int md01_alloc = h->md01_count + 1;
> +    if (av_reallocp_array(&h->md01, md01_alloc, sizeof(*h->md01)))
> +        return NULL;
> +
> +    memset(h->md01 + h->md01_count, 0, sizeof(*h->md01));
> +    h->md01[h->md01_count].chunk_id = id;
> +    return h->md01 + h->md01_count++;
> +}
> +
> +/* Look up an already-created MD01 chunk by its chunkID; NULL when absent. */
> +static MD01 *chunk_find_md01(DTSUHD *h, int id)
> +{
> +    MD01 *entry = h->md01;
> +    int remaining;
> +
> +    for (remaining = h->md01_count; remaining > 0; remaining--, entry++) {
> +        if (entry->chunk_id == id)
> +            return entry;
> +    }
> +
> +    return NULL;
> +}
> +
> +/* Table 6-3.  Release every MD01 buffer, then the MD01 array itself, and
> +   reset the entry count to zero. */
> +static void chunk_reset(DTSUHD *h)
> +{
> +    int n = h->md01_count;
> +
> +    while (n-- > 0)
> +        av_freep(&h->md01[n].buf);
> +
> +    h->md01_count = 0;
> +    av_freep(&h->md01);
> +}
> +
> +/* Pick the default audio object.  The first MD01 chunk that holds at
> +   least one started object belonging to a selectable presentation wins;
> +   within that chunk the object with the lowest presentation index is
> +   returned (the earliest slot wins ties).  Returns NULL when no chunk
> +   holds such an object. */
> +static MDObject *find_default_audio(DTSUHD *h)
> +{
> +    int chunk, slot;
> +
> +    for (chunk = 0; chunk < h->md01_count; chunk++) {
> +        MDObject *objects = h->md01[chunk].object;
> +        MDObject *best = NULL;
> +
> +        for (slot = 0; slot < 257; slot++) {
> +            MDObject *cand = objects + slot;
> +
> +            if (!cand->started || !h->audio[cand->pres_index].selectable)
> +                continue;
> +            if (!best || cand->pres_index < best->pres_index)
> +                best = cand;
> +        }
> +
> +        if (best)
> +            return best;
> +    }
> +
> +    return NULL;
> +}
> +
> +/* Save channel mask, count, and rep type to descriptor info.
> +   ETSI TS 103 491 Table 7-28 channel activity mask bits mapping and
> +   SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
> +   representation type to channel mask and channel counts.
> +
> +   'object' may be NULL, in which case 'info' is left unchanged.  The masks
> +   are OR-ed into whatever 'info' already holds; channel_count is the
> +   number of bits set in the resulting ETSI channel mask.
> +*/
> +static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
> +{
> +    int i;
> +    static const struct {
> +        uint32_t activity_mask;
> +        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
> +        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
> +    } activity_map[] = {
> +        // act mask | chan mask | ffmpeg channel mask
> +        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
> +        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
> +        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
> +        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
> +        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
> +        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
> +        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
> +        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
> +        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
> +        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
> +        /* AV_CH_* are bit masks; the AV_CHAN_* names are enum channel
> +           indices and must not be OR-ed into a mask. */
> +        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
> +        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
> +        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
> +        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
> +        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
> +        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
> +        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
> +        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
> +        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
> +        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
> +        { 0 } // Terminator
> +    };
> +
> +    if (!object)
> +        return;
> +
> +    for (i = 0; activity_map[i].activity_mask; i++) {
> +        if (activity_map[i].activity_mask & object->ch_activity_mask) {
> +            info->channel_mask |= activity_map[i].channel_mask;
> +            info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
> +        }
> +    }
> +    info->channel_count = av_popcount(info->channel_mask);
> +    info->rep_type = object->rep_type;
> +}
> +
> +/* Fill 'info' with what the MP4 Sample Entry box needs.  The structure is
> +   zeroed first.  Sample size is always 16 bits.  The coding name is the
> +   name of the SampleEntry sub-box: 'dtsx' unless the bitstream major
> +   version is > 2, in which case it is 'dtsy'.  MaxPayloadCode is zero
> +   when DecoderProfile == 2.
> +*/
> +static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
> +{
> +    static const char *coding_name[] = { "dtsx", "dtsy" };
> +    const int newer_than_v2 = h->major_version > 2;
> +
> +    memset(info, 0, sizeof(*info));
> +    memcpy(info->coding_name, coding_name[newer_than_v2], 5);
> +
> +    /* Channel mask, channel count and rep type of the default object. */
> +    extract_object_info(find_default_audio(h), info);
> +
> +    info->valid = 1;
> +    info->sample_size = 16;
> +    info->sample_rate = h->sample_rate;
> +    info->sample_rate_mod = h->sample_rate_mod;
> +    info->base_sample_freq_code = h->sample_rate == 48000;
> +    info->frame_duration_code = h->frame_duration_code;
> +    info->decoder_profile_code = h->major_version - 2;
> +    info->max_payload_code = 0 + newer_than_v2;
> +    info->num_pres_code = h->num_audio_pres - 1;
> +}
> +
> +/* Table 6-17 p47.  For each of the low 'index' bits set in 'mask', consume
> +   one explicit object list entry.  On non-sync frames each entry is
> +   preceded by a one-bit presence flag.  The values are read only to
> +   advance the bit reader.  Always returns 0. */
> +static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
> +{
> +    static const uint8_t table[4] = { 4, 8, 16, 32 };
> +    GetBitContext *gb = &h->gb;
> +    int bit;
> +
> +    for (bit = 0; bit < index; bit++) {
> +        if (!((mask >> bit) & 0x01))
> +            continue;
> +        if (!h->is_sync_frame && !get_bits1(gb))
> +            continue;
> +        get_bits_var(gb, table, 1);
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 6-15 p44, Table 6-16 p45.
> +   Parse the audio presentation parameters from the frame reader.
> +   On a sync frame the presentation count, each presentation's
> +   'selectable' flag and its dependency mask are (re)read; on other frames
> +   the values stored in h->audio by the last sync frame are reused.
> +   For every selectable presentation the explicit object lists are then
> +   consumed.  Returns 0 on success, 1 if parse_explicit_object_lists()
> +   reports an error.
> +
> +   NOTE(review): with table { 0, 2, 4, 5 } get_bits_var() can return up to
> +   52, so 'audio' may exceed 25 in get_bits(gb, audio) below; FFmpeg's
> +   get_bits() is documented for 1-25 bits and 'read_mask' is a plain int.
> +   The capacity of h->audio is not visible here either - confirm both.
> +*/
> +static int parse_aud_pres_params(DTSUHD *h)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int audio;
> +    int i;
> +    int read_mask; /* one bit per earlier presentation: is a mask bit coded? */
> +    static const uint8_t table[4] = { 0, 2, 4, 5 };
> +
> +    if (h->is_sync_frame) {
> +        if (h->full_channel_mix_flag)
> +            h->num_audio_pres = 1;
> +        else
> +            h->num_audio_pres = get_bits_var(gb, table, 1) + 1;
> +        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
> +    }
> +
> +    for (audio = 0; audio < h->num_audio_pres; audio++) {
> +        if (h->is_sync_frame) {
> +            if (h->full_channel_mix_flag)
> +                h->audio[audio].selectable = 1;
> +            else
> +                h->audio[audio].selectable = get_bits1(gb);
> +        }
> +
> +        if (h->audio[audio].selectable) {
> +            if (h->is_sync_frame) {
> +                read_mask = (audio > 0) ? get_bits(gb, audio) : 0;
> +                h->audio[audio].mask = 0;
> +                for (i = 0; read_mask; i++, read_mask >>= 1) {
> +                    if (read_mask & 0x01)
> +                        h->audio[audio].mask |= get_bits1(gb) << i;
> +                }
> +            }
> +
> +            if (parse_explicit_object_lists(h, h->audio[audio].mask,
> audio))
> +                return 1;
> +        } else {
> +            h->audio[audio].mask = 0;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 6-9 p 38.  Run a 16-bit CRC (initial value 0xFFFF, nibble table
> +   for polynomial 0x1021) over 'bytes' bytes of the frame buffer starting
> +   at bit offset 'bit'.  A private bit reader is used, so the position of
> +   h->gb is not disturbed.  Returns 0 when the remainder is zero and 1
> +   otherwise. */
> +static int check_crc(DTSUHD *h, int bit, int bytes)
> +{
> +    static const uint16_t lookup[16] = {
> +        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
> +        0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF
> +    };
> +    GetBitContext gb;
> +    uint16_t crc = 0xFFFF;
> +    int nibbles = 2 * bytes; /* the CRC is advanced four bits at a time */
> +
> +    init_get_bits(&gb, h->data, h->data_bytes * 8);
> +    skip_bits(&gb, bit);
> +
> +    while (nibbles-- > 0) {
> +        const unsigned slot = (crc >> 12) ^ get_bits(&gb, 4);
> +
> +        crc = (crc << 4) ^ lookup[slot];
> +    }
> +
> +    return crc != 0;
> +}
> +
> +/* Table 6-12 p 40.  A one-bit selector chooses 3-bit or 6-bit version
> +   fields.  The first field plus 2 becomes h->major_version; a second
> +   field of the same width is skipped. */
> +static void decode_version(DTSUHD *h)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int field_bits = 6;
> +
> +    if (get_bits1(gb))
> +        field_bits = 3;
> +
> +    h->major_version = 2 + get_bits(gb, field_bits);
> +    skip_bits(gb, field_bits);
> +}
> +
> +/* Table 6-12 p 40.
> +   Parse the stream parameters at the start of the FTOC.
> +   The FTOC CRC is verified on every sync frame and on any frame that is
> +   not a full channel mix.  On sync frames the version, frame duration,
> +   clock rate and sample rate are decoded into 'h'; other frames keep the
> +   values from the last sync frame.
> +   Returns 0 on success, 1 on CRC failure or when the decoded frame
> +   duration or clock rate is zero (the lookup tables use 0 for the
> +   fourth code).
> +*/
> +static int parse_stream_params(DTSUHD *h)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int has_ftoc_crc;
> +    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
> +    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0
> };
> +
> +    if (h->is_sync_frame)
> +        h->full_channel_mix_flag = get_bits1(gb);
> +
> +    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
> +    if (has_ftoc_crc && check_crc(h, 0, h->ftoc_bytes))
> +        return 1;
> +
> +    if (h->is_sync_frame) {
> +        if (h->full_channel_mix_flag)
> +            h->major_version = 2;
> +        else
> +            decode_version(h);
> +
> +        h->frame_duration = table_base_duration[get_bits(gb, 2)];
> +        h->frame_duration_code = get_bits(gb, 3);
> +        h->frame_duration *= (h->frame_duration_code + 1);
> +        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
> +        if (h->frame_duration == 0 || h->clock_rate == 0)
> +            return 1; /* bitstream error */
> +
> +        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent: skip the 36-bit time stamp when the flag is set */
> +        h->sample_rate_mod = get_bits(gb, 2);
> +        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod); /* clock rate times 1, 2, 4 or 8 */
> +
> +        if (h->full_channel_mix_flag) {
> +            h->interactive_obj_limits_present = 0;
> +        } else {
> +            skip_bits1(gb);  /* reserved flag. */
> +            h->interactive_obj_limits_present = get_bits1(gb);
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 6-24 p52.  Zero the byte count of every NAVI entry that was not
> +   marked present, which makes the slot reusable by navi_find_index(). */
> +static void navi_purge(DTSUHD *h)
> +{
> +    int n = h->navi_count;
> +
> +    while (n-- > 0) {
> +        if (h->navi[n].present)
> +            continue;
> +        h->navi[n].bytes = 0;
> +    }
> +}
> +
> +/* Table 6-21 p50.  Wipe the entries currently in use and mark the NAVI
> +   list empty.  The allocation itself is kept. */
> +static void navi_clear(DTSUHD *h)
> +{
> +    const size_t used_bytes = sizeof(h->navi[0]) * h->navi_count;
> +
> +    if (h->navi)
> +        memset(h->navi, 0, used_bytes);
> +
> +    h->navi_count = 0;
> +}
> +
> +/* Table 6-22 p51.  Drop the 'present' mark from every NAVI entry. */
> +static void navi_clear_present(DTSUHD *h)
> +{
> +    int n = h->navi_count;
> +
> +    while (n-- > 0)
> +        h->navi[n].present = 0;
> +}
> +
> +/* Table 6-23 p51.  Locate, or create, the NAVI entry for 'desired_index'.
> +   An existing entry with that index is marked present and reused.
> +   Otherwise the lowest free slot (not present, zero bytes) is taken, or
> +   the list is extended by one entry, and the slot is initialised with
> +   id 256 and zero bytes.
> +   Returns 0 on success with the slot number stored in *list_index, or 1
> +   if the list could not be grown; in that case the existing list is left
> +   intact.
> +*/
> +static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
> +{
> +    int avail_index = h->navi_count;
> +    int i;
> +    int navi_alloc;
> +
> +    for (i = 0; i < h->navi_count; i++) {
> +        if (h->navi[i].index == desired_index) {
> +            *list_index = i;
> +            h->navi[i].present = 1;
> +            return 0;
> +        }
> +
> +        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
> +            avail_index = i;
> +    }
> +
> +    if (avail_index >= h->navi_count) {
> +        if (h->navi_count >= h->navi_alloc) {
> +            void *grown;
> +
> +            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
> +            /* av_reallocp_array() would free the list and NULL h->navi on
> +               failure while navi_count/navi_alloc kept their old values,
> +               so the next frame would index a NULL array. */
> +            grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
> +            if (!grown)
> +                return 1;
> +            h->navi = grown;
> +            h->navi_alloc = navi_alloc;
> +        }
> +        h->navi_count++;
> +    }
> +
> +    *list_index = avail_index;
> +    h->navi[avail_index].bytes = 0;
> +    h->navi[avail_index].present = 1;
> +    h->navi[avail_index].id = 256;
> +    h->navi[avail_index].index = desired_index;
> +
> +    return 0;
> +}
> +
> +/* Table 6-20 p48.  Parse the chunk and audio-chunk navigation tables.
> +   Fills h->chunk[] (size and CRC flag of every metadata chunk), updates
> +   the NAVI list with the audio chunk ids and sizes, and accumulates the
> +   total payload size in h->chunk_bytes.
> +   Returns 0 on success, 1 when an allocation fails.  If the chunk table
> +   cannot be grown, the old table is kept and h->chunk_count is reset to 0
> +   so no later code walks past the allocation.
> +*/
> +static int parse_chunk_navi(DTSUHD *h)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int audio_chunks = 1;
> +    int bytes;
> +    int i;
> +    int id_present;
> +    int index;
> +    int list_index;
> +    static const uint8_t table2468[4] = { 2, 4, 6, 8 };
> +    static const uint8_t table_audio_chunk_sizes[4] = { 9, 11, 13, 16 };
> +    static const uint8_t table_chunk_sizes[4] = { 6, 9, 12, 15 };
> +
> +    h->chunk_bytes = 0;
> +    if (h->full_channel_mix_flag)
> +        h->chunk_count = h->is_sync_frame;
> +    else
> +        h->chunk_count = get_bits_var(gb, table2468, 1);
> +
> +    if (h->chunk_count >= h->chunk_alloc) {
> +        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
> +        /* av_reallocp_array() would free the table and NULL h->chunk on
> +           failure while chunk_alloc kept its old value, so a later frame
> +           with fewer chunks would write through a NULL pointer. */
> +        void *grown = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
> +
> +        if (!grown) {
> +            h->chunk_count = 0;
> +            return 1;
> +        }
> +        h->chunk = grown;
> +        h->chunk_alloc = chunk_alloc;
> +    }
> +
> +    for (i = 0; i < h->chunk_count; i++) {
> +        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes, 1);
> +        if (h->full_channel_mix_flag)
> +            h->chunk[i].crc_flag = 0;
> +        else
> +            h->chunk[i].crc_flag = get_bits1(gb);
> +    }
> +
> +    if (!h->full_channel_mix_flag)
> +        audio_chunks = get_bits_var(gb, table2468, 1);
> +
> +    /* A sync frame starts a fresh NAVI list; other frames only forget
> +       which entries were present so navi_purge() can drop stale ones. */
> +    if (h->is_sync_frame)
> +        navi_clear(h);
> +    else
> +        navi_clear_present(h);
> +
> +    for (i = 0; i < audio_chunks; i++) {
> +        if (h->full_channel_mix_flag)
> +            index = 0;
> +        else
> +            index = get_bits_var(gb, table2468, 1);
> +
> +        if (navi_find_index(h, index, &list_index))
> +            return 1;
> +
> +        if (h->is_sync_frame)
> +            id_present = 1;
> +        else if (h->full_channel_mix_flag)
> +            id_present = 0;
> +        else
> +            id_present = get_bits1(gb);
> +
> +        if (id_present)
> +            h->navi[list_index].id = get_bits_var(gb, table2468, 1);
> +
> +        bytes = get_bits_var(gb, table_audio_chunk_sizes, 1);
> +        h->chunk_bytes += bytes;
> +        h->navi[list_index].bytes = bytes;
> +    }
> +
> +    navi_purge(h);
> +
> +    return 0;
> +}
> +
> +
> +/* Table 6-6.  Read the list of object ids carried by this MD01 chunk.
> +   A full channel mix always has exactly one object, id 256.  Otherwise a
> +   variable-length count is followed by that many ids, each preceded by a
> +   one-bit flag selecting an 8-bit or 4-bit id.  Always returns 0. */
> +static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
> +{
> +    static const uint8_t count_bits[4] = { 3, 4, 6, 8 };
> +    GetBitContext *gb = &h->gb;
> +    int n;
> +
> +    if (h->full_channel_mix_flag) {
> +        md01->object_list_count = 1;
> +        md01->object_list[0] = 256;
> +        return 0;
> +    }
> +
> +    md01->object_list_count = get_bits_var(gb, count_bits, 1);
> +    for (n = 0; n < md01->object_list_count; n++) {
> +        const int id_bits = get_bits1(gb) ? 8 : 4;
> +
> +        md01->object_list[n] = get_bits(gb, id_bits);
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 7-9.  Consume one loudness parameter set without keeping it:
> +   6 + 2 bits when 'nominal_flag' is set, 6 + 5 + 4 bits otherwise. */
> +static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
> +{
> +    get_bits_md01(h, md01, 6); /* rLoudness */
> +
> +    if (nominal_flag) {
> +        get_bits_md01(h, md01, 2);
> +    } else {
> +        get_bits_md01(h, md01, 5);
> +        get_bits_md01(h, md01, 4);
> +    }
> +}
> +
> +/* Table 7-8.  Walk the static metadata parameters.  Nothing is stored;
> +   the fields are read only to advance the reader chosen by
> +   get_bits_md01().
> +   With 'only_first' set, parsing stops after the loudness sets (only the
> +   first packet has been acquired at that point).  Otherwise the remaining
> +   fields are consumed, the MD01 reader is moved to the end of the
> +   assembled packets and md01->static_md_extracted is set.
> +   Always returns 0.
> +*/
> +static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
> +{
> +    int i;
> +    int loudness_sets = 1;
> +    int nominal_flag = 1;
> +
> +    if (h->full_channel_mix_flag == 0)
> +        nominal_flag = get_bits_md01(h, md01, 1);
> +
> +    if (nominal_flag) {
> +        if (h->full_channel_mix_flag == 0)
> +            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
> +    } else {
> +        loudness_sets = get_bits_md01(h, md01, 4) + 1;
> +    }
> +
> +    for (i = 0; i < loudness_sets; i++)
> +        skip_mp_param_set(h, md01, nominal_flag);
> +
> +    if (only_first)
> +        return 0;
> +
> +    if (nominal_flag == 0)
> +        get_bits_md01(h, md01, 1);
> +
> +    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
> +        if (get_bits_md01(h, md01, 1)) {
> +            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
> +                get_bits_md01(h, md01, 15);
> +        }
> +        /* smooth md present: 6 * 6 = 36 bits.  get_bits() handles at most
> +           25 bits per call, so skip them on the same reader that
> +           get_bits_md01() would have used. */
> +        if (get_bits_md01(h, md01, 1))
> +            skip_bits_long(md01->buf ? &md01->gb : &h->gb, 6 * 6);
> +    }
> +
> +    if (h->full_channel_mix_flag == 0) {
> +        /* The packet size is a byte count (the packets are assembled one
> +           byte at a time), whereas get_bits_count() reports bits. */
> +        const int total_bits = md01->static_md_packets * md01->static_md_packet_size * 8;
> +        const int remaining = total_bits - get_bits_count(&md01->gb);
> +
> +        if (remaining > 0)
> +            skip_bits_long(&md01->gb, remaining);
> +    }
> +    md01->static_md_extracted = 1;
> +
> +    return 0;
> +}
> +
> +/* Table 7-7.  Collect multi-frame static metadata.
> +   On a sync frame the packet count and packet size are (re)read, the MD01
> +   buffer is grown if needed and the MD01 reader is reset.  One packet of
> +   static_md_packet_size bytes is then copied from the frame into the MD01
> +   buffer.  The static metadata is parsed after the first packet
> +   (loudness sets only) and again once all packets have arrived, when the
> +   update flag is set or nothing has been extracted yet.
> +   Returns 0 on success, 1 on allocation or parse failure.
> +*/
> +static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int i, n;
> +    static const uint8_t table1[4] = { 0, 6, 9, 12 };
> +    static const uint8_t table2[4] = { 5, 7, 9, 11 };
> +
> +    if (h->is_sync_frame) {
> +        md01->packets_acquired = 0;
> +        if (h->full_channel_mix_flag) {
> +            md01->static_md_packets = 1;
> +            md01->static_md_packet_size = 0;
> +        } else {
> +            md01->static_md_packets = get_bits_var(gb, table1, 1) + 1;
> +            md01->static_md_packet_size = get_bits_var(gb, table2, 1) + 3;
> +        }
> +
> +        n = md01->static_md_packets * md01->static_md_packet_size;
> +        if (n > md01->buf_bytes) {
> +            /* The bit reader may read past the last payload byte, so the
> +               buffer needs AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes
> +               after the data. */
> +            if (av_reallocp(&md01->buf, n + AV_INPUT_BUFFER_PADDING_SIZE)) {
> +                /* av_reallocp() freed the buffer: forget its old size so
> +                   the next sync frame allocates again. */
> +                md01->buf_bytes = 0;
> +                return 1;
> +            }
> +            memset(md01->buf + n, 0, AV_INPUT_BUFFER_PADDING_SIZE);
> +            md01->buf_bytes = n;
> +        }
> +
> +        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
> +        if (md01->static_md_packets > 1)
> +            md01->static_md_update_flag = get_bits1(gb);
> +        else
> +            md01->static_md_update_flag = 1;
> +    }
> +
> +    if (md01->packets_acquired < md01->static_md_packets) {
> +        n = md01->packets_acquired * md01->static_md_packet_size;
> +        for (i = 0; i < md01->static_md_packet_size; i++)
> +            md01->buf[n + i] = get_bits(gb, 8);
> +        md01->packets_acquired++;
> +
> +        if (md01->packets_acquired == md01->static_md_packets) {
> +            if (md01->static_md_update_flag || !md01->static_md_extracted)
> +                if (parse_static_md_params(h, md01, 0))
> +                    return 1;
> +        } else if (md01->packets_acquired == 1) {
> +            if (md01->static_md_update_flag || !md01->static_md_extracted)
> +                if (parse_static_md_params(h, md01, 1))
> +                    return 1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 7-18.  Decide whether an object should be rendered: returns 1 if
> +   suitable, 0 if not.  Ids from OBJGROUPIDSTART (224, Sec 7.8.7 p75)
> +   upwards are accepted without reading anything; for other ids one flag
> +   bit decides.  When the object is rejected its render data is skipped. */
> +static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
> +{
> +    static const uint8_t table[4] = { 8, 10, 12, 14 };
> +    GetBitContext *gb = &h->gb;
> +
> +    if (object_id >= 224)
> +        return 1;
> +    if (get_bits1(gb))
> +        return 1;
> +
> +    /* Rejected: drop one flag bit, then the variable-length render data. */
> +    skip_bits1(gb);
> +    skip_bits(gb, get_bits_var(gb, table, 1));
> +
> +    return 0;
> +}
> +
> +/* Table 7-26.  Determine the channel activity mask of 'object'.
> +   Binaural objects always use table entry 1.  Otherwise a 4-bit index is
> +   read: 0-13 select a preset from Table 7-27, 14 is followed by an
> +   explicit 16-bit mask and 15 by an explicit 32-bit mask.
> +*/
> +static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
> +{
> +    GetBitContext *gb = &h->gb;
> +    const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 1 : get_bits(gb, 4);
> +    static const int mask_table[14] = { /* Table 7-27 */
> +        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
> +        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
> +    };
> +
> +    if (ch_index == 14)
> +        object->ch_activity_mask = get_bits(gb, 16);
> +    else if (ch_index == 15)
> +        /* get_bits() is limited to 25 bits; a full 32-bit field needs
> +           get_bits_long(). */
> +        object->ch_activity_mask = get_bits_long(gb, 32);
> +    else
> +        object->ch_activity_mask = mask_table[ch_index];
> +}
> +
> +/* Table 7-22.
> +   Parse the leading part of one object's metadata.  Only the
> +   representation type and, for channel-mask based objects, the channel
> +   activity mask are kept; the other fields are read just to reach them.
> +   Nothing but the leading flag bit is parsed unless 'start_frame_flag'
> +   is set.  The rest of the object is not consumed here; the caller
> +   repositions the reader.  Always returns 0.
> +
> +   Note: inside the channel-mask branch object_3d_metadata_flag is always
> +   0, because the switch sets at most one of the two flags.
> +*/
> +static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
> +                                 int start_frame_flag, int object_id)
> +{
> +    GetBitContext *gb = &h->gb;
> +    int ch_mask_object_flag = 0;
> +    int object_3d_metadata_flag = 0;
> +    static const uint8_t table2[4] = { 1, 4, 4, 8 };
> +    static const uint8_t table3[4] = { 3, 3, 4, 8 };
> +
> +    skip_bits(gb, object_id != 256); /* one flag bit, absent for object 256 */
> +
> +    if (start_frame_flag) {
> +        object->rep_type = get_bits(gb, 3);
> +        switch (object->rep_type) {
> +            case REP_TYPE_BINAURAL:
> +            case REP_TYPE_CH_MASK_BASED:
> +            case REP_TYPE_MTRX2D_CH_MASK_BASED:
> +            case REP_TYPE_MTRX3D_CH_MASK_BASED:
> +                ch_mask_object_flag = 1;
> +                break;
> +
> +            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
> +            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
> +                object_3d_metadata_flag = 1;
> +                break;
> +        }
> +
> +        if (ch_mask_object_flag) {
> +            if (object_id != 256) {
> +                skip_bits(gb, 3);  /* Object Importance Level */
> +                if (get_bits1(gb))
> +                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
> +
> +                get_bits_var(gb, table2, 1);
> +                get_bits_var(gb, table3, 1);
> +
> +                /* Skip optional Loudness block. */
> +                if (get_bits1(gb))
> +                    skip_bits(gb, 8);
> +
> +                /* Skip optional Object Interactive MD (Table 7-25). */
> +                if (get_bits1(gb) && h->interactive_obj_limits_present) {
> +                    if (get_bits1(gb))
> +                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag);
> +                }
> +            }
> +
> +            parse_ch_mask_params(h, md01, object);
> +        }
> +    }
> +
> +    /* Skip rest of object */
> +    return 0;
> +}
> +
> +/* Table 7-4.
> +   Parse one MD01 metadata chunk for presentation 'pres_index'.
> +   For a selectable presentation the scaling data is skipped and, when
> +   flagged, the multi-frame static metadata is collected.  The per-chunk
> +   object table is then cleared and the object list walked: objects
> +   rejected by is_suitable_for_render() are passed over, and parsing stops
> +   after the first accepted object (see the 'break' closing the loop).
> +   Object ids 224-255 are recorded as started but their metadata is not
> +   parsed.
> +   Returns 0 on success, 1 if a nested parser fails.
> +*/
> +static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
> +{
> +    GetBitContext *gb = &h->gb;
> +    uint32_t i;
> +    uint32_t id;
> +    uint32_t start_flag; /* 1 when this call marks the object as started */
> +
> +    if (h->audio[pres_index].selectable) {
> +        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
> +            skip_bits(gb, 5 * get_bits1(gb));
> +
> +        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
> +            return 1;
> +    }
> +
> +    /* Table 7-16: Object metadata. */
> +    memset(md01->object, 0, sizeof(md01->object));
> +    if (!h->full_channel_mix_flag)
> +        skip_bits(gb, 11 * get_bits1(gb));
> +
> +    for (i = 0; i < md01->object_list_count; i++) {
> +        id = md01->object_list[i];
> +        if (!is_suitable_for_render(h, md01, id))
> +            continue;
> +
> +        md01->object[id].pres_index = pres_index;
> +        start_flag = 0;
> +        if (!md01->object[id].started) {
> +            skip_bits(gb, id != 256); /* one flag bit, absent for object 256 */
> +            start_flag = md01->object[id].started = 1;
> +        }
> +
> +        if ((id < 224 || id > 255) &&
> +            parse_object_metadata(h, md01, md01->object + id, start_flag,
> id)) {
> +            return 1;
> +        }
> +
> +        break;
> +    }
> +
> +    return 0;
> +}
> +
> +/* Table 6-2.  Walk the metadata chunks described by h->chunk[].
> +   A chunk whose CRC flag is set is verified first.  Chunks with id 1
> +   (MD01) are parsed; the reader is then moved to the end of the chunk
> +   using its byte size, whatever its id.
> +   Returns 0 on success, 1 on CRC, allocation or parse failure. */
> +static int parse_chunks(DTSUHD *h)
> +{
> +    static const uint8_t table_aud_pres[4] = { 0, 2, 4, 4 };
> +    GetBitContext *gb = &h->gb;
> +    int n;
> +
> +    for (n = 0; n < h->chunk_count; n++) {
> +        const int chunk_end = get_bits_count(gb) + h->chunk[n].bytes * 8;
> +        uint32_t chunk_id;
> +
> +        if (h->chunk[n].crc_flag &&
> +            check_crc(h, get_bits_count(gb), h->chunk[n].bytes))
> +            return 1;
> +
> +        chunk_id = get_bits(gb, 8);
> +        if (chunk_id == 1) {
> +            MD01 *md01;
> +            const int pres_index = get_bits_var(gb, table_aud_pres, 1);
> +
> +            if (pres_index > 255)
> +                return 1;
> +
> +            /* Reuse the MD01 state for this chunk id, creating it on
> +               first sight. */
> +            md01 = chunk_find_md01(h, chunk_id);
> +            if (!md01)
> +                md01 = chunk_append_md01(h, chunk_id);
> +            if (!md01)
> +                return 1;
> +
> +            if (parse_md_chunk_list(h, md01))
> +                return 1;
> +            if (parse_md01(h, md01, pres_index))
> +                return 1;
> +        }
> +
> +        skip_bits(gb, chunk_end - get_bits_count(gb));
> +    }
> +
> +    return 0;
> +}
> +
> +/** Allocate a zero-initialised parsing handle.
> +    Use one handle per DTS:X Profile 2 audio stream and release it with
> +    dtsuhd_destroy().  Do not share a handle between audio streams.
> +
> +  @return Parsing handle for use with other functions, or NULL on failure.
> +*/
> +DTSUHD *dtsuhd_create(void)
> +{
> +    DTSUHD *handle = av_calloc(1, sizeof(*handle));
> +
> +    return handle;
> +}
> +
> +/** Release a parsing handle together with everything it owns.
> +
> +  @param[in] h Handle allocated by dtsuhd_create(); NULL is ignored
> +*/
> +void dtsuhd_destroy(DTSUHD *h)
> +{
> +    if (!h)
> +        return;
> +
> +    chunk_reset(h);
> +    av_freep(&h->navi);
> +    av_freep(&h->chunk);
> +    av_freep(&h);
> +}
> +
> +/** Parse a single DTS:X Profile 2 frame.
> +    The frame must start at the first byte of the data buffer, and enough
> +    of the frame must be present to decode the majority of the FTOC.
> +    From Table 6-11 p40.
> +
> +    A sync frame must be the first frame provided, before any non-sync frames.
> +    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
> +
> +  @param[in] h Handle allocated by dtsuhd_create()
> +  @param[in] data First byte of a buffer containing the frame to parse
> +  @param[in] data_bytes Number of valid bytes in 'data'
> +  @param[out] fi Results of frame parsing, may be NULL
> +  @param[out] di Results of descriptor parsing, may be NULL
> +  @return DTSUHD_OK (0) on success, another DTSUHDStatus value on error
> +*/
> +int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
> +                 DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
> +{
> +    GetBitContext *gb;
> +    int fraction = 1;
> +    int i;
> +    int syncword;
> +    static const uint8_t table_payload[4] = { 5, 8, 10, 12 };
> +
> +    if (!h || !data)
> +        return DTSUHD_NULL;
> +
> +    if (data_bytes < 4)
> +        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the
> signature */
> +
> +    h->data = data;
> +    h->data_bytes = data_bytes;
> +    gb = &h->gb;
> +    init_get_bits(gb, data, data_bytes * 8);
>


Use init_get_bits8() here, and check its return code.


> +
> +    syncword = get_bits_long(gb, 32);
> +    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
> +    h->saw_sync |= h->is_sync_frame;
> +    if (!h->saw_sync || (!h->is_sync_frame && syncword !=
> DTSUHD_NONSYNCWORD))
> +        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync
> frame. */
> +
> +    h->ftoc_bytes = get_bits_var(gb, table_payload, 1) + 1;
> +    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
> +        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire
> FTOC */
> +
> +    if (parse_stream_params(h))
> +        return DTSUHD_INVALID_FRAME;
> +
> +    if (parse_aud_pres_params(h))
> +        return DTSUHD_INVALID_FRAME;
> +
> +    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
> +        return DTSUHD_INVALID_FRAME;
> +
> +    /* At this point in the parsing, we can calculate the size of the
> frame. */
> +    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
> +    if (h->frame_bytes > data_bytes)
> +        return DTSUHD_INCOMPLETE;
> +
> +    if (di && h->is_sync_frame) {
> +        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks
> immediately
> +           following the FTOC CRC.
> +        */
> +        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
> +        if (parse_chunks(h))
> +            return DTSUHD_INVALID_FRAME;
> +        update_descriptor(h, di);
> +    }
> +
> +    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame
> duration. */
> +    for (i = 0; i < h->navi_count; i++) {
> +        if (h->navi[i].present) {
> +            if (h->navi[i].id == 3)
> +                fraction = 2;
> +            else if (h->navi[i].id == 4)
> +                fraction = 4;
> +        }
> +    }
> +
> +    if (fi) {
> +        fi->sync = h->is_sync_frame;
> +        fi->frame_bytes = h->frame_bytes;
> +        fi->sample_rate = h->sample_rate;
> +        fi->sample_count = (h->frame_duration * fi->sample_rate) /
> (h->clock_rate * fraction);
> +        fi->duration = (double)fi->sample_count / fi->sample_rate;
>

Please do not use the double type here.
Also, make use of av_rescale().


> +    }
> +
> +    return DTSUHD_OK;
> +}
> +
> +/** Return the offset of the first UHD audio frame.
> +    When supplied a buffer containing DTSHDHDR file content, the DTSHD
> +    headers are skipped and the offset to the first byte of the STRMDATA
> +    chunk payload is returned, along with the size of that chunk.
> +
> +    Chunk sizes come from the file and are not trusted: a chunk that claims
> +    to extend past the end of the buffer ends the search.
> +
> +  @param[in] data_start DTS:X Profile 2 file content to parse
> +  @param[in] data_size Number of valid bytes in 'data_start'
> +  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
> +  @return STRMDATA payload offset or 0 if not a valid DTS:X Profile 2 file
> +*/
> +int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
> +{
> +    int offset = 0;
> +    uint64_t chunk_size;
> +
> +    if (!data_start || data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
> +        return 0;
> +
> +    while (offset <= data_size - (DTSUHD_CHUNK_HEADER + 4)) {
> +        const uint8_t *chunk = data_start + offset;
> +
> +        chunk_size = AV_RB64(chunk + 8);
> +
> +        if (!memcmp(chunk, "STRMDATA", 8)) {
> +            if (strmdata_size)
> +                *strmdata_size = chunk_size;
> +            return offset + DTSUHD_CHUNK_HEADER;
> +        }
> +
> +        /* Advance by offset rather than by pointer: adding an arbitrary
> +           64-bit size to a pointer could move it outside the buffer. */
> +        if (chunk_size > (uint64_t)(data_size - offset - DTSUHD_CHUNK_HEADER))
> +            break;
> +        offset += (int)chunk_size + DTSUHD_CHUNK_HEADER;
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
> new file mode 100644
> index 0000000000..8b4e8ce2aa
> --- /dev/null
> +++ b/libavcodec/dtsuhd_common.h
> @@ -0,0 +1,84 @@
> +/*
> + * DTS-UHD common audio frame parsing code
> + * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_DTSUHD_COMMON_H
> +#define AVCODEC_DTSUHD_COMMON_H
> +
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +#define DTSUHD_NONSYNCWORD 0x71C442E8
> +#define DTSUHD_SYNCWORD    0x40411BF2
> +
> +#define DTSUHD_MAX_FRAME_SIZE 0x1000
> +
> +/* Return codes from dtsuhd_frame.  DTSUHD_OK is 0; every other value
> +   reports why the frame could not be parsed. */
> +enum DTSUHDStatus {
> +    DTSUHD_OK,
> +    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
> +    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
> +    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
> +    DTSUHD_NULL,          /* Function parameter may not be NULL. */
> +};
> +
> +/* Return stream information from an audio frame parsed by dtsuhd_frame.
> +   dtsuhd_frame fills this only when given a sync frame. */
> +typedef struct DTSUHDDescriptorInfo {
> +    unsigned valid:1; /* True if descriptor info is valid. */
> +    char coding_name[5]; /* Four character, null terminated SampleEntry box name. */
> +    int base_sample_freq_code; /* 1 when the sample rate is 48000, else 0. */
> +    int channel_count; /* Number of bits set in channel_mask. */
> +    int decoder_profile_code; /* Bitstream major version minus 2. */
> +    int frame_duration_code;
> +    int max_payload_code; /* 1 when the major version is > 2, else 0. */
> +    int num_pres_code; /* Number of audio presentations minus 1. */
> +    int rep_type; /* Representation type of the default audio object. */
> +    int sample_rate;
> +    int sample_rate_mod;
> +    int sample_size; /* Always 16. */
> +    int channel_mask; /* Mask as defined by ETSI TS 103 491. */
> +    uint64_t ffmpeg_channel_mask; /* Mask as defined in ffmpeg. */
> +} DTSUHDDescriptorInfo;
> +
> +/* Return frame information from an audio frame parsed by dtsuhd_frame. */
> +typedef struct DTSUHDFrameInfo {
> +    double duration;  /* Duration of frame in seconds (seconds per frame). */
> +    int frame_bytes;  /* Size of entire frame in bytes. */
> +    int sample_count; /* Number of samples in frame (samples per frame). */
> +    int sample_rate;  /* Sample rate of frame (samples per second). */
> +    unsigned sync:1;  /* True if frame is a sync frame. */
> +} DTSUHDFrameInfo;
> +
> +struct DTSUHD; /* Opaque parsing state; defined in dtsuhd_common.c. */
> +typedef struct DTSUHD DTSUHD;
> +
> +struct DTSUHD *dtsuhd_create(void); /* New handle, or NULL on allocation failure. */
> +void dtsuhd_destroy(DTSUHD*); /* Free a handle; NULL is accepted. */
> +int dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
> +                 DTSUHDFrameInfo*, DTSUHDDescriptorInfo*); /* Returns a DTSUHDStatus value; both output pointers may be NULL. */
> +int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
> +                            size_t *strmdata_size); /* Offset of the STRMDATA payload, or 0 if none is found. */
> +
> +/* Non-zero when 'syncword' is either DTS-UHD frame signature (sync or
> +   non-sync). */
> +static inline int dtsuhd_is_syncword(uint32_t syncword)
> +{
> +    switch (syncword) {
> +    case DTSUHD_SYNCWORD:
> +    case DTSUHD_NONSYNCWORD:
> +        return 1;
> +    default:
> +        return 0;
> +    }
> +}
> +
> +#endif /* AVCODEC_DTSUHD_COMMON_H */
> diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
> new file mode 100644
> index 0000000000..4c553b8e4f
> --- /dev/null
> +++ b/libavcodec/dtsuhd_parser.c
> @@ -0,0 +1,141 @@
> +/*
> + * DTS-UHD audio frame parsing code
> + * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Parse raw DTS-UHD audio frame input and return individual audio frames.
> + */
> +
> +#include "dtsuhd_common.h"
> +#include "libavutil/intreadwrite.h"
> +#include "parser.h"
> +
> +#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
> +
> +typedef struct DTSUHDParseContext {
> +    DTSUHD *dtsuhd;
> +    int buf_offset;
> +    int buf_bytes;
> +    int frame_bytes;
> +    uint8_t *buf;
> +} DTSUHDParseContext;
> +
> +static av_cold int parser_init(AVCodecParserContext *s)
> +{
> +    DTSUHDParseContext *pc = s->priv_data;
> +
> +    pc->dtsuhd = dtsuhd_create();
> +    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE +
> AV_INPUT_BUFFER_PADDING_SIZE, 1);
> +    if (!pc->dtsuhd || !pc->buf)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static void parser_close(AVCodecParserContext *s)
> +{
> +    DTSUHDParseContext *pc = s->priv_data;
> +
> +    dtsuhd_destroy(pc->dtsuhd);
> +    pc->dtsuhd = NULL;
> +    av_freep(&pc->buf);
> +    ff_parse_close(s);
> +}
> +
> +// Keep data in contiguous buffer as required by dtsuhd_frame.
> +static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int
> *buf_size, int *input_consumed)
> +{
> +    int copy_bytes;
> +
> +    pc->buf_offset += pc->frame_bytes;
> +    pc->frame_bytes = 0;
> +
> +    // Buffer almost full, move partial frame to start of buffer for more
> space.
> +    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
> +        memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes);
> +        pc->buf_bytes -= pc->buf_offset;
> +        pc->buf_offset = 0;
> +    }
> +
> +    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes,
> *buf_size));
> +
> +    // Append input buffer to our context.
> +    if (copy_bytes) {
> +        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
> +        pc->buf_bytes += copy_bytes;
> +    }
> +
> +    // Ensure buffer starts with a syncword
> +    while (pc->buf_offset + 4 < pc->buf_bytes &&
> !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
> +        pc->buf_offset++;
> +
> +    *input_consumed = copy_bytes;
> +    *buf = pc->buf + pc->buf_offset;
> +    *buf_size = pc->buf_bytes - pc->buf_offset;
> +
> +    return copy_bytes && pc->buf_bytes - pc->buf_offset <
> DTSUHD_MAX_FRAME_SIZE;
> +}
> +
> +static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
> +                        const uint8_t **poutbuf, int *poutbuf_size,
> +                        const uint8_t *buf, int buf_size)
> +{
> +    DTSUHDParseContext *pc = s->priv_data;
> +    DTSUHDFrameInfo fi;
> +    int input_consumed = 0;
> +
> +    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
> +        *poutbuf = NULL;
> +        *poutbuf_size = 0;
> +        return input_consumed;
> +    }
> +
> +    switch (dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) {
> +    case DTSUHD_OK:
> +        if (fi.sample_count)
> +            s->duration = fi.sample_count;
> +        if (fi.sample_rate)
> +            avctx->sample_rate = fi.sample_rate;
> +        buf_size = pc->frame_bytes = fi.frame_bytes;
> +        break;
> +    case DTSUHD_INCOMPLETE:
> +        pc->frame_bytes = buf_size;
> +        buf = NULL;
> +        buf_size = 0;
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File
> may be invalid.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    *poutbuf      = buf;
> +    *poutbuf_size = buf_size;
> +
> +    return input_consumed;
> +}
> +
> +AVCodecParser ff_dtsuhd_parser = {
> +    .codec_ids      = { AV_CODEC_ID_DTSUHD },
> +    .priv_data_size = sizeof(DTSUHDParseContext),
> +    .parser_init    = parser_init,
> +    .parser_parse   = parser_parse,
> +    .parser_close   = parser_close,
> +};
> diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
> index d355808018..d724c8b402 100644
> --- a/libavcodec/parsers.c
> +++ b/libavcodec/parsers.c
> @@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
>   extern const AVCodecParser ff_dnxhd_parser;
>   extern const AVCodecParser ff_dolby_e_parser;
>   extern const AVCodecParser ff_dpx_parser;
> +extern const AVCodecParser ff_dtsuhd_parser;
>   extern const AVCodecParser ff_dvaudio_parser;
>   extern const AVCodecParser ff_dvbsub_parser;
>   extern const AVCodecParser ff_dvdsub_parser;
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 048649689b..42cf19348f 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -186,6 +186,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
>   OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
>   OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
>   OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
> +OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
>   OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
>   OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
>   OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index cb5b69e9cd..1b48ce6073 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -144,6 +144,7 @@ extern const AVInputFormat  ff_dss_demuxer;
>   extern const AVInputFormat  ff_dts_demuxer;
>   extern const FFOutputFormat ff_dts_muxer;
>   extern const AVInputFormat  ff_dtshd_demuxer;
> +extern const AVInputFormat  ff_dtsuhd_demuxer;
>   extern const AVInputFormat  ff_dv_demuxer;
>   extern const FFOutputFormat ff_dv_muxer;
>   extern const AVInputFormat  ff_dvbsub_demuxer;
> diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
> index a3dea0668f..6e9e78a335 100644
> --- a/libavformat/dtshddec.c
> +++ b/libavformat/dtshddec.c
> @@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
>   static int dtshd_probe(const AVProbeData *p)
>   {
>       if (AV_RB64(p->buf) == DTSHDHDR)
> -        return AVPROBE_SCORE_MAX;
> +        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have
> this signature.
>       return 0;
>   }
>   diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
> new file mode 100644
> index 0000000000..e15176382d
> --- /dev/null
> +++ b/libavformat/dtsuhddec.c
> @@ -0,0 +1,214 @@
> +/*
> + * DTS-UHD audio demuxer
> + * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Report DTS-UHD audio stream configuration and extract raw packet data.
> + */
> +
> +#include "internal.h"
> +#include "libavcodec/dtsuhd_common.h"
> +#include "libavcodec/put_bits.h"
> +#include "libavutil/intreadwrite.h"
> +
> +#define DTSUHD_BUFFER_SIZE (1024 * 1024)
> +
> +typedef struct DTSUHDDemuxContext {
> +    size_t data_end;
> +    struct DTSUHD *dtsuhd;
> +    uint8_t *buf;
> +} DTSUHDDemuxContext;
> +
> +static int probe(const AVProbeData *p)
> +{
> +    int offset = dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
> +    int score = 0;
> +    struct DTSUHD *h = dtsuhd_create();
> +
> +    for (; offset + 4 < p->buf_size; offset++) {
> +        if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) {
> +            if (DTSUHD_OK == dtsuhd_frame(h, p->buf + offset, p->buf_size
> - offset, NULL, NULL)) {
> +                score = AVPROBE_SCORE_MAX - 3;
> +                break;
> +            }
> +        }
> +    }
> +
> +    dtsuhd_destroy(h);
> +    return score;
> +}
> +
> +static av_cold int read_close(AVFormatContext *s)
> +{
> +    DTSUHDDemuxContext *dtsxs = s->priv_data;
> +
> +    av_freep(&dtsxs->buf);
> +    dtsuhd_destroy(dtsxs->dtsuhd);
> +    dtsxs->dtsuhd = NULL;
> +
> +    return 0;
> +}
> +
> +static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
> +{
> +    while (data_start + 4 < DTSUHD_BUFFER_SIZE &&
> +        !dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + data_start))) {
> +        data_start++;
> +    }
> +
> +    return data_start;
> +}
> +
> +static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo
> *di)
> +{
> +    PutBitContext pbc;
> +    int ret;
> +    int size;
> +    uint8_t udts[32];
> +
> +    init_put_bits(&pbc, udts, sizeof(udts));
> +    put_bits32(&pbc, 0); // udts box size
> +    put_bits(&pbc, 8, 'u'); // udts box signature
> +    put_bits(&pbc, 8, 'd');
> +    put_bits(&pbc, 8, 't');
> +    put_bits(&pbc, 8, 's');
> +    put_bits(&pbc, 6, di->decoder_profile_code);
> +    put_bits(&pbc, 2, di->frame_duration_code);
> +    put_bits(&pbc, 3, di->max_payload_code);
> +    put_bits(&pbc, 5, di->num_pres_code);
> +    put_bits32(&pbc,  di->channel_mask);
> +    put_bits(&pbc, 1, di->base_sample_freq_code);
> +    put_bits(&pbc, 2, di->sample_rate_mod);
> +    put_bits(&pbc, 3, di->rep_type);
> +    put_bits(&pbc, 3, 0);
> +    put_bits(&pbc, 1, 0);
> +    put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for
> each presentation.
> +    flush_put_bits(&pbc); // byte align
> +    size = put_bits_count(&pbc) >> 3;
> +    AV_WB32(udts, size);
> +
> +    ret = ff_alloc_extradata(par, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    memcpy(par->extradata, udts, size);
> +
> +    return 0;
> +}
> +
> +static int read_header(AVFormatContext *s)
> +{
> +    AVIOContext *pb = s->pb;
> +    AVStream *st = avformat_new_stream(s, NULL);
> +    DTSUHDDemuxContext *dtsuhd = s->priv_data;
> +    DTSUHDDescriptorInfo di;
> +    DTSUHDFrameInfo fi;
> +    int buf_bytes;
> +    int ret = DTSUHD_INVALID_FRAME;
> +    int data_start;
> +
> +    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
> +        return AVERROR(EIO);
> +
> +    dtsuhd->buf = av_malloc(DTSUHD_BUFFER_SIZE);
> +    dtsuhd->dtsuhd = dtsuhd_create();
> +    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st)
> +        return AVERROR(ENOMEM);
> +
> +    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
> +    if (buf_bytes < 0)
> +        return buf_bytes;
> +
> +    data_start = dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes,
> &dtsuhd->data_end);
> +    dtsuhd->data_end += data_start;
> +    if (data_start == 0)
> +        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file,
> decode frames to end of file.
> +
> +    data_start = find_first_syncword(dtsuhd, data_start);
> +    if (avio_seek(pb, data_start, SEEK_SET) < 0)
> +        return AVERROR(EINVAL);
> +
> +    ret = dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
> +        buf_bytes - data_start, &fi, &di);
> +    if (ret != DTSUHD_OK || !di.valid) {
> +        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may
> be invalid.\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> +    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> +    st->codecpar->codec_id = s->iformat->raw_codec_id;
> +    st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
> +    st->codecpar->ch_layout.nb_channels = di.channel_count;
> +    st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask;
> +    st->codecpar->codec_tag = AV_RL32(di.coding_name);
> +    st->codecpar->frame_size = 512 << di.frame_duration_code;
> +    st->codecpar->sample_rate = di.sample_rate;
> +
> +#if FF_API_OLD_CHANNEL_LAYOUT
> +FF_DISABLE_DEPRECATION_WARNINGS
> +    st->codecpar->channels = di.channel_count;
> +    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
> +FF_ENABLE_DEPRECATION_WARNINGS
> +#endif
> +
> +    ret = write_extradata(st->codecpar, &di);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (st->codecpar->sample_rate)
> +        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> +
> +    return 0;
> +}
> +
> +static int read_packet(AVFormatContext *s, AVPacket *pkt)
> +{
> +    DTSUHDDemuxContext *dtsuhd = s->priv_data;
> +    int64_t size, left;
> +    int ret;
> +
> +    left = dtsuhd->data_end - avio_tell(s->pb);
> +    size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE);
> +    if (size <= 0)
> +        return AVERROR_EOF;
> +
> +    ret = av_get_packet(s->pb, pkt, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    pkt->stream_index = 0;
> +
> +    return ret;
> +}
> +
> +AVInputFormat ff_dtsuhd_demuxer = {
> +    .name           = "dtsuhd",
> +    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
> +    .priv_data_size = sizeof(DTSUHDDemuxContext),
> +    .read_probe     = probe,
> +    .read_header    = read_header,
> +    .read_packet    = read_packet,
> +    .read_close     = read_close,
> +    .flags          = AVFMT_GENERIC_INDEX,
> +    .extensions     = "dtsx",
> +    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
> +};
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index c370922c7d..e727407694 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -762,6 +762,24 @@ static int mov_write_esds_tag(AVIOContext *pb,
> MOVTrack *track) // Basic
>       return update_size(pb, pos);
>   }
>   +static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
> +{
> +    if (track->vos_len < 12) {
> +        av_log(pb, AV_LOG_ERROR,
> +               "Cannot write moov atom before DTS-UHD packets."
> +               " Set the delay_moov flag to fix this.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Write vos_data is udts box. */
> +    if (memcmp(track->vos_data + 4, "udts", 4) == 0) {
> +        avio_write(pb, track->vos_data, track->vos_len);
> +        return track->vos_len;
> +    }
> +
> +    return 0;
> +}
> +
>   static int mov_pcm_le_gt16(enum AVCodecID codec_id)
>   {
>       return codec_id == AV_CODEC_ID_PCM_S24LE ||
> @@ -1367,6 +1385,8 @@ static int mov_write_audio_tag(AVFormatContext *s,
> AVIOContext *pb, MOVMuxContex
>           ret = mov_write_dops_tag(s, pb, track);
>       else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
>           ret = mov_write_dmlp_tag(s, pb, track);
> +    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
> +        ret = mov_write_udts_tag(pb, track);
>       else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
>           if (track->par->ch_layout.nb_channels > 1)
>               ret = mov_write_chnl_tag(s, pb, track);
> @@ -2781,6 +2801,7 @@ static int mov_write_stbl_tag(AVFormatContext *s,
> AVIOContext *pb, MOVMuxContext
>       if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>            track->par->codec_id == AV_CODEC_ID_TRUEHD ||
>            track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
> +         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
>            track->par->codec_tag == MKTAG('r','t','p',' ')) &&
>           track->has_keyframes && track->has_keyframes < track->entry)
>           mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
> @@ -5673,6 +5694,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt,
> MOVTrack *trk)
>       }
>   }
>   +static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
> +{
> +    if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) {
> +        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
> +        trk->has_keyframes++;
> +    }
> + }
> +
>   static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
>   {
>       int length;
> @@ -6343,6 +6372,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket
> *pkt)
>           mov_parse_vc1_frame(pkt, trk);
>       } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
>           mov_parse_truehd_frame(pkt, trk);
> +    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
> +        mov_parse_dtsuhd_frame(pkt, trk);
>       } else if (pkt->flags & AV_PKT_FLAG_KEY) {
>           if (mov->mode == MODE_MOV && par->codec_id ==
> AV_CODEC_ID_MPEG2VIDEO &&
>               trk->entry > 0) { // force sync sample for the first key
> frame
> @@ -7800,6 +7831,7 @@ static const AVCodecTag codec_mp4_tags[] = {
>       { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
>       { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
>       { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
> +    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
>       { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
>       { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
>       { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
> diff --git a/libavformat/version.h b/libavformat/version.h
> index cc56b7cf5c..384cbd49cc 100644
> --- a/libavformat/version.h
> +++ b/libavformat/version.h
> @@ -31,7 +31,7 @@
>    #include "version_major.h"
>   -#define LIBAVFORMAT_VERSION_MINOR   4
> +#define LIBAVFORMAT_VERSION_MINOR   5
>   #define LIBAVFORMAT_VERSION_MICRO 101
>    #define LIBAVFORMAT_VERSION_INT
> AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
> --
> 2.17.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Anton Khirnov June 13, 2023, 7:04 p.m. UTC | #21
Quoting Roy Funderburk (2023-06-13 19:43:25)
> 
> 
> On 6/13/23 7:26 AM, Paul B Mahol wrote:
> > Why we need new av_* calls, can you elaborate logic behind such approach to
> > implement parser?
> 
> 
> There is common code for dtsuhd audio frame parsing (dtsuhd_common.c) used by the libavcodec and libavformat DTS-UHD modules.  It is complex enough that we do not want to duplicate it.
> 
> If you refer to the naming of av_*, would changing the names to ff_dtsuhd_* as in libavcodec/aac_ac3_parser.c be more appropriate?

The difference between av* and ff* is not merely cosmetic - av-prefixed
symbols are exported from shared objects [1].

Also do note that sharing structs across libraries opens you to various
compatibility questions [2]. It might be easier to sidestep them by
having a function in libavcodec that accepts AVCodecParameters and fills
them according to the data, rather than pass codec-specific structs
between libavformat and libavcodec.

[1] http://ffmpeg.org/developer.html#Naming-conventions-1
[2] http://ffmpeg.org/developer.html#Library-public-interfaces
Roy Funderburk June 14, 2023, midnight UTC | #22
On 6/13/23 11:35 AM, Paul B Mahol wrote:
> Doing allocation in probe?
> Probing should be very fast.

In line 143 of the avformat patch, memory allocation has been removed from the probe function.

>>+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>+                DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
>>+{
>>+    gb = &h->gb;
>>+    init_get_bits(gb, data, data_bytes * 8);
> init_get_bits8, and check return code.

In line 986 of the avcodec patch, I changed this to use init_get_bits8 and added a return-code check.

>>+        fi->sync = h->is_sync_frame;
>>+        fi->frame_bytes = h->frame_bytes;
>>+        fi->sample_rate = h->sample_rate;
>>+        fi->sample_count = (h->frame_duration * fi->sample_rate) /(h->clock_rate * fraction);
>>+        fi->duration = (double)fi->sample_count / fi->sample_rate;
>Please  no double  type.
>Also make use of av_rescale.

Around line 1108 of the avcodec patch, I found the duration was not needed and removed it.


On 6/13/23 12:04 PM, Anton Khirnov wrote:
> Also do note that sharing structs across libraries opens you to various
> compatibility questions [2]. It might be easier to sidestep them by
> having a function in libavcodec that accepts AVCodecParameters and fills
> them according to the data, rather than pass codec-specific structs
> between libavformat and libavcodec.

In line 1061 of the avcodec patch, I changed the function to:
  int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
                      AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)



Thank you for reviewing this,
-Roy
A parser for DTS-UHD input files, per ETSI TS 103 491, is added
for codec ID AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |    1 +
 libavcodec/codec_desc.c    |    7 +
 libavcodec/codec_id.h      |    1 +
 libavcodec/dtsuhd_common.c | 1075 ++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |   87 +++
 libavcodec/dtsuhd_parser.c |  141 +++++
 libavcodec/parsers.c       |    1 +
 libavcodec/version.h       |    2 +-
 8 files changed, 1314 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2efab60d7d..0b49984902 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1164,6 +1164,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 3e31a1eed6..63dc939905 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3406,6 +3406,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d23549d7e0..a5d580169b 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -542,6 +542,7 @@ enum AVCodecID {
     AV_CODEC_ID_FTR,
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..0199219e87
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,1075 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED        0xFFFF
+#define DTSUHD_UDTS_BUFFER     32 // work buffer to construct 'udts' box
+
+enum RepType { /* Representation type codes; presumably the values held in MDObject.rep_type (TODO confirm). */
+    REP_TYPE_CH_MASK_BASED,                    /* 0 */
+    REP_TYPE_MTRX2D_CH_MASK_BASED,             /* 1 */
+    REP_TYPE_MTRX3D_CH_MASK_BASED,             /* 2 */
+    REP_TYPE_BINAURAL,                         /* 3 */
+    REP_TYPE_AMBISONIC,                        /* 4 */
+    REP_TYPE_AUDIO_TRACKS,                     /* 5 */
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,      /* 6 */
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF, /* 7 */
+};
+
+typedef struct MDObject { /* One metadata object slot; MD01.object[] holds 257 of them. */
+    int started;  /* Object seen since last reset. */
+    int pres_index; /* Index into DTSUHD.audio[] (see find_default_audio). */
+    int rep_type; /* Handed back through extract_object_info; presumably an enum RepType value (TODO confirm). */
+    int ch_activity_mask; /* Bit set matched against activity_map[] in extract_object_info. */
+} MDObject;
+
+typedef struct MD01 { /* State for one MD01 chunk; elements of DTSUHD.md01[], keyed by chunk_id. */
+    GetBitContext gb; /* Bit reader used by get_bits_md01/skip_bits_md01 while buf is non-NULL. */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id; /* Set by chunk_append_md01, matched by chunk_find_md01. */
+    int object_list[256]; int object_list_count;
+    int packets_acquired;
+    int static_md_extracted;
+    int static_md_packets;
+    int static_md_packet_size;
+    int static_md_update_flag;
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data; buf is freed by chunk_reset */
+} MD01;
+
+typedef struct NAVI { /* Element type of the DTSUHD.navi[] array. NOTE(review): field meanings are not documented here - add them from the spec. */
+    int bytes;
+    int id;
+    int index;
+    int present;
+} NAVI;
+
+typedef struct UHDAudio { /* Element of DTSUHD.audio[256]; looked up by MDObject.pres_index. */
+    int mask;
+    int selectable; /* Non-zero: find_default_audio may choose objects that reference this entry. */
+} UHDAudio;
+
+typedef struct UHDChunk { /* Element type of the DTSUHD.chunk[] array. */
+    int crc_flag; /* presumably: this chunk carries a CRC (TODO confirm) */
+    int bytes; /* presumably the chunk size in bytes (TODO confirm) */
+} UHDChunk;
+
+struct DTSUHD { /* Parser state, referred to as `DTSUHD` throughout this file. */
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc;
+    GetBitContext gb; /* Frame bit reader; get_bits_md01/skip_bits_md01 fall back to it when MD01.buf is NULL. */
+    MD01 *md01; int md01_count; /* Grown one entry at a time by chunk_append_md01; released by chunk_reset. */
+    NAVI *navi; int navi_alloc, navi_count;
+    UHDAudio audio[256]; /* Indexed by MDObject.pres_index. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count;
+    int chunk_bytes;
+    int clock_rate;
+    int frame_bytes;
+    int frame_duration;
+    int frame_duration_code; /* update_descriptor sets frame_size = 512 << frame_duration_code. */
+    int ftoc_bytes;
+    int major_version; /* update_descriptor picks coding name "dtsy" when > 2, otherwise "dtsx". */
+    int num_audio_pres;
+    int sample_rate;
+    int sample_rate_mod;
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1;
+    unsigned saw_sync:1;
+};
+
+/* Read `bits` bits from md01->gb when md01->buf is set, otherwise from the frame reader h->gb. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Skip `bits` bits in md01->gb when md01->buf is set, otherwise in the frame reader h->gb. */
+static inline void skip_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *gb = md01->buf ? &md01->gb : &h->gb;
+
+    skip_bits(gb, bits); /* plain call: `return expr;` is not allowed in a void function (C11 6.8.6.4) */
+}
+
+#define TABLE_BITS 0 /* row of table[][]: number of value bits to read for each 3-bit code */
+#define TABLE_ADD  1 /* row of table[][]: constant added to the value that was read */
+/* Read a variable-length field: a 1-3 bit prefix selects how many value bits
+   follow and which offset is added.  Specification Table 5-2 pseudo code
+   defaults its 'add' parameter to true; Table 7-30 passes an explicit false,
+   most other calls omit extractAndAdd.  Here the indirection index and the
+   add parameter are pre-applied to the table itself.  A specification table
+   is converted to the table used by get_bits_var as follows:
+
+    int original_table[4] = { 1, 2, 3, 4 }; // From specification pseudo code
+    int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; // Indirection index
+    for (i = 0; i < 8; i++) {
+        table[TABLE_BITS][i] = original_table[index_table[i]];
+        table[TABLE_ADD][i] = 0;
+        for (index = 0; index < index_table[i]; index++)
+            table[TABLE_ADD][i] += original_table[index]; // NOTE(review): or 1 << original_table[index]? check Table 5-2
+    }
+*/
+static int get_bits_var(GetBitContext *gb, const int table[2][8])
+{
+    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 }; /* prefix length consumed for each 3-bit peek */
+    int code = show_bits(gb, 3); /* value range is [0, 7] */
+
+    skip_bits(gb, bits_used[code]); /* drop only the prefix; any further peeked bits are re-read as value bits */
+    if (table[TABLE_BITS][code] == 0) /* zero-width field: returns 0 without applying TABLE_ADD */
+        return 0;
+    return get_bits_long(gb, table[TABLE_BITS][code]) + table[TABLE_ADD][code];
+}
+
+/* Implied by Table 6-2: append a zeroed MD01 entry for chunk `id`; returns it, or NULL on allocation failure. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *md01 = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    if (!md01) /* old array and md01_count stay valid, unlike after a failed av_reallocp_array() */
+        return NULL;
+    h->md01 = md01;
+    md01 = memset(h->md01 + h->md01_count++, 0, sizeof(*md01)); /* new last slot, all fields cleared */
+    md01->chunk_id = id;
+    return md01;
+}
+
+/* Look up the first MD01 entry whose chunk_id equals id; NULL when none matches. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int idx = 0;
+    while (idx < h->md01_count) {
+        if (h->md01[idx].chunk_id == id)
+            return &h->md01[idx];
+        idx++;
+    }
+    return NULL;
+}
+
+/* Table 6-3: free every MD01 scratch buffer, then the MD01 array itself. */
+static void chunk_reset(DTSUHD *h)
+{
+    int remaining = h->md01_count;
+
+    while (remaining-- > 0)
+        av_freep(&h->md01[remaining].buf);
+    h->md01_count = 0;
+    av_freep(&h->md01);
+}
+
+/* Scan MD01 chunks in order.  In the first chunk that holds a started object
+   whose presentation (h->audio[pres_index]) is selectable, return the such
+   object with the lowest pres_index (first one wins a tie); NULL if none. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    MDObject *best = NULL, *cand;
+    int chunk, k;
+
+    for (chunk = 0; chunk < h->md01_count && !best; chunk++) {
+        for (k = 0; k < 257; k++) {
+            cand = h->md01[chunk].object + k;
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (!best || cand->pres_index < best->pres_index)
+                best = cand;
+        }
+    }
+
+    return best;
+}
+
+/* Fold a metadata object into descriptor fields.  Each activity_map row whose
+   activity bit(s) intersect object->ch_activity_mask is OR-ed into *channel_mask
+   (ETSI TS 103 491 Table 7-28 / SCTE DVS 243-4 Rev. 0.2 DG X Table 4 layout) and
+   into *ffmpeg_channel_mask (AV_CH_* bits); *rep_type is overwritten with
+   object->rep_type.  All outputs are left untouched when object is NULL. */
+static void extract_object_info(MDObject *object, int *rep_type,
+                                int *channel_mask, uint64_t *ffmpeg_channel_mask)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT }, // AV_CH_* bit masks, not AV_CHAN_* enum indices
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT }, // NOTE(review): two activity bits; AV_CH_* pair repeats the 0x000020 row - confirm
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT }, // NOTE(review): AV_CH_* pair repeats the 0x008000 row - confirm
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        for (i = 0; activity_map[i].activity_mask; i++) { // stops at the all-zero terminator row
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                *channel_mask |= activity_map[i].channel_mask; // accumulates into the caller's value
+                *ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        *rep_type = object->rep_type;
+    }
+}
+
+/* Assemble information for MP4 Sample Entry box.  Sample Size is always
+   16 bits.  The coding name is the name of the SampleEntry sub-box and is
+   'dtsx' unless the version of the bitstream is > 2.
+   If DecoderProfile == 2, then MaxPayloadCode will be zero.
+
+   Fills codecpar from the parsed stream state in h and, when both udts and
+   udts_size are non-NULL, allocates a buffer with the serialized 'udts' box.
+   On allocation failure *udts is NULL and *udts_size is 0.
+
+   A native channel layout is only reported when the FFmpeg mask describes
+   exactly as many channels as the ETSI channel mask; two ETSI speaker pairs
+   can map onto the same FFmpeg channels, and a native layout whose mask
+   popcount differs from nb_channels is invalid.  Otherwise the layout is
+   reported as unspecified with the ETSI channel count.
+*/
+static void update_descriptor(DTSUHD *h, AVCodecParameters *codecpar,
+                              uint8_t **udts, int *udts_size)
+{
+    PutBitContext pbc;
+    int channel_mask = 0, rep_type = 0;
+    int nb_channels;
+    static const char *coding_name[] = { "dtsx", "dtsy" };
+    uint64_t ffmpeg_channel_mask = 0;
+
+    extract_object_info(find_default_audio(h), &rep_type, &channel_mask, &ffmpeg_channel_mask);
+    nb_channels = av_popcount(channel_mask);
+    if (!ffmpeg_channel_mask || av_popcount64(ffmpeg_channel_mask) != nb_channels)
+        ffmpeg_channel_mask = 0; /* cannot be expressed as a native layout */
+
+    codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    codecpar->ch_layout.order       = ffmpeg_channel_mask ? AV_CHANNEL_ORDER_NATIVE
+                                                          : AV_CHANNEL_ORDER_UNSPEC;
+    codecpar->ch_layout.nb_channels = nb_channels;
+    codecpar->ch_layout.u.mask      = ffmpeg_channel_mask;
+    codecpar->codec_tag             = AV_RL32(coding_name[h->major_version > 2]);
+    codecpar->frame_size            = 512 << h->frame_duration_code;
+    codecpar->sample_rate           = h->sample_rate;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    codecpar->channels              = codecpar->ch_layout.nb_channels;
+    codecpar->channel_layout        = ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (udts && udts_size) {
+        *udts_size = 0;
+        *udts = av_calloc(1, DTSUHD_UDTS_BUFFER);
+        if (*udts) {
+            init_put_bits(&pbc, *udts, DTSUHD_UDTS_BUFFER);
+            put_bits32(&pbc, 0); // udts box size
+            put_bits32(&pbc, AV_RB32("udts")); // udts box signature
+            put_bits(&pbc, 6, h->major_version - 2);
+            // NOTE(review): frame_duration_code is read as 3 bits but written
+            // as 2, and num_audio_pres - 1 is written as 5 bits; put_bits()
+            // does not mask its value - confirm the ranges fit.
+            put_bits(&pbc, 2, h->frame_duration_code);
+            put_bits(&pbc, 3, h->major_version > 2);
+            put_bits(&pbc, 5, h->num_audio_pres - 1);
+            put_bits32(&pbc,  channel_mask);
+            put_bits(&pbc, 1, h->sample_rate == 48000);
+            put_bits(&pbc, 2, h->sample_rate_mod);
+            put_bits(&pbc, 3, rep_type);
+            put_bits(&pbc, 3, 0);
+            put_bits(&pbc, 1, 0);
+            put_bits64(&pbc, h->num_audio_pres, 0); // ID Tag present for each presentation.
+            flush_put_bits(&pbc); // byte align
+            *udts_size = put_bytes_output(&pbc);
+            AV_WB32(*udts, *udts_size); // patch the real box size into the first field
+        }
+    }
+}
+
+/* Table 6-17 p47.
+   For each of the low 'index' bits that is set in 'mask', consume one
+   variable-length value from the bitstream and discard it.  On non-sync
+   frames a one-bit flag is read first and the value is only present when
+   that flag is set.  Always returns 0.
+*/
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const int table[2][8] = {
+        { 4, 4, 4, 4, 8, 8, 16, 32 }, { 0, 0, 0, 0, 16, 16, 272, 65808 }
+    };
+    GetBitContext *gb = &h->gb;
+    int bit = 0;
+
+    while (bit < index) {
+        const int selected = (mask >> bit) & 0x01;
+
+        bit++;
+        if (!selected)
+            continue;
+        /* Sync frames always carry the list; otherwise a flag announces it. */
+        if (!h->is_sync_frame && !get_bits1(gb))
+            continue;
+        get_bits_var(gb, table);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.
+   Parse the audio presentation parameters.  On sync frames the number of
+   presentations, each presentation's selectable flag and its dependency
+   mask on earlier presentations are read and stored in h->audio[]; on
+   non-sync frames the stored values are reused.  For every selectable
+   presentation the explicit object lists are then skipped.
+
+   Returns 0 on success, 1 on failure.  A selectable presentation with an
+   index above 32 is rejected: its dependency mask would need more bits than
+   get_bits_long() can read and than the int mask handed to
+   parse_explicit_object_lists() can hold.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    unsigned read_mask; /* unsigned so that the >>= 1 loop always terminates */
+    static const int table[2][8] = {
+        { 0, 0, 0, 0, 2, 2, 4, 5 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->num_audio_pres = 1;
+        else
+            h->num_audio_pres = get_bits_var(gb, table) + 1;
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (audio > 32)
+                return 1; /* unsupported, see function comment */
+
+            if (h->is_sync_frame) {
+                /* get_bits() is limited to 25 bits; get_bits_long() reads up to 32. */
+                read_mask = (audio > 0) ? get_bits_long(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= (unsigned)get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40.
+   Read the version fields.  A one-bit selector chooses 3-bit or 6-bit wide
+   fields; the first field plus 2 becomes h->major_version and a second
+   field of the same width is skipped.
+*/
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int field_width = 6;
+
+    if (get_bits1(gb))
+        field_width = 3;
+
+    h->major_version = 2 + get_bits(gb, field_width);
+    skip_bits(gb, field_width);
+}
+
+/* Table 6-12 p 40.
+   Parse the stream parameters at the start of the FTOC and verify the FTOC
+   CRC.  All fields are only read on sync frames; on non-sync frames the
+   values already stored in h are left untouched.
+   Returns 0 on success, 1 on CRC mismatch or when the duration or clock
+   rate code selects a zero table entry.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    /* The CRC is checked unless this is a non-sync full-channel-mix frame.
+       It runs over the first ftoc_bytes of the frame and must come out 0. */
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        /* Frame duration = base duration * (frame_duration_code + 1). */
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        /* A set flag is followed by a 36-bit field that is skipped. */
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        /* Sample rate = clock rate * 2^sample_rate_mod. */
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.
+   Zero the byte count of every navi entry that is not marked present.
+*/
+static void navi_purge(DTSUHD *h)
+{
+    int idx = h->navi_count;
+
+    while (idx-- > 0) {
+        if (h->navi[idx].present)
+            continue;
+        h->navi[idx].bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.
+   Zero the entries currently in use (if a list is allocated) and mark the
+   navi list as empty.  The allocation itself is kept.
+*/
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL) {
+        const size_t used_bytes = sizeof(h->navi[0]) * h->navi_count;
+
+        memset(h->navi, 0, used_bytes);
+    }
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.
+   Reset the 'present' flag of every navi entry in use.
+*/
+static void navi_clear_present(DTSUHD *h)
+{
+    int remaining = h->navi_count;
+
+    while (remaining > 0) {
+        remaining--;
+        h->navi[remaining].present = 0;
+    }
+}
+
+/* Table 6-23 p51.
+   Look up the navi entry whose index equals desired_index and mark it
+   present.  If there is none, the first free entry (not present and zero
+   bytes) is reused, or the list is grown by one entry, and the entry is
+   initialized with id 256 and zero bytes.
+
+   Returns 0 on success with the position stored in *list_index, 1 if the
+   list could not be grown.  In that case the list is left empty, because
+   av_reallocp_array() frees the old array on failure.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        /* Remember the lowest free slot in case no match is found. */
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            if (av_reallocp_array(&h->navi, navi_alloc, sizeof(*h->navi))) {
+                /* h->navi is NULL now; keep the bookkeeping consistent so
+                   later loops over navi_count do not dereference it. */
+                h->navi_count = 0;
+                h->navi_alloc = 0;
+                return 1;
+            }
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.
+   Read the chunk navigation tables from the FTOC: first the chunk list
+   (size and CRC flag per chunk), then the audio chunk list (index, optional
+   ID and size per audio chunk).  h->chunk_bytes accumulates the sizes of
+   all listed chunks and audio chunks.
+
+   Returns 0 on success, 1 if a list could not be (re)allocated.  After a
+   failed chunk list allocation the list is left empty, because
+   av_reallocp_array() frees the old array on failure.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const int table2468[2][8] = {
+        { 2, 2, 2, 2, 4, 4, 6, 8 }, { 0, 0, 0, 0, 4, 4, 20, 84 }
+    };
+    static const int table_audio_chunk_sizes[2][8] = {
+        { 9, 9, 9, 9, 11, 11, 13, 16 }, { 0, 0, 0, 0, 512, 512, 2560, 10752 }
+    };
+    static const int table_chunk_sizes[2][8] = {
+        { 6, 6, 6, 6, 9, 9, 12, 15 }, { 0, 0, 0, 0, 64, 64, 576, 4672 }
+    };
+
+    h->chunk_bytes = 0;
+    /* Full-channel-mix streams carry one chunk on sync frames, none otherwise. */
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        if (av_reallocp_array(&h->chunk, chunk_alloc, sizeof(*h->chunk))) {
+            /* h->chunk is NULL now; make sure nobody indexes it. */
+            h->chunk_count = 0;
+            h->chunk_alloc = 0;
+            return 1;
+        }
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468);
+
+    /* A sync frame starts a fresh navi list; other frames update it. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, table2468);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not referenced by this frame no longer contribute bytes. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.
+   Fill md01->object_list and md01->object_list_count.  Full-channel-mix
+   streams have a single object with ID 256.  Otherwise the count is a
+   variable-length value and every ID is preceded by a one-bit flag that
+   selects an 8-bit (set) or 4-bit (clear) ID.  Always returns 0.
+
+   NOTE(review): the count read from the bitstream is not checked here
+   against the capacity of md01->object_list, which is declared outside this
+   function - confirm it cannot overflow.
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    static const int table1[2][8] = {
+        { 3, 3, 3, 3, 4, 4, 6, 8 }, { 0, 0, 0, 0, 8, 8, 24, 88 }
+    };
+    GetBitContext *gb = &h->gb;
+    int n;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+        return 0;
+    }
+
+    md01->object_list_count = get_bits_var(gb, table1);
+    for (n = 0; n < md01->object_list_count; n++) {
+        const int id_bits = get_bits1(gb) ? 8 : 4;
+
+        md01->object_list[n] = get_bits(gb, id_bits);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.
+   Skip one parameter set: 6 bits of rLoudness followed by 2 more bits when
+   nominal_flag is set, or by 5 + 4 more bits when it is clear.
+*/
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    skip_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        skip_bits_md01(h, md01, 2);
+    } else {
+        skip_bits_md01(h, md01, 5);
+        skip_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8.
+   Walk the static metadata.  The loudness parameter sets are always
+   skipped; when only_first is non-zero the function returns right after
+   them.  Otherwise the remaining fields are skipped as well, the md01 bit
+   reader is advanced to the end of the static metadata (not for
+   full-channel-mix streams) and md01->static_md_extracted is set.
+   Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        skip_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                skip_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) /* smooth md present */
+            skip_bits_md01(h, md01, 6 * 6);
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* static_md_packet_size counts bytes while get_bits_count() counts
+           bits, so convert before computing what is left to skip. */
+        i = md01->static_md_packets * md01->static_md_packet_size * 8 - get_bits_count(&md01->gb);
+        if (i > 0)
+            skip_bits(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.
+   Collect multi-frame static metadata.  On a sync frame the packet count
+   and the packet size in bytes are read, md01->buf is grown to hold all
+   packets and the md01 bit reader is pointed at it.  While packets are
+   outstanding, one packet is copied from the main bitstream into md01->buf.
+   The static metadata is parsed in full once the last packet has arrived,
+   or loudness-only after the first of several packets.
+
+   md01->buf is allocated with AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes
+   after the payload, as the bit reader may read past the last payload byte.
+
+   Returns 0 on success, 1 on allocation or parse failure.  After a failed
+   allocation no packets are expected, because av_reallocp() frees the old
+   buffer on failure.
+
+   NOTE(review): the second parse_static_md_params() call continues from the
+   bit position left by the first (only_first) call unless get_bits_md01()
+   rewinds - confirm against that helper.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const int table1[2][8] = {
+        { 0, 0, 0, 0, 6, 6, 9, 12 }, { 0, 0, 0, 0, 1, 1, 65, 577 }
+    };
+    static const int table2[2][8] = {
+        { 5, 5, 5, 5, 7, 7, 9, 11 }, { 0, 0, 0, 0, 32, 32, 160, 672 }
+    };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n + AV_INPUT_BUFFER_PADDING_SIZE)) {
+                /* md01->buf is NULL now; do not keep stale sizes around. */
+                md01->buf_bytes = 0;
+                md01->static_md_packets = 0;
+                return 1;
+            }
+            memset(md01->buf + n, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already collected. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Return 1 if suitable, 0 if not.  Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75
+   Object IDs of 224 and above are always suitable and consume no bits.
+   For other IDs a one-bit flag decides; when it is clear, one further bit
+   and a variable-length-sized block of render data are skipped.
+*/
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const int table[2][8] = {
+        { 8, 8, 8, 8, 10, 10, 12, 14 }, { 0, 0, 0, 0, 256, 256, 1280, 5376 }
+    };
+    GetBitContext *gb = &h->gb;
+    int suitable = object_id >= 224;
+
+    if (!suitable)
+        suitable = get_bits1(gb);
+
+    if (!suitable) {
+        /*  Reject the render and skip the render data. */
+        skip_bits1(gb);
+        skip_bits(gb, get_bits_var(gb, table));
+    }
+
+    return suitable;
+}
+
+/* Table 7-26.
+   Determine object->ch_activity_mask.  Binaural objects use table index 1
+   without reading anything; other objects read a 4-bit index.  Index 14 is
+   followed by an explicit 16-bit mask, index 15 by an explicit 32-bit mask,
+   and any other index selects a predefined mask from Table 7-27.
+*/
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    GetBitContext *gb = &h->gb;
+    int ch_index = 1;
+
+    if (object->rep_type != REP_TYPE_BINAURAL)
+        ch_index = get_bits(gb, 4);
+
+    switch (ch_index) {
+    case 14:
+        object->ch_activity_mask = get_bits(gb, 16);
+        break;
+    case 15:
+        object->ch_activity_mask = get_bits_long(gb, 32);
+        break;
+    default:
+        object->ch_activity_mask = mask_table[ch_index];
+        break;
+    }
+}
+
+/* Table 7-22.
+   Parse the start of one object's metadata.  Only when start_frame_flag is
+   set is the representation type read into object->rep_type; for the
+   channel-mask based representation types the channel activity mask is
+   then extracted via parse_ch_mask_params().  The remainder of the object
+   is not parsed.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const int table2[2][8] = {
+        { 1, 1, 1, 1, 4, 4, 4, 8 }, { 0, 0, 0, 0, 2, 2, 18, 34 }
+    };
+    static const int table3[2][8] = {
+        { 3, 3, 3, 3, 3, 3, 4, 8 }, { 0, 0, 0, 0, 8, 8, 16, 32 }
+    };
+
+    /* One bit is skipped for every object except ID 256. */
+    skip_bits(gb, object_id != 256);
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            /* Object ID 256 carries none of the optional fields below. */
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                /* Two variable-length values are read and discarded. */
+                get_bits_var(gb, table2);
+                get_bits_var(gb, table3);
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                /* object_3d_metadata_flag is always 0 in this branch, since
+                   the switch above sets at most one of the two flags. */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag);
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4.
+   Parse one MD01 metadata chunk for presentation pres_index: optional
+   scaling data and multi-frame static metadata (selectable presentations
+   only), then the object metadata of the first object in
+   md01->object_list that is suitable for rendering.
+   Returns 0 on success, 1 if a nested parser failed.
+*/
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        /* Four optional 5-bit values, each announced by a one-bit flag. */
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    /* All per-object state, including the 'started' flags, is reset here. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* IDs 224..255 do not go through parse_object_metadata(). */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* NOTE(review): this unconditional break stops after the first
+           suitable object - confirm that is intended. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2.
+   Walk the chunks listed in h->chunk[].  For each chunk the CRC is verified
+   when its crc_flag is set, the 8-bit chunk ID is read and, for ID 1, the
+   chunk is parsed as MD01 metadata.  The bit reader is then moved to the
+   end of the chunk regardless of how much of it was consumed.
+   Returns 0 on success, 1 on CRC mismatch, a presentation index above 255,
+   allocation failure or a nested parse failure.
+*/
+static int parse_chunks(DTSUHD *h)
+{
+    static const int table_aud_pres[2][8] = {
+        { 0, 0, 0, 0, 2, 2, 4, 4 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+    GetBitContext *gb = &h->gb;
+    int n;
+
+    for (n = 0; n < h->chunk_count; n++) {
+        const int chunk_end_bit = get_bits_count(gb) + h->chunk[n].bytes * 8;
+        const uint8_t *chunk_data = h->data + get_bits_count(gb) / 8;
+        uint32_t id;
+
+        if (h->chunk[n].crc_flag &&
+            av_crc(h->crc, DTSUHD_CRC_SEED, chunk_data, h->chunk[n].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            const int pres_index = get_bits_var(gb, table_aud_pres);
+            MD01 *md01;
+
+            if (pres_index > 255)
+                return 1;
+
+            /* Reuse the MD01 state for this chunk ID, creating it on first use. */
+            md01 = chunk_find_md01(h, id);
+            if (!md01)
+                md01 = chunk_append_md01(h, id);
+            if (!md01)
+                return 1;
+
+            if (parse_md_chunk_list(h, md01))
+                return 1;
+            if (parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        skip_bits(gb, chunk_end_bit - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/* Helper function for av_dtsuhd_frame and ff_dtsuhd_parse_frame.
+   Parse the frame header (FTOC) far enough to know the frame size: sync
+   word, FTOC size, stream parameters, audio presentation parameters and
+   chunk navigation.  On success h->frame_bytes holds the frame size.
+   Returns DTSUHD_OK, or another DTSUHDStatus value on failure.
+*/
+static int parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes)
+{
+    GetBitContext *gb;
+    int syncword;
+    static const int table_payload[2][8] = {
+        { 5, 5, 5, 5, 8, 8, 10, 12 }, { 0, 0, 0, 0, 32, 32, 288, 1312 }
+    };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    /* The handle keeps a pointer to the caller's buffer; it is used for the
+       CRC checks while this frame is being parsed. */
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    if (init_get_bits8(gb, data, data_bytes) < 0)
+        return DTSUHD_INVALID_FRAME;
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    /* saw_sync is sticky: once a sync frame was seen, non-sync frames are accepted. */
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    return DTSUHD_OK;
+}
+
+/** Allocate a parsing handle.  The handle should be used to parse one
+    DTS:X Profile 2 audio stream and then be freed with av_dtsuhd_destroy().
+    Do not use the same parsing handle to parse multiple audio streams.
+
+  @return Zero-initialized parsing handle with its CRC table set up,
+          or NULL if the allocation failed.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(*handle));
+
+    if (!handle)
+        return NULL;
+
+    handle->crc = av_crc_get_table(AV_CRC_16_CCITT);
+    return handle;
+}
+
+/** Free all resources used by the parsing handle.
+
+  @param[in] h Handle allocated by av_dtsuhd_create(); NULL is a no-op
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->navi);
+    av_freep(&h->chunk);
+    av_freep(&h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+    The chunks are only parsed, and codecpar / udts only filled in, when the
+    frame is a sync frame and codecpar is not NULL.
+
+  @param[in] h Handle allocated by av_dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] codecpar Filled out codec parameters with results of descriptor
+              parsing, may be NULL
+  @param[out] udts Filled MP4 udts sample entry descriptor, may be NULL.
+  @param[out] udts_size Size of valid 'udts' data, may be NULL.
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
+{
+    const int status = parse_frame(h, data, data_bytes);
+
+    if (status != DTSUHD_OK || !codecpar || !h->is_sync_frame)
+        return status;
+
+    /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+       following the FTOC CRC.
+    */
+    skip_bits(&h->gb, h->ftoc_bytes * 8 - get_bits_count(&h->gb));
+    if (parse_chunks(h))
+        return DTSUHD_INVALID_FRAME;
+
+    update_descriptor(h, codecpar, udts, udts_size);
+    return status;
+}
+
+/** Similar to av_dtsuhd_frame, but for use only within libavcodec.
+    Parses the frame header and reports the frame size, sample rate and
+    sample count instead of filling codec parameters.
+
+  @param[in] h Handle allocated by av_dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int ff_dtsuhd_parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                          DTSUHDFrameInfo *fi)
+{
+    const int status = parse_frame(h, data, data_bytes);
+    int fraction = 1;
+    int n;
+
+    if (status != DTSUHD_OK || !fi)
+        return status;
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (n = 0; n < h->navi_count; n++) {
+        if (!h->navi[n].present)
+            continue;
+
+        switch (h->navi[n].id) {
+        case 3:
+            fraction = 2;
+            break;
+        case 4:
+            fraction = 4;
+            break;
+        default:
+            break;
+        }
+    }
+
+    fi->sync = h->is_sync_frame;
+    fi->frame_bytes = h->frame_bytes;
+    fi->sample_rate = h->sample_rate;
+    fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);
+
+    return status;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that chunk.
+
+    The chunk list is walked with offsets that are checked against the
+    buffer size, so a chunk claiming to extend past the buffer ends the
+    search instead of advancing a pointer out of bounds.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload as stored in the chunk
+              header, may be NULL
+  @return STRMDATA payload offset, 0 if the buffer does not start with a
+          DTSHDHDR chunk or no STRMDATA chunk header lies within it, or
+          AVERROR_INVALIDDATA if a chunk header carries an implausible size
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    size_t offset = 0;
+    size_t size;
+
+    if (!data_start || data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+    size = data_size;
+
+    while (size - offset >= DTSUHD_CHUNK_HEADER) {
+        const uint8_t *chunk = data_start + offset;
+        const uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)offset + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* The next chunk header would start beyond the buffer. */
+        if (chunk_size > size - offset - DTSUHD_CHUNK_HEADER)
+            break;
+        offset += chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..b92ed9808b
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,87 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "codec_par.h"
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from av_dtsuhd_frame() and ff_dtsuhd_parse_frame().
+   DTSUHD_OK is 0; every other value reports a failure. */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Stream information describing the MP4 sample entry descriptor of a parsed
+   audio frame.
+   NOTE(review): none of the functions declared in this header takes or
+   returns this structure - confirm it is still needed. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code;
+    int channel_count;
+    int decoder_profile_code;
+    int frame_duration_code;
+    int max_payload_code;
+    int num_pres_code;
+    int rep_type;
+    int sample_rate;
+    int sample_rate_mod;
+    int sample_size;
+    int channel_mask; /* presumably the ETSI TS 103 491 channel mask - TODO confirm */
+    uint64_t ffmpeg_channel_mask; /* presumably an FFmpeg AV_CH_* mask - TODO confirm */
+} DTSUHDDescriptorInfo;
+
+/* Frame information returned by ff_dtsuhd_parse_frame() for one parsed
+   audio frame. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+/* Opaque parsing handle; the structure is defined in dtsuhd_common.c. */
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+/* Allocate a parsing handle; returns NULL on allocation failure. */
+struct DTSUHD *av_dtsuhd_create(void);
+/* Free a handle returned by av_dtsuhd_create(); NULL is accepted. */
+void av_dtsuhd_destroy(DTSUHD*);
+/* Parse one frame; on sync frames also fill the codec parameters and the
+   optional udts descriptor (buffer and size).  Returns a DTSUHDStatus. */
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    AVCodecParameters*, uint8_t**, int*);
+/* Locate the STRMDATA payload inside DTSHDHDR file content; returns its
+   offset, 0 if not found, or a negative AVERROR code. */
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size);
+/* Parse one frame header and report size and sample information.
+   Returns a DTSUHDStatus. */
+int ff_dtsuhd_parse_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                          DTSUHDFrameInfo*);
+
+/* Return 1 if the 32-bit word is either DTS-UHD frame signature (sync or
+   non-sync), 0 otherwise. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    switch (syncword) {
+    case DTSUHD_SYNCWORD:
+    case DTSUHD_NONSYNCWORD:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..2860c1eee5
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+/* Capacity of the parser's staging buffer, not counting the input padding
+   that parser_init() allocates on top of it. */
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Private parser state: a contiguous staging buffer plus cursors into it. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd; /* Frame parser handle from av_dtsuhd_create(). */
+    int buf_offset; /* Offset in buf of the first byte not yet handed out. */
+    int buf_bytes; /* Number of valid bytes stored in buf. */
+    int frame_bytes; /* Bytes reported by the last parse; skipped on the next append. */
+    uint8_t *buf; /* Staging buffer allocated in parser_init(). */
+} DTSUHDParseContext;
+
+/* Allocate the frame parser handle and the staging buffer.
+   Returns 0 on success or AVERROR(ENOMEM) if either allocation fails. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = av_dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        /* Release whichever half succeeded: a failed init is not guaranteed
+           to be followed by a parser_close() call, so nothing else would
+           free it. */
+        if (pc->dtsuhd)
+            av_dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Free everything parser_init() allocated.
+   ff_parse_close() is intentionally not called: it frees the 'buffer' member
+   of a ParseContext stored at the start of priv_data, and this parser's
+   private context does not embed a ParseContext. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+}
+
+// Keep data in contiguous buffer as required by dtsuhd_frame.
+//
+// Drops the frame handed out by the previous call, appends as much of the
+// caller's input as fits, and advances to the next syncword.  On return *buf
+// and *buf_size describe the buffered data starting at the current offset and
+// *input_consumed is the number of input bytes copied.
+// Returns nonzero when input was copied but less than DTSUHD_MAX_FRAME_SIZE
+// bytes are buffered, i.e. the caller should wait for more data.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    // Never step past the valid data, even if the reported frame size was
+    // larger than what is buffered.
+    pc->buf_offset = FFMIN(pc->buf_offset + pc->frame_bytes, pc->buf_bytes);
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes from buf_offset onwards are live; copying buf_bytes
+        // bytes from that offset would read beyond the valid data.
+        int remaining = pc->buf_bytes - pc->buf_offset;
+
+        memmove(pc->buf, pc->buf + pc->buf_offset, remaining);
+        pc->buf_bytes = remaining;
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: stage the input, then hand out at most one frame.
+   *poutbuf / *poutbuf_size describe the frame inside the staging buffer, or
+   are NULL / 0 when no frame is produced (more data needed, incomplete
+   frame, or error).  Returns the number of input bytes consumed, or
+   AVERROR_INVALIDDATA when the frame cannot be parsed. */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (ff_dtsuhd_parse_frame(pc->dtsuhd, buf, buf_size, &fi)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        /* Discard what is buffered on the next append and emit nothing. */
+        pc->frame_bytes = buf_size;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* Do not leave the caller's output pointers untouched on error. */
+        *poutbuf      = NULL;
+        *poutbuf_size = 0;
+        return AVERROR_INVALIDDATA;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* Parser registration for AV_CODEC_ID_DTSUHD.  Defined const so that it
+   matches the 'extern const AVCodecParser ff_dtsuhd_parser' declaration
+   added to parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 65bc52fb24..4b7ec515fe 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  18
+#define LIBAVCODEC_VERSION_MINOR  19
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Paul B Mahol June 14, 2023, 5:37 a.m. UTC | #23
On Wed, Jun 14, 2023 at 2:00 AM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

>
> On 6/13/23 11:35 AM, Paul B Mahol wrote:
> > Doing allocation in probe?
> > Probing should be very fast.
>
> In line 143 of the avformat patch, memory allocation is removed from the
> probe
>
> >>+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
> >>+                DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
> >>+{
> >>+    gb = &h->gb;
> >>+    init_get_bits(gb, data, data_bytes * 8);
> > init_get_bits8, and check return code.
>
> In line 986 of the avcodec patch, changed to using init_get_bits8 and
> added return code check.
>
> >>+        fi->sync = h->is_sync_frame;
> >>+        fi->frame_bytes = h->frame_bytes;
> >>+        fi->sample_rate = h->sample_rate;
> >>+        fi->sample_count = (h->frame_duration * fi->sample_rate)
> /(h->clock_rate * fraction);
> >>+        fi->duration = (double)fi->sample_count / fi->sample_rate;
> >Please  no double  type.
> >Also make use of av_rescale.
>
> Around line 1108 of the avcodec patch, I found the duration was not needed
> and removed it.
>
>
> On 6/13/23 12:04 PM, Anton Khirnov wrote:
> > Also do note that sharing structs across libraries opens you to various
> > compatibility questions [2]. It might be easier to sidestep them by
> > having a function in libavcodec that accepts AVCodecParameters and fills
> > them according to the data, rather than pass codec-specific structs
> > between libavformat and libavcodec.
>
> In line 1061 of the avcodec patch, changed the function to:
>   int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>         AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
>
>
>
Usually libavcodec code does use get_vlc2() for variable length codes,
instead of
usage of show_bits/skip_bits/get_bits.




>
> Thank you for reviewing this,
> -Roy_______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Paul B Mahol June 14, 2023, 6:01 a.m. UTC | #24
On Wed, Jun 14, 2023 at 7:37 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Wed, Jun 14, 2023 at 2:00 AM Roy Funderburk <royffmpeg@funderburk.us>
> wrote:
>
>>
>> On 6/13/23 11:35 AM, Paul B Mahol wrote:
>> > Doing allocation in probe?
>> > Probing should be very fast.
>>
>> In line 143 of the avformat patch, memory allocation is removed from the
>> probe
>>
>> >>+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>> >>+                DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
>> >>+{
>> >>+    gb = &h->gb;
>> >>+    init_get_bits(gb, data, data_bytes * 8);
>> > init_get_bits8, and check return code.
>>
>> In line 986 of the avcodec patch, changed to using init_get_bits8 and
>> added return code check.
>>
>> >>+        fi->sync = h->is_sync_frame;
>> >>+        fi->frame_bytes = h->frame_bytes;
>> >>+        fi->sample_rate = h->sample_rate;
>> >>+        fi->sample_count = (h->frame_duration * fi->sample_rate)
>> /(h->clock_rate * fraction);
>> >>+        fi->duration = (double)fi->sample_count / fi->sample_rate;
>> >Please  no double  type.
>> >Also make use of av_rescale.
>>
>> Around line 1108 of the avcodec patch, I found the duration was not
>> needed and removed it.
>>
>>
>> On 6/13/23 12:04 PM, Anton Khirnov wrote:
>> > Also do note that sharing structs across libraries opens you to various
>> > compatibility questions [2]. It might be easier to sidestep them by
>> > having a function in libavcodec that accepts AVCodecParameters and fills
>> > them according to the data, rather than pass codec-specific structs
>> > between libavformat and libavcodec.
>>
>> In line 1061 of the avcodec patch, changed the function to:
>>   int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>         AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
>>
>>
>>
> Usually libavcodec code does use get_vlc2() for variable length codes,
> instead of
> usage of show_bits/skip_bits/get_bits.
>
>
Also there is no reason to use int for elements in tables when max value
can be lower.
Current table reading/handling code should be completely rewritten to use
get_vlc2().
And tables split so length of codes use uint8_t type.


>
>
>
>>
>> Thank you for reviewing this,
>> -Roy_______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
Paul B Mahol June 14, 2023, 6:06 a.m. UTC | #25
On Wed, Jun 14, 2023 at 8:01 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Wed, Jun 14, 2023 at 7:37 AM Paul B Mahol <onemda@gmail.com> wrote:
>
>>
>>
>> On Wed, Jun 14, 2023 at 2:00 AM Roy Funderburk <royffmpeg@funderburk.us>
>> wrote:
>>
>>>
>>> On 6/13/23 11:35 AM, Paul B Mahol wrote:
>>> > Doing allocation in probe?
>>> > Probing should be very fast.
>>>
>>> In line 143 of the avformat patch, memory allocation is removed from the
>>> probe
>>>
>>> >>+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>> >>+                DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
>>> >>+{
>>> >>+    gb = &h->gb;
>>> >>+    init_get_bits(gb, data, data_bytes * 8);
>>> > init_get_bits8, and check return code.
>>>
>>> In line 986 of the avcodec patch, changed to using init_get_bits8 and
>>> added return code check.
>>>
>>> >>+        fi->sync = h->is_sync_frame;
>>> >>+        fi->frame_bytes = h->frame_bytes;
>>> >>+        fi->sample_rate = h->sample_rate;
>>> >>+        fi->sample_count = (h->frame_duration * fi->sample_rate)
>>> /(h->clock_rate * fraction);
>>> >>+        fi->duration = (double)fi->sample_count / fi->sample_rate;
>>> >Please  no double  type.
>>> >Also make use of av_rescale.
>>>
>>> Around line 1108 of the avcodec patch, I found the duration was not
>>> needed and removed it.
>>>
>>>
>>> On 6/13/23 12:04 PM, Anton Khirnov wrote:
>>> > Also do note that sharing structs across libraries opens you to various
>>> > compatibility questions [2]. It might be easier to sidestep them by
>>> > having a function in libavcodec that accepts AVCodecParameters and
>>> fills
>>> > them according to the data, rather than pass codec-specific structs
>>> > between libavformat and libavcodec.
>>>
>>> In line 1061 of the avcodec patch, changed the function to:
>>>   int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>>         AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
>>>
>>>
>>>
>> Usually libavcodec code does use get_vlc2() for variable length codes,
>> instead of
>> usage of show_bits/skip_bits/get_bits.
>>
>>
> Also there is no reason to use int for elements in tables when max value
> can be lower.
> Current table reading/handling code should be completely rewritten to use
> get_vlc2().
> And tables split so length of codes use uint8_t type.
>

In parse_stream_params() function, there is skip_bits(gb, 36*get_bits1(gb))

but skip_bits can read usually max 25 bits.
Use skip_bits_long().



>
>>
>>
>>
>>>
>>> Thank you for reviewing this,
>>> -Roy_______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>
Paul B Mahol June 14, 2023, 6:11 a.m. UTC | #26
On Wed, Jun 14, 2023 at 8:06 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Wed, Jun 14, 2023 at 8:01 AM Paul B Mahol <onemda@gmail.com> wrote:
>
>>
>>
>> On Wed, Jun 14, 2023 at 7:37 AM Paul B Mahol <onemda@gmail.com> wrote:
>>
>>>
>>>
>>> On Wed, Jun 14, 2023 at 2:00 AM Roy Funderburk <royffmpeg@funderburk.us>
>>> wrote:
>>>
>>>>
>>>> On 6/13/23 11:35 AM, Paul B Mahol wrote:
>>>> > Doing allocation in probe?
>>>> > Probing should be very fast.
>>>>
>>>> In line 143 of the avformat patch, memory allocation is removed from
>>>> the probe
>>>>
>>>> >>+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>>> >>+                DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
>>>> >>+{
>>>> >>+    gb = &h->gb;
>>>> >>+    init_get_bits(gb, data, data_bytes * 8);
>>>> > init_get_bits8, and check return code.
>>>>
>>>> In line 986 of the avcodec patch, changed to using init_get_bits8 and
>>>> added return code check.
>>>>
>>>> >>+        fi->sync = h->is_sync_frame;
>>>> >>+        fi->frame_bytes = h->frame_bytes;
>>>> >>+        fi->sample_rate = h->sample_rate;
>>>> >>+        fi->sample_count = (h->frame_duration * fi->sample_rate)
>>>> /(h->clock_rate * fraction);
>>>> >>+        fi->duration = (double)fi->sample_count / fi->sample_rate;
>>>> >Please  no double  type.
>>>> >Also make use of av_rescale.
>>>>
>>>> Around line 1108 of the avcodec patch, I found the duration was not
>>>> needed and removed it.
>>>>
>>>>
>>>> On 6/13/23 12:04 PM, Anton Khirnov wrote:
>>>> > Also do note that sharing structs across libraries opens you to
>>>> various
>>>> > compatibility questions [2]. It might be easier to sidestep them by
>>>> > having a function in libavcodec that accepts AVCodecParameters and
>>>> fills
>>>> > them according to the data, rather than pass codec-specific structs
>>>> > between libavformat and libavcodec.
>>>>
>>>> In line 1061 of the avcodec patch, changed the function to:
>>>>   int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
>>>>         AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
>>>>
>>>>
>>>>
>>> Usually libavcodec code does use get_vlc2() for variable length codes,
>>> instead of
>>> usage of show_bits/skip_bits/get_bits.
>>>
>>>
>> Also there is no reason to use int for elements in tables when max value
>> can be lower.
>> Current table reading/handling code should be completely rewritten to use
>> get_vlc2().
>> And tables split so length of codes use uint8_t type.
>>
>
> In parse_stream_params() function, there is skip_bits(gb,
> 36*get_bits1(gb))
>
> but skip_bits can read usually max 25 bits.
> Use skip_bits_long().
>

Also please remove from structs ':1' suffix and similar if that is not
really required to have exactly single bit max usage signaled per item in
struct.


>
>
>
>>
>>>
>>>
>>>
>>>>
>>>> Thank you for reviewing this,
>>>> -Roy_______________________________________________
>>>> ffmpeg-devel mailing list
>>>> ffmpeg-devel@ffmpeg.org
>>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>>
>>>> To unsubscribe, visit link above, or email
>>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>>
>>>
Roy Funderburk June 14, 2023, 6:24 p.m. UTC | #27
On 6/13/23 11:01 PM, Paul B Mahol wrote:
> On Wed, Jun 14, 2023 at 7:37 AM Paul B Mahol <onemda@gmail.com> wrote:
> Also there is no reason to use int for elements in tables when max value
> can be lower.
> Current table reading/handling code should be completely rewritten to use
> get_vlc2().
> And tables split so length of codes use uint8_t type.


I will split the tables into two arrays and make the first array uint8_t.

I was looking over the get_vlc2() function, and I don't believe it will be
able to replace the current dtsuhd_common.c's get_bits_var.

The current dtsuhd_common function and a sample table parameter:

    static const int uhd_table[2][8] = {
            // TABLE_BITS                     TABLE_ADD
        { 4, 4, 4, 4, 8, 8, 16, 32 }, { 0, 0, 0, 0, 16, 16, 272, 65808 } };
    static int get_bits_var(GetBitContext *gb, const int uhd_table[2][8])
    {
        static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
        int code = show_bits(gb, 3); /* value range is [0, 7] */

        skip_bits(gb, bits_used[code]);
        if (uhd_table[TABLE_BITS][code] == 0)
            return 0;
        return get_bits_long(gb, uhd_table[TABLE_BITS][code]) + uhd_table[TABLE_ADD][code];
    }


It peeks ahead 3 bits, and uses them to read the three 8 element tables: bits_used,
uhd_table[0], uhd_table[1].  Skips "bits_used" bits, reads "uhd_table[0]" bits and
adds to that value the value from "uhd_table[1]"

get_vlc2 uses the GET_VLC macro which goes like this:

    #define GET_VLC(code, name, gb, table, bits, max_depth)         \
        do {                                                        \
            int n, nb_bits;                                         \
            unsigned int index;                                     \
                                                                    \
            index = SHOW_UBITS(name, gb, bits);                     \
            code  = table[index].sym;                               \
            n     = table[index].len;                               \
                                                                    \
            if (max_depth > 1 && n < 0) {                           \
                ...
                nb_bits = -n;                                       \
                                                                    \
                index = SHOW_UBITS(name, gb, nb_bits) + code;       \
                code  = table[index].sym;                           \
                n     = table[index].len;                           \
            ...
            }                                                       \
            SKIP_BITS(name, gb, n);                                 \


The end result from this function is "code".  It needs to be the result of
reading "uhd_table[0][x]" bits added to the value from "uhd_table[1][x]".
But the only way to get that value would be in the last read of
    "index = SHOW_UBITS(name, gb, nb_bits) + code;"
And "index" is not returned from that macro.
Further, index has a maximum size of 32 bits, which is a problem for the next
line "code  = table[index].sym;".  That table would be 4Gigs, and we don't
even want the value from that table.

So that means we can't use the "if" block at all.

This means the best get_vlc2 could do is replace the "show_bits" and "skip_bits"
of the original dtsuhd get_bits_var function.  The last get_bits_long would
still be needed.

In that case, "table[index].len" would have to be from the table "bits_used"
and "table[index].sym" would have to be from the table "uhd_table[0]".
get_vlc2() would return the bits to read for the call to get_bits_long.

But, when building the table using ff_init_vlc_from_lengths, there is this line
from that function in vlc.c:
    code += 1U << (32 - len);
    if (code > UINT32_MAX + 1ULL) {
        av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");

"len" comes from an input table to that function, "bits_used[8]", and that
table has duplicate entries.  Specifically several values of "1".  And that
quickly causes the "overdetermined" error.  Looking over the other functions
in vlc.c, those do not look as if they would work well with the dtsuhd_common
tables either.
Roy Funderburk June 14, 2023, 8:01 p.m. UTC | #28
Hi,

I updated the libavcodec patch per Paul Mahol's reviews:

dtsuhd_common.c:496 get_bits_long instead of get_bits used for reading 36 bits

dtsuhd_common.c:224 get_bits_var changed to accept arrays in VarBits structure, allowing arrays with all values less than 256 to use uint8_t arrays.

Also removed bitfields from structures.

Regards,
-Roy
Parsing of DTS-UHD input files per ETSI TS 102 114 is added
as parser for codec id AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |    1 +
 libavcodec/codec_desc.c    |    7 +
 libavcodec/codec_id.h      |    1 +
 libavcodec/dtsuhd_common.c | 1079 ++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |   87 +++
 libavcodec/dtsuhd_parser.c |  141 +++++
 libavcodec/parsers.c       |    1 +
 libavcodec/version.h       |    2 +-
 8 files changed, 1318 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2efab60d7d..0b49984902 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1164,6 +1164,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 3e31a1eed6..63dc939905 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3406,6 +3406,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d23549d7e0..a5d580169b 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -542,6 +542,7 @@ enum AVCodecID {
     AV_CODEC_ID_FTR,
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..246dfacfac
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,1079 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED        0xFFFF
+#define DTSUHD_UDTS_BUFFER     32 // work buffer to construct 'udts' box
+
+/* Object representation types.
+   NOTE(review): presumably the values stored in MDObject.rep_type, in
+   bitstream code order -- confirm against the specification. */
+enum RepType {
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+/* Per-object metadata kept inside an MD01 chunk. */
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index; /* Index into DTSUHD.audio[]; see find_default_audio(). */
+    int rep_type; /* Copied out by extract_object_info(). */
+    int ch_activity_mask; /* Tested against the activity map in extract_object_info(). */
+} MDObject;
+
+/* State of one MD01 metadata chunk, looked up by chunk_id. */
+typedef struct MD01 {
+    GetBitContext gb; /* Bit reader used by get_bits_md01() when buf is set. */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id; /* Key matched by chunk_find_md01(). */
+    int object_list[256]; int object_list_count;
+    int packets_acquired;
+    int static_md_extracted;
+    int static_md_packets;
+    int static_md_packet_size;
+    int static_md_update_flag;
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+/* One navigation table entry.
+   NOTE(review): field meanings are not visible in this part of the file;
+   presumably 'bytes' is the size of the chunk identified by id/index and
+   'present' marks the entry as in use -- confirm. */
+typedef struct NAVI {
+    int bytes;
+    int id;
+    int index;
+    int present;
+} NAVI;
+
+/* Per-presentation audio state, stored in DTSUHD.audio[]. */
+typedef struct UHDAudio {
+    int mask;
+    int selectable; /* Nonzero entries are eligible in find_default_audio(). */
+} UHDAudio;
+
+/* One chunk entry of the current frame.
+   NOTE(review): presumably crc_flag says whether the chunk carries a CRC and
+   bytes is the chunk size -- confirm against the chunk parsing code. */
+typedef struct UHDChunk {
+    int crc_flag;
+    int bytes;
+} UHDChunk;
+
+/* Lookup tables for get_bits_var(), indexed by the 3-bit prefix code. */
+typedef struct VarBits {
+    uint8_t bits[8]; /* Number of payload bits to read for each prefix. */
+    int add[8]; /* Offset added to the payload value for each prefix. */
+} VarBits;
+
+/* Parser state for one DTS-UHD stream. */
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc;
+    GetBitContext gb; /* Frame bit reader; get_bits_md01() falls back to it. */
+    MD01 *md01; int md01_count; /* Grown by chunk_append_md01(), freed by chunk_reset(). */
+    NAVI *navi; int navi_alloc, navi_count;
+    UHDAudio audio[256]; /* Indexed by MDObject.pres_index. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count;
+    int chunk_bytes;
+    int clock_rate;
+    int frame_bytes;
+    int frame_duration;
+    int frame_duration_code; /* update_descriptor() derives frame_size as 512 << code. */
+    int ftoc_bytes;
+    int major_version; /* Values above 2 select the "dtsy" coding name. */
+    int num_audio_pres;
+    int sample_rate;
+    int sample_rate_mod;
+    unsigned full_channel_mix_flag;
+    unsigned interactive_obj_limits_present;
+    unsigned is_sync_frame;
+    unsigned saw_sync;
+};
+
+/* Fetch 'bits' bits from the MD01 staging buffer when the chunk has one,
+   otherwise straight from the frame bit reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Skip bits in the MD01 buffer (if present), falling back to the frame buffer.
+   Written without 'return <expr>;' because a return statement with an
+   expression is not permitted in a function returning void. */
+static inline void skip_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    if (md01->buf)
+        skip_bits(&md01->gb, bits);
+    else
+        skip_bits(&h->gb, bits);
+}
+
+/* Variable-length field reader (specification Table 5-2).
+
+   The pseudo code in the specification defaults its 'add' parameter to true;
+   Table 7-30 passes an explicit false and most other calls omit the
+   extractAndAdd parameter.  Here both the indirection index and the 'add'
+   handling are folded into the VarBits table ahead of time.  A table from
+   the specification converts to a VarBits table like this:
+
+    VarBits table;
+    int original_table[4] = { 4, 8, 16, 32 }; // From specification pseudo code
+    int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; // Indirection index
+    for (i = 0; i < 8; i++) {
+        table.bits[i] = original_table[index_table[i]];
+        table.add[i] = 0;
+        for (index = 0; index < index_table[i]; index++)
+            table.add[i] += 1 << original_table[index];
+    }
+
+   which yields bits = { 4, 4, 4, 4, 8, 8, 16, 32 } and
+   add = { 0, 0, 0, 0, 16, 16, 272, 65808 }.
+
+   The next three bits are peeked to pick the table row, only the prefix
+   length for that row is consumed, and then bits[row] payload bits are read
+   and offset by add[row].  A row with zero payload bits returns 0.
+*/
+static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
+{
+    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    const int prefix = show_bits(gb, 3); /* value range is [0, 7] */
+    const int payload_bits = var_bits->bits[prefix];
+
+    skip_bits(gb, bits_used[prefix]);
+    return payload_bits ? get_bits_long(gb, payload_bits) + var_bits->add[prefix] : 0;
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+   Grows the MD01 array by one zero-initialised entry tagged with 'id' and
+   returns it, or NULL when the array cannot be grown.  On failure the
+   existing array and md01_count are left untouched, so chunk_reset() can
+   still release every entry's buffer. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    MD01 *entry;
+
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count++;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    return entry;
+}
+
+/* Look up the first MD01 chunk whose chunk_id equals 'id'; NULL if none. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int idx = 0;
+
+    while (idx < h->md01_count) {
+        MD01 *entry = &h->md01[idx++];
+
+        if (entry->chunk_id == id)
+            return entry;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3: drop every MD01 chunk, releasing each chunk's staging buffer
+   and then the array itself. */
+static void chunk_reset(DTSUHD *h)
+{
+    while (h->md01_count > 0)
+        av_freep(&h->md01[--h->md01_count].buf);
+
+    av_freep(&h->md01);
+    h->md01_count = 0;
+}
+
+/* Pick the default audio object: in the first MD01 chunk that has any
+   started object whose presentation is selectable, return the one with the
+   lowest pres_index (the earliest slot wins ties).  NULL if no chunk has
+   such an object. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    int chunk, slot;
+
+    for (chunk = 0; chunk < h->md01_count; chunk++) {
+        MDObject *objects = h->md01[chunk].object;
+        MDObject *best = NULL;
+
+        for (slot = 0; slot < 257; slot++) {
+            MDObject *cand = &objects[slot];
+
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (!best || cand->pres_index < best->pres_index)
+                best = cand;
+        }
+        if (best)
+            return best;
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   For every activity_map row whose activity bits intersect the object's
+   ch_activity_mask, the row's masks are ORed into *channel_mask and
+   *ffmpeg_channel_mask; *rep_type receives the object's rep_type.  A NULL
+   object leaves all three outputs untouched.
+*/
+static void extract_object_info(MDObject *object, int *rep_type,
+                                int *channel_mask, uint64_t *ffmpeg_channel_mask)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        // AV_CH_* are bit masks; the AV_CHAN_* names are channel indices and
+        // must not be ORed into a mask.
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        // NOTE(review): this row sets two activity bits (0x040000 and
+        // 0x100000) where its neighbours set one -- confirm against Table 7-28.
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (!object)
+        return;
+
+    for (i = 0; activity_map[i].activity_mask; i++) {
+        if (activity_map[i].activity_mask & object->ch_activity_mask) {
+            *channel_mask |= activity_map[i].channel_mask;
+            *ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+        }
+    }
+    *rep_type = object->rep_type;
+}
+
+/* Fill codecpar from the parsed sync frame and, when both udts and
+   udts_size are supplied, build the MP4 'udts' box.
+   The codec tag is 'dtsx' unless the major version of the bitstream is > 2,
+   in which case it is 'dtsy'.
+   The box is allocated here with av_calloc(); if that allocation fails,
+   *udts is NULL and *udts_size is left untouched.
+*/
+static void update_descriptor(DTSUHD *h, AVCodecParameters *codecpar,
+                              uint8_t **udts, int *udts_size)
+{
+    static const char *coding_name[] = { "dtsx", "dtsy" };
+    PutBitContext pb;
+    uint64_t av_mask = 0;
+    int etsi_mask = 0;
+    int rep_type = 0;
+
+    extract_object_info(find_default_audio(h), &rep_type, &etsi_mask, &av_mask);
+
+    codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    codecpar->codec_tag             = AV_RL32(coding_name[h->major_version > 2]);
+    codecpar->sample_rate           = h->sample_rate;
+    codecpar->frame_size            = 512 << h->frame_duration_code;
+    codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
+    codecpar->ch_layout.nb_channels = av_popcount(etsi_mask);
+    codecpar->ch_layout.u.mask      = av_mask;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    codecpar->channel_layout        = av_mask;
+    codecpar->channels              = codecpar->ch_layout.nb_channels;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (!udts || !udts_size)
+        return;
+
+    *udts = av_calloc(1, DTSUHD_UDTS_BUFFER);
+    if (!*udts)
+        return;
+
+    init_put_bits(&pb, *udts, DTSUHD_UDTS_BUFFER);
+    put_bits32(&pb, 0);                          // udts box size, patched below
+    put_bits32(&pb, AV_RB32("udts"));            // udts box signature
+    put_bits(&pb, 6, h->major_version - 2);
+    put_bits(&pb, 2, h->frame_duration_code);
+    put_bits(&pb, 3, h->major_version > 2);
+    put_bits(&pb, 5, h->num_audio_pres - 1);
+    put_bits32(&pb, etsi_mask);
+    put_bits(&pb, 1, h->sample_rate == 48000);
+    put_bits(&pb, 2, h->sample_rate_mod);
+    put_bits(&pb, 3, rep_type);
+    put_bits(&pb, 3, 0);
+    put_bits(&pb, 1, 0);
+    put_bits64(&pb, h->num_audio_pres, 0);       // ID Tag present for each presentation.
+    flush_put_bits(&pb);                         // byte align
+
+    *udts_size = put_bytes_output(&pb);
+    AV_WB32(*udts, *udts_size);                  // now the real box size is known
+}
+
+/* Table 6-17 p47.  For every bit set in the low 'index' bits of 'mask',
+   consume one explicit object list entry from the bitstream.  On sync
+   frames the entry is always present; otherwise a one-bit flag precedes
+   it.  The values read are discarded.  Always returns 0.
+*/
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const VarBits table = {
+        { 4, 4, 4, 4, 8, 8, 16, 32 }, { 0, 0, 0, 0, 16, 16, 272, 65808 }
+    };
+    GetBitContext *gb = &h->gb;
+    int bit;
+
+    for (bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+
+        if (h->is_sync_frame || get_bits1(gb))
+            get_bits_var(gb, &table);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.  Parse the audio presentation parameters.
+   On sync frames the number of presentations, the per-presentation
+   'selectable' flag and the dependency mask are read and stored in
+   h->audio[]; on non-sync frames the stored values are reused.  For every
+   selectable presentation the explicit object lists are then consumed.
+
+   Returns 0 on success and 1 on error.  A sync frame announcing more than
+   32 presentations is rejected: the dependency mask of presentation N is N
+   bits wide and is kept in 32-bit integers, so larger counts cannot be
+   represented.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    uint32_t read_mask; /* unsigned so that '>>= 1' always terminates the loop */
+    static const VarBits table = {
+        { 0, 0, 0, 0, 2, 2, 4, 5 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->num_audio_pres = 1;
+        else
+            h->num_audio_pres = get_bits_var(gb, &table) + 1;
+
+        if (h->num_audio_pres > 32) {
+            /* Do not leave an unusable count behind for later non-sync frames. */
+            h->num_audio_pres = 0;
+            return 1;
+        }
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* get_bits() is limited to 25 bits; get_bits_long() reads
+                   up to 32, which covers every presentation index allowed
+                   above. */
+                read_mask = (audio > 0) ? get_bits_long(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= (uint32_t)get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40.  Read the stream version.  A one-bit selector chooses
+   between 3-bit and 6-bit fields; the first field plus 2 becomes
+   h->major_version and a second field of the same width is skipped.
+*/
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    const int field_bits = get_bits1(gb) ? 3 : 6;
+    const int major = get_bits(gb, field_bits);
+
+    skip_bits(gb, field_bits);
+    h->major_version = major + 2;
+}
+
+/* Table 6-12 p 40.  Parse the stream parameters that open the FTOC.
+   Sync frames carry the full set: full-channel-mix flag, version, frame
+   duration, clock rate, sample rate modifier and the interactive object
+   limits flag, all of which are stored in the handle.  Non-sync frames
+   only get the FTOC CRC check (when one is expected) and leave the stored
+   values alone.
+   Returns 0 on success, 1 when the CRC over the first ftoc_bytes bytes of
+   the frame is non-zero or when the duration / clock rate code selects a
+   zero table entry.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame; /* only non-sync full-channel-mix frames skip the check */
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1); /* base duration times (code + 1) */
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        skip_bits_long(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod); /* clock rate times 1, 2, 4 or 8 */
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the byte count of every NAVI entry that is not
+   marked present. */
+static void navi_purge(DTSUHD *h)
+{
+    int n;
+
+    for (n = 0; n < h->navi_count; n++) {
+        if (h->navi[n].present)
+            continue;
+        h->navi[n].bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.  Wipe all NAVI entries currently in use and empty the
+   list; the allocation itself is kept. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL)
+        memset(h->navi, 0, sizeof(*h->navi) * h->navi_count);
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Reset the 'present' mark of every NAVI entry. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int n = h->navi_count;
+
+    while (n-- > 0)
+        h->navi[n].present = 0;
+}
+
+/* Table 6-23 p51.  Find the NAVI entry whose index is desired_index, or
+   claim a free one (growing the list if necessary) and initialise it.
+   The entry is marked present in both cases.
+
+   Returns 0 on success, with the position of the entry stored in
+   *list_index, and 1 if the list could not be grown.  In the failure case
+   the list is left empty: av_reallocp_array() has already freed it and
+   set h->navi to NULL, so the counters are reset as well to keep later
+   calls from indexing a NULL list.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        /* Remember the first entry that is neither present nor sized. */
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            if (av_reallocp_array(&h->navi, navi_alloc, sizeof(*h->navi))) {
+                h->navi_count = 0;
+                h->navi_alloc = 0;
+                return 1;
+            }
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.  Parse the chunk and audio-chunk navigation tables.
+   Fills h->chunk[] and h->chunk_count with the size and CRC flag of every
+   metadata chunk, updates the NAVI list with the size (and, when signalled,
+   the id) of every audio chunk, and accumulates the total payload size in
+   h->chunk_bytes.
+
+   Returns 0 on success, 1 if an allocation failed.  When growing h->chunk
+   fails, av_reallocp_array() has freed the array and set the pointer to
+   NULL, so the chunk counters are reset to match.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const VarBits table2468 = {
+        { 2, 2, 2, 2, 4, 4, 6, 8 }, { 0, 0, 0, 0, 4, 4, 20, 84 }
+    };
+    static const VarBits table_audio_chunk_sizes = {
+        { 9, 9, 9, 9, 11, 11, 13, 16 }, { 0, 0, 0, 0, 512, 512, 2560, 10752 }
+    };
+    static const VarBits table_chunk_sizes = {
+        { 6, 6, 6, 6, 9, 9, 12, 15 }, { 0, 0, 0, 0, 64, 64, 576, 4672 }
+    };
+
+    h->chunk_bytes = 0;
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, &table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        if (av_reallocp_array(&h->chunk, chunk_alloc, sizeof(*h->chunk))) {
+            h->chunk_alloc = 0;
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, &table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, &table2468);
+
+    /* A sync frame starts a fresh NAVI list; other frames keep the entries
+       and only forget which of them were seen. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, &table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, &table2468);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, &table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not referenced by this frame no longer contribute any bytes. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.  Read the list of object ids carried by a metadata chunk into
+   md01->object_list.  Full-channel-mix streams have exactly one object,
+   with id 256; otherwise the count is read from the bitstream and each id
+   is coded in 8 or 4 bits as selected by a one-bit flag.
+   Always returns 0.
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    static const VarBits table = {
+        { 3, 3, 3, 3, 4, 4, 6, 8 }, { 0, 0, 0, 0, 8, 8, 24, 88 }
+    };
+    GetBitContext *gb = &h->gb;
+    int n;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+        return 0;
+    }
+
+    md01->object_list_count = get_bits_var(gb, &table);
+    for (n = 0; n < md01->object_list_count; n++) {
+        const int id_bits = get_bits1(gb) ? 8 : 4;
+
+        md01->object_list[n] = get_bits(gb, id_bits);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Skip one loudness parameter set: 6 bits of rLoudness
+   followed by 2 more bits when nominal_flag is set, or by 5 + 4 bits when
+   it is not.
+*/
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    skip_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        skip_bits_md01(h, md01, 2);
+    } else {
+        skip_bits_md01(h, md01, 5);
+        skip_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8.  Walk the static metadata through get_bits_md01() /
+   skip_bits_md01(): first the loudness parameter sets and then, unless
+   only_first is non-zero, the fields that follow them.  No field value is
+   kept; the only state written is md01->static_md_extracted, which is set
+   to 1 after a complete walk.  Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first) /* stop after the loudness sets; static_md_extracted is not touched */
+        return 0;
+
+    if (nominal_flag == 0)
+        skip_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                skip_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) /* smooth md present */
+            skip_bits_md01(h, md01, 6 * 6);
+    }
+
+    if (h->full_channel_mix_flag == 0) { /* NOTE(review): the product below is
+                                            used as a byte count where the
+                                            buffer is sized, while
+                                            get_bits_count() is in bits;
+                                            confirm whether a factor of 8 is
+                                            missing here. */
+        i = md01->static_md_packets * md01->static_md_packet_size - get_bits_count(&md01->gb);
+        skip_bits(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.  Collect the multi-frame static metadata.
+   A sync frame announces how many packets make up the static metadata and
+   how large each one is, (re)sizes md01->buf accordingly and restarts the
+   collection.  Every frame then contributes one packet until all packets
+   have been acquired; the static metadata is parsed after the first packet
+   (loudness only) and again once the last packet has arrived.
+
+   The buffer is read through a GetBitContext, so it is allocated with
+   AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes after the payload.
+
+   Returns 0 on success, 1 on allocation or parse failure.  After a failed
+   allocation md01->buf is NULL (av_reallocp() frees it) and buf_bytes is
+   reset so that the next sync frame allocates again.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const VarBits table1 = {
+        { 0, 0, 0, 0, 6, 6, 9, 12 }, { 0, 0, 0, 0, 1, 1, 65, 577 }
+    };
+    static const VarBits table2 = {
+        { 5, 5, 5, 5, 7, 7, 9, 11 }, { 0, 0, 0, 0, 32, 32, 160, 672 }
+    };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, &table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, &table2) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n + AV_INPUT_BUFFER_PADDING_SIZE)) {
+                md01->buf_bytes = 0;
+                return 1;
+            }
+            memset(md01->buf + n, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already collected. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Return 1 if the object is suitable for rendering, 0 if not.  Ids of 224
+   and above are accepted without reading anything; for other ids one flag
+   bit decides.  When the object is rejected its render data is skipped.
+*/
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const VarBits table = {
+        { 8, 8, 8, 8, 10, 10, 12, 14 }, { 0, 0, 0, 0, 256, 256, 1280, 5376 }
+    };
+    GetBitContext *gb = &h->gb;
+    int render_bits;
+
+    if (object_id >= 224)
+        return 1;
+    if (get_bits1(gb))
+        return 1;
+
+    /*  Reject the render and skip the render data. */
+    skip_bits1(gb);
+    render_bits = get_bits_var(gb, &table);
+    skip_bits(gb, render_bits);
+
+    return 0;
+}
+
+/* Table 7-26.  Determine object->ch_activity_mask.  Binaural objects use
+   table entry 1 without reading anything; otherwise a 4-bit index is read,
+   where 14 and 15 announce an explicit 16-bit or 32-bit mask and every
+   other value selects a predefined mask from Table 7-27.
+*/
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    GetBitContext *gb = &h->gb;
+    int ch_index;
+
+    if (object->rep_type == REP_TYPE_BINAURAL)
+        ch_index = 1;
+    else
+        ch_index = get_bits(gb, 4);
+
+    switch (ch_index) {
+    case 14:
+        object->ch_activity_mask = get_bits(gb, 16);
+        break;
+    case 15:
+        object->ch_activity_mask = get_bits_long(gb, 32);
+        break;
+    default:
+        object->ch_activity_mask = mask_table[ch_index];
+        break;
+    }
+}
+
+/* Table 7-22.  Parse the leading part of one object's metadata from h->gb.
+   When start_frame_flag is set, the representation type is read into
+   object->rep_type and, for the channel-mask based types (binaural
+   included), the optional per-object fields are skipped and the channel
+   activity mask is read by parse_ch_mask_params().  The remainder of the
+   object is not parsed.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const VarBits table1 = {
+        { 1, 1, 1, 1, 4, 4, 4, 8 }, { 0, 0, 0, 0, 2, 2, 18, 34 }
+    };
+    static const VarBits table2 = {
+        { 3, 3, 3, 3, 3, 3, 4, 8 }, { 0, 0, 0, 0, 8, 8, 16, 32 }
+    };
+
+    skip_bits(gb, object_id != 256); /* one bit, absent for object id 256 */
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                get_bits_var(gb, &table1); /* value read and discarded */
+                get_bits_var(gb, &table2); /* value read and discarded */
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag); /* object_3d_metadata_flag is always 0 on this path */
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4.  Parse an MD01 metadata chunk for presentation pres_index.
+   For a selectable presentation the scaling data is skipped and the
+   optional multi-frame static metadata is parsed.  md01->object is then
+   cleared and the object list is walked up to the first object that
+   is_suitable_for_render() accepts: that object is marked started, its
+   metadata is parsed unless its id lies in 224..255, and the walk stops.
+   Returns 0 on success, 1 if a nested parser reported an error.
+*/
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) { /* md01->object was cleared above, so this holds on every call */
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        break; /* only the first suitable object is examined */
+    }
+
+    return 0;
+}
+
+/* Table 6-2.  Walk the metadata chunks listed by parse_chunk_navi().
+   Each chunk's CRC is verified when its crc_flag is set, chunks with id 1
+   are parsed as MD01 metadata, and the bit reader is then moved to the
+   end of the chunk as given by its navigation size.
+   Returns 0 on success, 1 on CRC, allocation or parse failure.
+*/
+static int parse_chunks(DTSUHD *h)
+{
+    static const VarBits table_aud_pres = {
+        { 0, 0, 0, 0, 2, 2, 4, 4 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+    GetBitContext *gb = &h->gb;
+    MD01 *md01;
+    const uint8_t *byte_start;
+    uint32_t id;
+    int bit_next;
+    int pres_index;
+    int i;
+
+    for (i = 0; i < h->chunk_count; i++) {
+        const int chunk_start = get_bits_count(gb);
+
+        bit_next   = chunk_start + h->chunk[i].bytes * 8;
+        byte_start = h->data + chunk_start / 8;
+        if (h->chunk[i].crc_flag &&
+            av_crc(h->crc, DTSUHD_CRC_SEED, byte_start, h->chunk[i].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            pres_index = get_bits_var(gb, &table_aud_pres);
+            if (pres_index > 255)
+                return 1;
+
+            md01 = chunk_find_md01(h, id);
+            if (!md01)
+                md01 = chunk_append_md01(h, id);
+            if (!md01 ||
+                parse_md_chunk_list(h, md01) ||
+                parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        skip_bits(gb, bit_next - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/* Helper function for av_dtsuhd_frame and ff_dtsuhd_parse_frame.
+   Checks the sync word and parses the FTOC far enough to know the size of
+   the frame, which is stored in h->frame_bytes.
+   Returns DTSUHD_OK, or the DTSUHDStatus value describing the failure.
+*/
+static int parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes)
+{
+    static const VarBits table_payload = {
+        { 5, 5, 5, 5, 8, 8, 10, 12 }, { 0, 0, 0, 0, 32, 32, 288, 1312 }
+    };
+    GetBitContext *gb;
+    int syncword;
+
+    if (h == NULL || data == NULL)
+        return DTSUHD_NULL;
+
+    /* Data buffer does not contain the signature */
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE;
+
+    gb            = &h->gb;
+    h->data       = data;
+    h->data_bytes = data_bytes;
+    if (init_get_bits8(gb, data, data_bytes) < 0)
+        return DTSUHD_INVALID_FRAME;
+
+    syncword         = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync     |= h->is_sync_frame;
+
+    /* Invalid frame or have not parsed sync frame. */
+    if (!h->saw_sync)
+        return DTSUHD_NOSYNC;
+    if (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD)
+        return DTSUHD_NOSYNC;
+
+    /* Data buffer does not contain entire FTOC */
+    h->ftoc_bytes = get_bits_var(gb, &table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    /* Stream parameters, presentation parameters, then AudioChunkTypes
+       and payload sizes. */
+    if (parse_stream_params(h) ||
+        parse_aud_pres_params(h) ||
+        parse_chunk_navi(h))
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    return DTSUHD_OK;
+}
+
+/** Allocate parsing handle.  The parsing handle should be used to parse
+    one DTS:X Profile 2 Audio stream, then freed by calling
+    av_dtsuhd_destroy().
+    Do not use the same parsing handle to parse multiple audio streams.
+
+  @return Parsing handle for use with other functions, or NULL on failure.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(*handle));
+
+    if (!handle)
+        return NULL;
+
+    handle->crc = av_crc_get_table(AV_CRC_16_CCITT);
+    return handle;
+}
+
+/** Free all resources used by the parsing handle.
+
+  @param[in] h Handle allocated by av_dtsuhd_create, may be NULL
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_freep(&h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+    The metadata chunks are parsed and the descriptor outputs are written
+    only for sync frames, and only when codecpar is not NULL.
+
+  @param[in] h Handle allocated by av_dtsuhd_create
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] codecpar Filled out codec parameters with results of descriptor
+              parsing, may be NULL
+  @param[out] udts Filled MP4 udts sample entry descriptor, may be NULL.
+  @param[out] udts_size Size of valid 'udts' data, may be NULL.
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
+{
+    const int status = parse_frame(h, data, data_bytes);
+
+    if (status != DTSUHD_OK || !codecpar || !h->is_sync_frame)
+        return status;
+
+    /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+       following the FTOC CRC.
+    */
+    skip_bits(&h->gb, h->ftoc_bytes * 8 - get_bits_count(&h->gb));
+    if (parse_chunks(h))
+        return DTSUHD_INVALID_FRAME;
+
+    update_descriptor(h, codecpar, udts, udts_size);
+    return status;
+}
+
+/** Similar to av_dtsuhd_frame, but for use only within libavcodec.
+    Reports the size, sample rate and sample count of the frame instead of
+    filling codec parameters.
+
+  @param[in] h Handle allocated by av_dtsuhd_create
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int ff_dtsuhd_parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                          DTSUHDFrameInfo *fi)
+{
+    const int status = parse_frame(h, data, data_bytes);
+    int fraction = 1;
+    int n;
+
+    if (status != DTSUHD_OK || !fi)
+        return status;
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (n = 0; n < h->navi_count; n++) {
+        if (!h->navi[n].present)
+            continue;
+
+        switch (h->navi[n].id) {
+        case 3:
+            fraction = 2;
+            break;
+        case 4:
+            fraction = 4;
+            break;
+        default:
+            break;
+        }
+    }
+
+    fi->sync = h->is_sync_frame;
+    fi->frame_bytes = h->frame_bytes;
+    fi->sample_rate = h->sample_rate;
+    fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);
+
+    return status;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk is returned, along with the size of that chunk.
+
+    The chunk list is walked with offsets that are checked against
+    data_size before they are used, so a chunk size pointing beyond the
+    buffer ends the search instead of moving a pointer out of bounds.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
+  @return STRMDATA payload offset, 0 if the buffer does not start with a
+          DTSHDHDR chunk or holds no STRMDATA chunk, or
+          AVERROR_INVALIDDATA if a chunk size is out of range
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    int64_t offset = 0;
+
+    if (!data_start || data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+
+    while (offset + DTSUHD_CHUNK_HEADER <= data_size) {
+        const uint8_t *chunk = data_start + offset;
+        const uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)offset + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* The next chunk header would start beyond the supplied data. */
+        if (chunk_size > (uint64_t)(data_size - offset - DTSUHD_CHUNK_HEADER))
+            break;
+
+        offset += chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..e0994ba78c
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,87 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "codec_par.h"
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from dtsuhd_frame.  DTSUHD_OK is 0; every other value
+   names the reason the frame was not parsed. */
+enum DTSUHDStatus {
+    DTSUHD_OK,            /* Frame parsed. */
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Return stream information from an audio frame parsed by dtsuhd_frame,
+   NOTE(review): none of the functions declared in this header take this
+   structure; confirm whether it is still needed.  The field notes marked
+   "presumably" are inferred from the names only. */
+typedef struct DTSUHDDescriptorInfo {
+    int valid;           /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code; /* presumably the udts base sampling frequency code - confirm */
+    int channel_count;         /* presumably the number of channels - confirm */
+    int decoder_profile_code;  /* presumably the udts decoder profile code - confirm */
+    int frame_duration_code;   /* presumably the udts frame duration code - confirm */
+    int max_payload_code;      /* presumably the udts maximum payload code - confirm */
+    int num_pres_code;         /* presumably the udts number-of-presentations code - confirm */
+    int rep_type;              /* presumably the representation type of the default object - confirm */
+    int sample_rate;           /* presumably in samples per second - confirm */
+    int sample_rate_mod;       /* presumably the sample rate modifier code - confirm */
+    int sample_size;           /* presumably in bits per sample - confirm */
+    int channel_mask;          /* presumably the ETSI TS 103 491 channel mask - confirm */
+    uint64_t ffmpeg_channel_mask; /* presumably the same layout as AV_CH_* bits - confirm */
+} DTSUHDDescriptorInfo;
+
+/* Return frame information from an audio frame parsed by dtsuhd_frame.
+   This is the structure ff_dtsuhd_parse_frame() takes as its last
+   argument. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    int sync;         /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+struct DTSUHD; /* Parsing handle; only forward-declared in this header. */
+typedef struct DTSUHD DTSUHD;
+
+struct DTSUHD *av_dtsuhd_create(void); /* Allocate a parsing handle. */
+void av_dtsuhd_destroy(DTSUHD*); /* Release a parsing handle. */
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    AVCodecParameters*, uint8_t**, int*); /* Parse one frame
+                    of nData bytes starting at data; the last two arguments
+                    are the udts descriptor buffer and its size.  Returns a
+                    DTSUHDStatus value. */
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size); /* Locate the STRMDATA
+                               payload in DTSHDHDR file content. */
+int ff_dtsuhd_parse_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                          DTSUHDFrameInfo*); /* Parse one frame and report
+                          its DTSUHDFrameInfo.  Returns a DTSUHDStatus
+                          value. */
+
+/* Return non-zero if syncword is the sync or the non-sync frame signature. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    switch (syncword) {
+    case DTSUHD_SYNCWORD:
+    case DTSUHD_NONSYNCWORD:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..2860c1eee5
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Parser private data: the frame parser plus a contiguous accumulation buffer. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;  /* Frame parser state from av_dtsuhd_create(). */
+    int buf_offset;  /* Offset in buf of the data handed to the frame parser. */
+    int buf_bytes;   /* Number of input bytes accumulated in buf. */
+    int frame_bytes; /* Bytes handled by the previous parse call; added to buf_offset on the next call. */
+    uint8_t *buf;    /* DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE bytes, allocated in parser_init(). */
+} DTSUHDParseContext;
+
+/* Allocate the frame parser state and the accumulation buffer.
+ *
+ * Returns 0 on success or AVERROR(ENOMEM).  Both resources are released on
+ * failure because the generic parser code does not invoke parser_close()
+ * after a failed parser_init(); without that, a partial allocation leaks.
+ */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = av_dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        if (pc->dtsuhd)
+            av_dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Release everything allocated by parser_init().
+ *
+ * ff_parse_close() is intentionally not called: it interprets priv_data as a
+ * ParseContext and frees its leading buffer pointer, but DTSUHDParseContext
+ * does not embed a ParseContext (its first member is the DTSUHD pointer).
+ */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+}
+
+// Keep data in contiguous buffer as required by ff_dtsuhd_parse_frame.
+//
+// Steps past the bytes handled by the previous call, appends as much of the
+// caller's input as fits, then advances to the next syncword.  On return
+// *buf / *buf_size describe the buffered data from that position and
+// *input_consumed holds the number of input bytes copied.
+//
+// Returns nonzero when input was consumed but fewer than
+// DTSUHD_MAX_FRAME_SIZE bytes are buffered, i.e. the caller should wait for
+// more input before parsing.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    pc->buf_offset += pc->frame_bytes;
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes after buf_offset are live.  Moving buf_bytes bytes
+        // from buf + buf_offset would read beyond the buffered data (and
+        // possibly beyond the allocation), so shrink the count first.
+        pc->buf_bytes -= pc->buf_offset;
+        memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes);
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: buffer the input and output at most one complete frame.
+ *
+ * Always returns the number of input bytes consumed.  AVERROR codes must not
+ * be returned from a parser callback (av_parser_parse2() asserts on them), so
+ * a frame that fails to parse is logged and skipped instead: no output is
+ * produced and the next call resumes the syncword search one byte further on.
+ * *poutbuf / *poutbuf_size are set on every path.
+ */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    *poutbuf      = NULL;
+    *poutbuf_size = 0;
+
+    /* Not enough data buffered yet: consume the input, output nothing. */
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed))
+        return input_consumed;
+
+    switch (ff_dtsuhd_parse_frame(pc->dtsuhd, buf, buf_size, &fi)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        pc->frame_bytes = buf_size;
+        return input_consumed;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* Step past the rejected position so the next call resynchronizes. */
+        pc->frame_bytes = buf_size > 0;
+        return input_consumed;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* Parser registration.  Defined const so that it matches the
+   "extern const AVCodecParser ff_dtsuhd_parser" declaration in parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 65bc52fb24..4b7ec515fe 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  18
+#define LIBAVCODEC_VERSION_MINOR  19
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Paul B Mahol June 15, 2023, 3:46 p.m. UTC | #29
On Wed, Jun 14, 2023 at 10:01 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

> Hi,
>
> I updated the libavcodec patch per Paul Mahol's reviews:
>
> dtsuhd_common.c:496 get_bits_long instead of get_bits used for reading 36
> bits
>
> dtsuhd_common.c:224 get_bits_var changed to accept arrays in VarBits
> structure, allowing arrays with all values less than 256 to use uint8_t
> arrays.
>
> Also removed bitfields from structures.
>


get_vlc2 can be made for get_bits_var(), first table bits (that are still
int and not uint8_t), the code that picks table index from which to take
bits.

It is also possible to make it take both first index and rest of it and
build bigger tables but that is very very advanced step for new
contributors.

Use INIT_VLC_SPARSE_STATIC, there are myriad examples in libavcodec, one of
them being imm4 decoder.


>
> Regards,
> -Roy
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Roy Funderburk June 15, 2023, 6:44 p.m. UTC | #30
On 6/15/23 8:46 AM, Paul B Mahol wrote:
> get_vlc2 can be made for get_bits_var(), first table bits (that are still
> int and not uint8_t), the code that picks table index from which to take
> bits.
> 
> It is also possible to make it take both first index and rest of it and
> build bigger tables but that is very very advanced step for new
> contributors.
> 
> Use INIT_VLC_SPARSE_STATIC, there are myriad examples in libavcodec, one of
> them being imm4 decoder.
> 

I will change that table bits array to uint8_t.

I encountered an issue when trying to set up the VLC table with
INIT_VLC_SPARSE_STATIC.

The current get_bits_var:

    static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
    {
        static const uint8_t bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
        int code = show_bits(gb, 3); /* value range is [0, 7] */

        skip_bits(gb, bits_used[code]);
        if (var_bits->bits[code] == 0)
            return 0;
        return get_bits_long(gb, var_bits->bits[code]) + var_bits->add[code];
    }

Changed to use get_vlc2 would be:

    static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
    {
        int code = get_vlc2(&gb, vlc.table, 3, 1);
        if (var_bits->bits[code] == 0)
            return 0;
        return get_bits_long(gb, var_bits->bits[code]) + var_bits->add[code];
    }

The "vlc.table" that INIT_VLC_SPARSE_STATIC needs to output is:

    len: 1, 1, 1, 1, 2, 2, 3, 3
    sym: 4, 4, 4, 4, 8, 8, 16, 32

INIT_VLC_SPARSE_STATIC would get the same len and sym and this code table as input:
    code: 0, 1, 2, 3, 4, 5, 6, 7

INIT_VLC_SPARSE_STATIC rejects the "len" and "code" because the code
of "2" will not fit into one bit.  Regardless of the fact that the desired
output table is not a valid table according to VLC functions, that invalid
table is what is needed.

This goes back to how get_bits_var decodes the bit context.
get_bits_var looks at three bits to get an index in the range of 0-7.
But it may not skip all of those bits.  One or two of those bits may also be
read by the final get_bits_long.

    code = show_bits(gb, 3);
    skip_bits(gb, bits_used[code]);
    get_bits_long(gb,...);

The VLC functions to construct tables have validation in place that prevents
the construction of the table needed for get_vlc2() to work with
get_bits_var.

So the only way I can find to use get_vlc2() is to not use any of the vlc.h
initialization functions, and instead hard-code the tables to be used by
get_vlc2() like this:

VLCElem table[8] = { {4,1}, {4,1}, {4,1}, {4,1}, {8,2}, {8,2}, {16,3}, {32,3} };

Before I do that, I wanted to verify this would be acceptable. Would
this be a use of get_vlc2() that could lead to issues in the future
if the GET_VLC macro changed so that it would not skip fewer bits than the
code length?

What is your opinion on this?

Thanks,
-Roy
Paul B Mahol June 18, 2023, 12:18 p.m. UTC | #31
On Thu, Jun 15, 2023 at 8:45 PM Roy Funderburk <royffmpeg@funderburk.us>
wrote:

>
>
> On 6/15/23 8:46 AM, Paul B Mahol wrote:
> > get_vlc2 can be made for get_bits_var(), first table bits (that are still
> > int and not uint8_t), the code that picks table index from which to take
> > bits.
> >
> > It is also possible to make it take both first index and rest of it and
> > build bigger tables but that is very very advanced step for new
> > contributors.
> >
> > Use INIT_VLC_SPARSE_STATIC, there are myriad examples in libavcodec, one
> of
> > them being imm4 decoder.
> >
>
> I will change that table bits array to uint8_t.
>
> I encountered an issue when trying to set up the VLC table with
> INIT_VLC_SPARSE_STATIC.
>
> The current get_bits_var:
>
>     static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
>     {
>         static const uint8_t bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
>         int code = show_bits(gb, 3); /* value range is [0, 7] */
>
>         skip_bits(gb, bits_used[code]);
>         if (var_bits->bits[code] == 0)
>             return 0;
>         return get_bits_long(gb, var_bits->bits[code]) +
> var_bits->add[code];
>     }
>
> Changed to use get_vlc2 would be:
>
>     static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
>     {
>         int code = get_vlc2(&gb, vlc.table, 3, 1);
>         if (var_bits->bits[code] == 0)
>             return 0;
>         return get_bits_long(gb, var_bits->bits[code]) +
> var_bits->add[code];
>     }
>
> The "vlc.table" that INIT_VLC_SPARSE_STATIC needs to output is:
>
>     len: 1, 1, 1, 1, 2, 2, 3, 3
>     sym: 4, 4, 4, 4, 8, 8, 16, 32
>
> INIT_VLC_SPARSE_STATIC would get the same len and sym and this code table
> as input:
>     code: 0, 1, 2, 3, 4, 5, 6, 7
>
> INIT_VLC_SPARSE_STATIC rejects the "len" and "code" because the code
> of "2" will not fit into one bit.  Regardless of the fact that the desired
> output table is not a valid table according to VLC functions, that invalid
> table is what is needed.
>
> This goes back to how get_bits_var decodes the bit context.
> get_bits_var looks at three bits to get an index in the range of 0-7.
> But it may not skip all of those bits.  One or two of those bits may also
> be
> read by the final get_bits_long.
>
>     code = show_bits(gb, 3);
>     skip_bits(gb, bits_used[code]);
>     get_bits_long(gb,...);
>
> The VLC functions to construct tables have validation in place that
> prevents
> the construction of the table needed for get_vlc2() to work with
> get_bits_var.
>
> So the only way I can find to use get_vlc2() is to not use any of the vlc.h
> initialization functions, and instead hard-code the tables to be used by
> get_vlc2() like this:
>
> VLCElem table[8] = { {4,1}, {4,1}, {4,1}, {4,1}, {8,2}, {8,2}, {16,3},
> {32,3} };
>
> Before I do that, I wanted to verify this would be acceptable. Would
> this be a use of get_vlc2() that could lead to issues in the future
> if the GET_VLC macro changed so that it would not skip fewer bits than the
> code length?
>
> What is your opinion on this?
>

Well, just keep that part as is currently, until someone else cleans it up.

Can probing in new demuxer be smarter than just decreasing score of another
demuxer?



>
> Thanks,
> -Roy
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Roy Funderburk June 20, 2023, 5:05 p.m. UTC | #32
On 6/18/23 5:18 AM, Paul B Mahol wrote:
> Well, just keep that part as is currently, until someone else cleans it up.
> 
> Can probing in new demuxer be smarter than just decreasing score of another
> demuxer?


A new patch is attached where the dtshddec.c probe function change is updated to no longer just decrease the score.

Regards,
-Roy
Parsing of DTS-UHD input files per ETSI TS 103 491 is added
as parser for codec id AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |    1 +
 libavcodec/codec_desc.c    |    7 +
 libavcodec/codec_id.h      |    1 +
 libavcodec/dtsuhd_common.c | 1079 ++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |   87 +++
 libavcodec/dtsuhd_parser.c |  141 +++++
 libavcodec/parsers.c       |    1 +
 libavcodec/version.h       |    2 +-
 8 files changed, 1318 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2efab60d7d..0b49984902 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1164,6 +1164,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 3e31a1eed6..63dc939905 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3406,6 +3406,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index d23549d7e0..a5d580169b 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -542,6 +542,7 @@ enum AVCodecID {
     AV_CODEC_ID_FTR,
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..4d91172b33
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,1079 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED        0xFFFF
+#define DTSUHD_UDTS_BUFFER     32 // work buffer to construct 'udts' box
+
+/* Object representation types.  Presumably these are the values carried in
+   MDObject.rep_type, which update_descriptor writes as a 3-bit field -- the
+   eight enumerators match that width; confirm against the specification. */
+enum RepType {
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+/* Per-object metadata state stored in an MD01 chunk. */
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index; /* Index into DTSUHD.audio[]; find_default_audio prefers the lowest. */
+    int rep_type; /* Copied out unchanged by extract_object_info. */
+    int ch_activity_mask; /* Bits tested against activity_map in extract_object_info. */
+} MDObject;
+
+/* State of one metadata chunk, looked up by chunk_id (see chunk_find_md01). */
+typedef struct MD01 {
+    GetBitContext gb; /* Bit reader used by get_bits_md01/skip_bits_md01 when buf is set. */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id; /* Key assigned by chunk_append_md01. */
+    int object_list[256]; int object_list_count;
+    int packets_acquired;
+    int static_md_extracted;
+    int static_md_packets;
+    int static_md_packet_size;
+    int static_md_update_flag;
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+/* Entry of the chunk navigation list maintained by the navi_* helpers. */
+typedef struct NAVI {
+    int bytes;   /* navi_find_index reuses entries with bytes == 0 and present == 0. */
+    int id;      /* Initialized to 256 when navi_find_index creates the entry. */
+    int index;   /* Value matched against desired_index in navi_find_index. */
+    int present; /* Set by navi_find_index, cleared by navi_clear_present. */
+} NAVI;
+
+/* Per audio presentation state, filled in by parse_aud_pres_params. */
+typedef struct UHDAudio {
+    int mask;       /* Bit i is read for each lower-numbered presentation i flagged in the bitstream. */
+    int selectable; /* Nonzero if the presentation may be chosen (see find_default_audio). */
+} UHDAudio;
+
+/* Per chunk values read from the bitstream by parse_chunk_navi. */
+typedef struct UHDChunk {
+    int crc_flag; /* Forced to 0 when full_channel_mix_flag is set, otherwise one bit from the stream. */
+    int bytes;    /* Chunk size; also summed into DTSUHD.chunk_bytes. */
+} UHDChunk;
+
+/* Lookup tables for get_bits_var, both indexed by the 3-bit prefix code. */
+typedef struct VarBits {
+    uint8_t bits[8]; /* Number of value bits to read for the code (0 yields the value 0). */
+    int add[8];      /* Offset added to the value that was read. */
+} VarBits;
+
+/* Frame parser state; sync-frame values persist for the non-sync frames that follow. */
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc; /* CRC table used for the FTOC check in parse_stream_params. */
+    GetBitContext gb; /* Bit reader over the current frame. */
+    MD01 *md01; int md01_count; /* Array grown by chunk_append_md01, freed by chunk_reset. */
+    NAVI *navi; int navi_alloc, navi_count; /* navi_alloc entries allocated, navi_count in use. */
+    UHDAudio audio[256]; /* Indexed by audio presentation. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count;
+    int chunk_bytes; /* Sum of the chunk sizes of the current frame. */
+    int clock_rate;
+    int frame_bytes;
+    int frame_duration;
+    int frame_duration_code;
+    int ftoc_bytes; /* Number of bytes covered by the FTOC CRC. */
+    int major_version;
+    int num_audio_pres;
+    int sample_rate; /* clock_rate * (1 << sample_rate_mod). */
+    int sample_rate_mod;
+    unsigned full_channel_mix_flag;
+    unsigned interactive_obj_limits_present;
+    unsigned is_sync_frame;
+    unsigned saw_sync;
+};
+
+/* Read bits from the MD01 accumulation buffer when one exists, otherwise
+   directly from the frame's bit reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Skip bits in the MD01 buffer (if present), falling back to the frame buffer */
+static inline void skip_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    /* skip_bits() returns void; ISO C does not allow "return <expression>;"
+       in a void function, so select the reader and call it as a statement. */
+    skip_bits(md01->buf ? &md01->gb : &h->gb, bits);
+}
+
+/* In the specification Table 5-2, the pseudo code defaults the 'add'
+   parameter to true. Table 7-30 shows passing an explicit false, most
+   other calls do not pass the extractAndAdd parameter.  In the function
+   below, the indirection index and add parameter are pre-applied to the
+   input table itself.  The original table from the specification is
+   converted to the tables used by get_bits_var as follows:
+
+    VarBits table;
+    int original_table[4] = { 1, 2, 3, 4 }; // From specification pseudo code
+    int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; // Indirection index
+    for (i = 0; i < 8; i++) {
+        table.bits[i] = original_table[index_table[i]];
+        table.add[i] = 0;
+        for (index = 0; index < index_table[i]; index++)
+            table.add[i] += 1 << original_table[index];
+    }
+*/
+static int get_bits_var(GetBitContext *gb, const VarBits *var_bits)
+{
+    static const uint8_t bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    int code = show_bits(gb, 3); /* value range is [0, 7] */
+
+    skip_bits(gb, bits_used[code]);
+    if (var_bits->bits[code] == 0)
+        return 0;
+    return get_bits_long(gb, var_bits->bits[code]) + var_bits->add[code];
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+   Grows h->md01 by one zero-initialized entry tagged with the chunk id and
+   returns it, or NULL if the allocation fails.  av_realloc_array() is used
+   rather than av_reallocp_array() because the latter frees the array on
+   failure while h->md01_count would still be non-zero, leaving chunk_reset()
+   and chunk_find_md01() to index a NULL array and leaking every entry's buf.
+   On failure the existing array and count are left untouched. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *entry;
+    MD01 *grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count++;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    return entry;
+}
+
+/* Look up the first MD01 chunk whose chunk_id equals id; NULL if none matches. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int total = h->md01_count;
+    int k = 0;
+
+    while (k < total) {
+        MD01 *entry = &h->md01[k++];
+
+        if (entry->chunk_id == id)
+            return entry;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3.  Free every MD01 accumulation buffer, then the MD01 array
+   itself, leaving the list empty. */
+static void chunk_reset(DTSUHD *h)
+{
+    int k = h->md01_count;
+
+    while (k-- > 0)
+        av_freep(&h->md01[k].buf);
+
+    av_freep(&h->md01);
+    h->md01_count = 0;
+}
+
+/* Pick the default object: scanning the MD01 chunks in order, the first chunk
+   holding any started object of a selectable presentation wins, and within
+   it the object with the lowest pres_index (earliest on ties) is returned.
+   Returns NULL when no chunk has such an object. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    MDObject *objects, *candidate, *best;
+    int chunk, k;
+
+    for (chunk = 0; chunk < h->md01_count; chunk++) {
+        objects = h->md01[chunk].object;
+        best = NULL;
+
+        for (k = 0; k < 257; k++) {
+            candidate = &objects[k];
+            if (!candidate->started || !h->audio[candidate->pres_index].selectable)
+                continue;
+            if (!best || candidate->pres_index < best->pres_index)
+                best = candidate;
+        }
+
+        if (best)
+            return best;
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   For every activity_map row whose activity bits intersect
+   object->ch_activity_mask, the row's masks are OR-ed into *channel_mask and
+   *ffmpeg_channel_mask (the outputs are not cleared first), and *rep_type is
+   set from the object.  Nothing is written when object is NULL.
+*/
+static void extract_object_info(MDObject *object, int *rep_type,
+                                int *channel_mask, uint64_t *ffmpeg_channel_mask)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        // AV_CH_* are bit masks; the AV_CHAN_* names are enum indices and
+        // OR-ing those would select unrelated channels.
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        for (i = 0; activity_map[i].activity_mask; i++) {
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                *channel_mask |= activity_map[i].channel_mask;
+                *ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        *rep_type = object->rep_type;
+    }
+}
+
+/* Assemble information for MP4 Sample Entry box.  Sample Size is always
+   16 bits.  The coding name is the name of the SampleEntry sub-box and is
+   'dtsx' unless the version of the bitstream is > 2.
+   If DecoderProfile == 2, then MaxPayloadCode will be zero.
+
+   Fills codecpar from the parser state and the default audio object.  When
+   both udts and udts_size are non-NULL, a 'udts' box is additionally built in
+   a buffer allocated with av_calloc() and returned through *udts, with its
+   byte length in *udts_size.  If that allocation fails *udts is NULL and
+   *udts_size is left unmodified.
+   NOTE(review): ownership of *udts presumably passes to the caller -- confirm.
+*/
+static void update_descriptor(DTSUHD *h, AVCodecParameters *codecpar,
+                              uint8_t **udts, int *udts_size)
+{
+    PutBitContext pbc;
+    int channel_mask = 0, rep_type = 0;
+    static const char *coding_name[] = { "dtsx", "dtsy" };
+    uint64_t ffmpeg_channel_mask = 0;
+
+    extract_object_info(find_default_audio(h), &rep_type, &channel_mask, &ffmpeg_channel_mask);
+    codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
+    /* NOTE(review): the count comes from the DTS channel mask while u.mask is
+       the FFmpeg mask; two activity_map rows share the same AV_CH_* pairs, so
+       the two can disagree -- confirm this is intended. */
+    codecpar->ch_layout.nb_channels = av_popcount(channel_mask);
+    codecpar->ch_layout.u.mask      = ffmpeg_channel_mask;
+    codecpar->codec_tag             = AV_RL32(coding_name[h->major_version > 2]);
+    codecpar->frame_size            = 512 << h->frame_duration_code;
+    codecpar->sample_rate           = h->sample_rate;
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    codecpar->channels              = codecpar->ch_layout.nb_channels;
+    codecpar->channel_layout        = ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (udts && udts_size) {
+        *udts = av_calloc(1, DTSUHD_UDTS_BUFFER);
+        if (*udts) {
+            init_put_bits(&pbc, *udts, DTSUHD_UDTS_BUFFER);
+            put_bits32(&pbc, 0); // udts box size
+            put_bits32(&pbc, AV_RB32("udts")); // udts box signature
+            put_bits(&pbc, 6, h->major_version - 2);
+            /* NOTE(review): frame_duration_code is read as a 3-bit field in
+               parse_stream_params but written with 2 bits here -- confirm the
+               value range. */
+            put_bits(&pbc, 2, h->frame_duration_code);
+            put_bits(&pbc, 3, h->major_version > 2);
+            /* NOTE(review): 5 bits hold at most 31, i.e. 32 presentations. */
+            put_bits(&pbc, 5, h->num_audio_pres - 1);
+            put_bits32(&pbc,  channel_mask);
+            put_bits(&pbc, 1, h->sample_rate == 48000);
+            put_bits(&pbc, 2, h->sample_rate_mod);
+            put_bits(&pbc, 3, rep_type);
+            put_bits(&pbc, 3, 0);
+            put_bits(&pbc, 1, 0);
+            put_bits64(&pbc, h->num_audio_pres, 0); // ID Tag present for each presentation.
+            flush_put_bits(&pbc); // byte align
+            *udts_size = put_bytes_output(&pbc);
+            /* Back-patch the box size written as 0 above. */
+            AV_WB32(*udts, *udts_size);
+        }
+    }
+}
+
+/* Table 6-17 p47.  For each of the low `index` bits set in mask, consume an
+   update flag (implied on sync frames) and, when it is set, one variable
+   length value that is discarded.  Always returns 0. */
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const VarBits table = {
+        { 4, 4, 4, 4, 8, 8, 16, 32 }, { 0, 0, 0, 0, 16, 16, 272, 65808 }
+    };
+    GetBitContext *gb = &h->gb;
+    int bit;
+
+    for (bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+        /* The flag bit is only present in the stream on non-sync frames. */
+        if (!h->is_sync_frame && !get_bits1(gb))
+            continue;
+        get_bits_var(gb, &table);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.
+   Read the audio presentation parameters.  On sync frames the presentation
+   count, the selectable flags and the dependency masks are read and stored
+   in h->audio[]; on every frame the explicit object lists of the selectable
+   presentations are parsed.
+
+   Returns 0 on success and nonzero on a bitstream error.  A presentation
+   count above 32 is rejected: the dependency masks are kept in 32-bit ints
+   and are shifted by the presentation index, so larger counts cannot be
+   represented.  In that case h->num_audio_pres keeps its previous value.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    int num_audio_pres;
+    unsigned read_mask;
+    static const VarBits table = {
+        { 0, 0, 0, 0, 2, 2, 4, 5 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            num_audio_pres = 1;
+        else
+            num_audio_pres = get_bits_var(gb, &table) + 1;
+        if (num_audio_pres > 32)
+            return 1;
+        h->num_audio_pres = num_audio_pres;
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* Up to 31 bits are needed here; get_bits() only supports
+                   reads of at most 25 bits, so use get_bits_long(). */
+                read_mask = (audio > 0) ? get_bits_long(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40.  A one-bit selector chooses 3-bit or 6-bit version
+   fields; the first field plus 2 becomes major_version and a second field
+   of the same width is skipped. */
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int short_form = get_bits1(gb);
+    int width = short_form ? 3 : 6;
+    int major = get_bits(gb, width);
+
+    skip_bits(gb, width);
+    h->major_version = major + 2;
+}
+
+/* Table 6-12 p 40.
+   Read the stream parameters at the current position of h->gb.  Most fields
+   exist only in sync frames; the values stored in h persist for the
+   following non-sync frames.
+   Returns 0 on success, 1 when the FTOC CRC check fails or when the frame
+   duration or clock rate code selects a zero table entry.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    /* The CRC is checked on every frame except non-sync full-channel-mix frames. */
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    /* A nonzero CRC over the first ftoc_bytes bytes of the frame is an error. */
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        /* Frame duration = base duration * (3-bit duration code + 1). */
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        /* Skip the 36-bit time stamp when the presence flag is set. */
+        skip_bits_long(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the byte count of every list entry that is not
+   marked present. */
+static void navi_purge(DTSUHD *h)
+{
+    NAVI *entry;
+    int k;
+
+    for (k = 0; k < h->navi_count; k++) {
+        entry = &h->navi[k];
+        if (entry->present)
+            continue;
+        entry->bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.  Zero the entries currently in use and empty the list;
+   the allocation itself is kept. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL)
+        memset(h->navi, 0, h->navi_count * sizeof(*h->navi));
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Clear the present flag of every list entry in use. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int k = h->navi_count;
+
+    while (k-- > 0)
+        h->navi[k].present = 0;
+}
+
+/* Table 6-23 p51.  Find the list entry whose index equals desired_index and
+   mark it present; if there is none, reuse the first free entry (present == 0
+   and bytes == 0) or append a new one, initialized with bytes = 0,
+   present = 1, id = 256 and index = desired_index.
+
+   Returns 0 on success, with the position stored in *list_index, and 1 when
+   the list cannot be grown.  av_realloc_array() keeps the existing list valid
+   on failure; av_reallocp_array() would free it while navi_count and
+   navi_alloc still describe it, so later navi_* calls would index NULL.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+    NAVI *grown;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+            if (!grown)
+                return 1;
+            h->navi = grown;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.  Parse the chunk and audio-chunk navigation tables.
+
+   Fills h->chunk[] (size and CRC flag per chunk) and h->navi[] (id and size
+   per audio chunk), and accumulates the total payload size in
+   h->chunk_bytes.  Return 0 on success, 1 if an array could not be grown.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const VarBits table2468 = {
+        { 2, 2, 2, 2, 4, 4, 6, 8 }, { 0, 0, 0, 0, 4, 4, 20, 84 }
+    };
+    static const VarBits table_audio_chunk_sizes = {
+        { 9, 9, 9, 9, 11, 11, 13, 16 }, { 0, 0, 0, 0, 512, 512, 2560, 10752 }
+    };
+    static const VarBits table_chunk_sizes = {
+        { 6, 6, 6, 6, 9, 9, 12, 15 }, { 0, 0, 0, 0, 64, 64, 576, 4672 }
+    };
+
+    h->chunk_bytes = 0;
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, &table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        if (av_reallocp_array(&h->chunk, chunk_alloc, sizeof(*h->chunk))) {
+            /* On failure the array has been freed and h->chunk is NULL.
+               Reset the bookkeeping so later frames reallocate instead of
+               indexing a NULL array. */
+            h->chunk_alloc = 0;
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, &table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, &table2468);
+
+    /* A sync frame starts a fresh navi list.  Other frames keep the list
+       and only reset the 'present' marks. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, &table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, &table2468);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, &table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not referenced by this frame no longer carry payload. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.  Read the list of object ids carried by a metadata chunk into
+   md01->object_list[].  In full-channel-mix mode the list is the single
+   implicit object 256.
+
+   Return 0 on success, 1 if the coded object count does not fit in
+   md01->object_list[].
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    static const VarBits table = {
+        { 3, 3, 3, 3, 4, 4, 6, 8 }, { 0, 0, 0, 0, 8, 8, 24, 88 }
+    };
+    const int max_objects = sizeof(md01->object_list) / sizeof(md01->object_list[0]);
+    int count;
+    int i;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        count = get_bits_var(gb, &table);
+        /* The count comes straight from the bitstream; reject values that
+           would write past the end of object_list[]. */
+        if (count < 0 || count > max_objects) {
+            md01->object_list_count = 0;
+            return 1;
+        }
+        md01->object_list_count = count;
+        for (i = 0; i < count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Skip one loudness parameter set: 6 bits of rLoudness, then
+   2 more bits when nominal_flag is set, or 5 + 4 more bits when it is not. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    skip_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        skip_bits_md01(h, md01, 2);
+    } else {
+        skip_bits_md01(h, md01, 5);
+        skip_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8.  Walk the static metadata parameters.  Nothing is stored
+   except md01->static_md_extracted; all fields are read or skipped only to
+   advance the bit position.
+
+   only_first: when non-zero, return right after the loudness sets.
+   Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    /* In full-channel-mix mode nominal_flag stays 1 and there is one set. */
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        skip_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                skip_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) /* smooth md present */
+            skip_bits_md01(h, md01, 6 * 6);
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* NOTE(review): static_md_packets * static_md_packet_size looks like
+           a byte count (it sizes md01->buf in parse_multi_frame_md), while
+           get_bits_count() returns bits.  The difference may be negative or
+           short by a factor of 8 -- confirm against the specification. */
+        i = md01->static_md_packets * md01->static_md_packet_size - get_bits_count(&md01->gb);
+        skip_bits(&md01->gb, i);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.  Collect multi-frame static metadata.
+
+   On a sync frame the packet count and packet size are (re)read, the
+   staging buffer md01->buf is grown if needed and md01->gb is pointed at
+   it.  While packets are outstanding, each call copies one packet of
+   static_md_packet_size bytes from the frame into the staging buffer.  The
+   static parameters are parsed after the first packet (loudness sets only)
+   and again after the last packet (complete).
+
+   Return 0 on success, 1 on allocation or parse failure.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const VarBits table1 = {
+        { 0, 0, 0, 0, 6, 6, 9, 12 }, { 0, 0, 0, 0, 1, 1, 65, 577 }
+    };
+    static const VarBits table2 = {
+        { 5, 5, 5, 5, 7, 7, 9, 11 }, { 0, 0, 0, 0, 32, 32, 160, 672 }
+    };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, &table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, &table2) + 3;
+        }
+
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n)) {
+                /* On failure the buffer has been freed and md01->buf is
+                   NULL.  Forget the old size so a later frame reallocates
+                   instead of writing through NULL. */
+                md01->buf_bytes = 0;
+                return 1;
+            }
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet to the staging buffer. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Return 1 when the object should be parsed: its id is 224 or above, or the
+   next bit of the stream is set.  Otherwise skip the render data and
+   return 0. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const VarBits table = {
+        { 8, 8, 8, 8, 10, 10, 12, 14 }, { 0, 0, 0, 0, 256, 256, 1280, 5376 }
+    };
+    GetBitContext *gb = &h->gb;
+    int skip_count;
+
+    if (object_id >= 224)
+        return 1;
+    if (get_bits1(gb))
+        return 1;
+
+    /* Rejected: step over one flag bit and the variable-length render data. */
+    skip_bits1(gb);
+    skip_count = get_bits_var(gb, &table);
+    skip_bits(gb, skip_count);
+
+    return 0;
+}
+
+/* Table 7-26.  Set object->ch_activity_mask.  The 4-bit channel index
+   selects a fixed mask; index 14 and 15 are followed by an explicit 16-bit
+   or 32-bit mask.  Binaural objects read no index and use table entry 1. */
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    GetBitContext *gb = &h->gb;
+    int ch_index;
+
+    if (object->rep_type == REP_TYPE_BINAURAL)
+        ch_index = 1;
+    else
+        ch_index = get_bits(gb, 4);
+
+    switch (ch_index) {
+    case 14:
+        object->ch_activity_mask = get_bits(gb, 16);
+        break;
+    case 15:
+        object->ch_activity_mask = get_bits_long(gb, 32);
+        break;
+    default:
+        object->ch_activity_mask = mask_table[ch_index];
+        break;
+    }
+}
+
+/* Table 7-22.  Parse the leading part of one object's metadata.
+
+   Only start frames carry data this parser keeps: the representation type
+   and, for channel-mask based types, the channel activity mask.  The rest
+   of the object is left unread.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const VarBits table1 = {
+        { 1, 1, 1, 1, 4, 4, 4, 8 }, { 0, 0, 0, 0, 2, 2, 18, 34 }
+    };
+    static const VarBits table2 = {
+        { 3, 3, 3, 3, 3, 3, 4, 8 }, { 0, 0, 0, 0, 8, 8, 16, 32 }
+    };
+
+    /* One leading bit is present for every object except id 256. */
+    skip_bits(gb, object_id != 256);
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        /* Classify the representation type; types not listed leave both
+           flags at 0. */
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                /* Two variable-length fields are read and discarded. */
+                get_bits_var(gb, &table1);
+                get_bits_var(gb, &table2);
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    /* object_3d_metadata_flag is always 0 on this path, so
+                       5 bits are skipped. */
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag);
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4.  Parse a metadata chunk of type 0x01 for presentation
+   pres_index.
+
+   Handles the optional scaling data and multi-frame static metadata of a
+   selectable presentation, then parses the metadata of the first object in
+   md01->object_list[] that is suitable for rendering.
+   Return 0 on success, 1 on failure.
+*/
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    /* All per-object state is cleared on every call, so 'started' is 0 for
+       every object when the loop below begins. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            /* One extra bit precedes a newly started object, except 256. */
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* Ids 224..255 carry no object metadata parsed here. */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* Only the first suitable object is examined. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2.  Walk the metadata chunks that follow the FTOC.
+
+   Each chunk is CRC-checked when its crc_flag is set.  Chunks with id 1 are
+   parsed as MD01 metadata; any other chunk is skipped.  The bit position is
+   always moved to the end of the chunk before continuing.
+   Return 0 on success, 1 on CRC, allocation or parse failure.
+*/
+static int parse_chunks(DTSUHD *h)
+{
+    static const VarBits table_aud_pres = {
+        { 0, 0, 0, 0, 2, 2, 4, 4 }, { 0, 0, 0, 0, 1, 1, 5, 21 }
+    };
+    GetBitContext *gb = &h->gb;
+    int i;
+
+    for (i = 0; i < h->chunk_count; i++) {
+        const int chunk_start = get_bits_count(gb);
+        const int bit_next = chunk_start + h->chunk[i].bytes * 8;
+        const uint8_t *byte_start = h->data + chunk_start / 8;
+        uint32_t id;
+
+        if (h->chunk[i].crc_flag && av_crc(h->crc, DTSUHD_CRC_SEED, byte_start, h->chunk[i].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            MD01 *md01;
+            int pres_index = get_bits_var(gb, &table_aud_pres);
+
+            if (pres_index > 255)
+                return 1;
+
+            /* Reuse the MD01 state for this chunk id, or create it. */
+            md01 = chunk_find_md01(h, id);
+            if (!md01)
+                md01 = chunk_append_md01(h, id);
+            if (!md01)
+                return 1;
+
+            if (parse_md_chunk_list(h, md01) || parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        skip_bits(gb, bit_next - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/* Shared front end of av_dtsuhd_frame and ff_dtsuhd_parse_frame.
+   Checks the syncword, parses the FTOC up to the chunk navigation tables
+   and computes h->frame_bytes.  Returns a DTSUHDStatus value.
+*/
+static int parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes)
+{
+    static const VarBits table_payload = {
+        { 5, 5, 5, 5, 8, 8, 10, 12 }, { 0, 0, 0, 0, 32, 32, 288, 1312 }
+    };
+    GetBitContext *gb;
+    int syncword;
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    /* Data buffer does not contain the signature */
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE;
+
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    if (init_get_bits8(gb, data, data_bytes) < 0)
+        return DTSUHD_INVALID_FRAME;
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync |= h->is_sync_frame;
+
+    /* Have not parsed a sync frame yet. */
+    if (!h->saw_sync)
+        return DTSUHD_NOSYNC;
+    /* Neither a sync nor a non-sync signature: invalid frame. */
+    if (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD)
+        return DTSUHD_NOSYNC;
+
+    /* Data buffer does not contain entire FTOC */
+    h->ftoc_bytes = get_bits_var(gb, &table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    /* Stream parameters, presentation parameters, then AudioChunkTypes and
+       payload sizes; parsing stops at the first failure. */
+    if (parse_stream_params(h) || parse_aud_pres_params(h) || parse_chunk_navi(h))
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+
+    return h->frame_bytes > data_bytes ? DTSUHD_INCOMPLETE : DTSUHD_OK;
+}
+
+/** Allocate a parsing handle.  One handle parses one DTS:X Profile 2 audio
+    stream and is then released with av_dtsuhd_destroy().  Do not share a
+    handle between audio streams.
+
+  @return Parsing handle for use with other functions, or NULL on failure.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(*handle));
+
+    if (!handle)
+        return NULL;
+
+    handle->crc = av_crc_get_table(AV_CRC_16_CCITT);
+    return handle;
+}
+
+/** Release a parsing handle and everything it owns.
+
+  @param[in] h Handle allocated by av_dtsuhd_create(); NULL is ignored
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_freep(&h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+    The metadata chunks are parsed and the descriptor outputs updated only
+    for a successfully parsed sync frame when codecpar is not NULL.
+
+  @param[in] h Handle allocated by av_dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] codecpar Codec parameters filled with the results of
+              descriptor parsing, may be NULL
+  @param[out] udts Filled MP4 udts sample entry descriptor.
+  @param[out] udts_size Size of valid 'udts' data.
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    AVCodecParameters *codecpar, uint8_t **udts, int *udts_size)
+{
+    const int status = parse_frame(h, data, data_bytes);
+
+    if (status != DTSUHD_OK || !codecpar || !h->is_sync_frame)
+        return status;
+
+    /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+       following the FTOC CRC.
+    */
+    skip_bits(&h->gb, h->ftoc_bytes * 8 - get_bits_count(&h->gb));
+
+    if (parse_chunks(h))
+        return DTSUHD_INVALID_FRAME;
+
+    update_descriptor(h, codecpar, udts, udts_size);
+
+    return status;
+}
+
+/** Similar to av_dtsuhd_frame, but for use only within libavcodec.
+    Reports frame size, sample rate and sample count instead of descriptor
+    information.
+
+  @param[in] h Handle allocated by av_dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @return 0 on success, DTSUHDStatus enumeration on error
+*/
+int ff_dtsuhd_parse_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                          DTSUHDFrameInfo *fi)
+{
+    const int status = parse_frame(h, data, data_bytes);
+    int divisor = 1;
+    int idx;
+
+    if (status != DTSUHD_OK || !fi)
+        return status;
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (idx = 0; idx < h->navi_count; idx++) {
+        if (!h->navi[idx].present)
+            continue;
+
+        switch (h->navi[idx].id) {
+        case 3:
+            divisor = 2;
+            break;
+        case 4:
+            divisor = 4;
+            break;
+        default:
+            break;
+        }
+    }
+
+    fi->sync = h->is_sync_frame;
+    fi->frame_bytes = h->frame_bytes;
+    fi->sample_rate = h->sample_rate;
+    fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * divisor);
+
+    return status;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that chunk.
+
+    Each chunk starts with an 8 byte tag followed by a 64-bit big-endian
+    payload size.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
+  @return STRMDATA payload offset, 0 if the buffer is not a DTSHDHDR file or
+          holds no STRMDATA chunk header, AVERROR_INVALIDDATA if a chunk
+          size is out of range
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    uint64_t size;
+    uint64_t offset = 0;
+
+    if (!data_start || data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+    size = data_size;
+
+    /* Walk the chunks by offset rather than by pointer: chunk sizes come
+       from the file and may be far larger than the buffer, so a pointer
+       must never be advanced by an unchecked size. */
+    while (size - offset >= DTSUHD_CHUNK_HEADER) {
+        const uint8_t *chunk = data_start + offset;
+        const uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)offset + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* The next chunk header lies outside the supplied data. */
+        if (chunk_size > size - offset - DTSUHD_CHUNK_HEADER)
+            break;
+        offset += chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..e0994ba78c
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,87 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "codec_par.h"
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Status codes returned by av_dtsuhd_frame() and ff_dtsuhd_parse_frame().
+   DTSUHD_OK is 0; every other value reports a failure. */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Stream information gathered from a parsed audio frame.
+   NOTE(review): presumably filled while av_dtsuhd_frame() builds the
+   sample entry descriptor -- the code that writes it is not in this
+   header; confirm. */
+typedef struct DTSUHDDescriptorInfo {
+    int valid;           /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code;
+    int channel_count;
+    int decoder_profile_code;
+    int frame_duration_code;
+    int max_payload_code;
+    int num_pres_code;
+    int rep_type;
+    int sample_rate;
+    int sample_rate_mod;
+    int sample_size;
+    int channel_mask;
+    uint64_t ffmpeg_channel_mask;
+} DTSUHDDescriptorInfo;
+
+/* Frame information reported by ff_dtsuhd_parse_frame() for a successfully
+   parsed audio frame. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    int sync;         /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+/* Opaque parsing handle; the layout is private to dtsuhd_common.c. */
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+/* Allocate a parsing handle; returns NULL on failure. */
+struct DTSUHD *av_dtsuhd_create(void);
+/* Release a handle returned by av_dtsuhd_create(). */
+void av_dtsuhd_destroy(DTSUHD*);
+/* Parse one frame starting at 'data'; returns a DTSUHDStatus value. */
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    AVCodecParameters*, uint8_t**, int*);
+/* Locate the STRMDATA payload inside DTSHDHDR file content. */
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size);
+/* Parse one frame and report its size and timing in DTSUHDFrameInfo;
+   returns a DTSUHDStatus value. */
+int ff_dtsuhd_parse_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                          DTSUHDFrameInfo*);
+
+/* Return 1 if the 32-bit value is either frame signature (sync or
+   non-sync), 0 otherwise. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    switch (syncword) {
+    case DTSUHD_SYNCWORD:
+    case DTSUHD_NONSYNCWORD:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..2860c1eee5
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Private state of the DTS-UHD parser: a contiguous staging buffer holding
+   input that has not yet been returned as frames. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;   /* Frame parsing handle from av_dtsuhd_create(). */
+    int buf_offset;   /* Offset in 'buf' where the current frame starts. */
+    int buf_bytes;    /* Number of valid bytes stored in 'buf'. */
+    int frame_bytes;  /* Bytes handled by the previous parse call; added to
+                         buf_offset at the start of the next call. */
+    uint8_t *buf;     /* Staging buffer of DTSUHD_BUFFER_SIZE bytes plus
+                         padding. */
+} DTSUHDParseContext;
+
+/* Allocate the frame parsing handle and the staging buffer.
+   Returns 0 on success or AVERROR(ENOMEM); on failure nothing is left
+   allocated. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = av_dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        /* Release whichever allocation succeeded.  The generic parser code
+           is not expected to call parser_close() after a failed init, so it
+           would otherwise leak. */
+        av_dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Release the frame parsing handle and the staging buffer. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+    /* ff_parse_close() is deliberately not called: it treats priv_data as a
+       ParseContext, which DTSUHDParseContext does not embed. */
+}
+
+// Keep data in contiguous buffer as required by dtsuhd_frame.
+//
+// Advances past the frame handled by the previous call, appends as much of
+// the caller's input as fits, and moves buf_offset forward to the next
+// syncword.  On return *buf / *buf_size describe the buffered data starting
+// at that position and *input_consumed is the number of input bytes copied.
+//
+// Returns non-zero when input was copied but less than DTSUHD_MAX_FRAME_SIZE
+// bytes are buffered, i.e. the caller should wait for more data.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    pc->buf_offset += pc->frame_bytes;
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes from buf_offset to buf_bytes are valid; moving
+        // buf_bytes bytes from buf_offset would read past the stored data.
+        const int remaining = FFMAX(0, pc->buf_bytes - pc->buf_offset);
+
+        memmove(pc->buf, pc->buf + pc->buf_offset, remaining);
+        pc->buf_bytes = remaining;
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: buffer the input and return at most one complete
+   DTS-UHD frame in *poutbuf / *poutbuf_size.
+
+   Returns the number of input bytes consumed.  The output is empty when
+   more data is needed or the buffered data could not be parsed.
+*/
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (ff_dtsuhd_parse_frame(pc->dtsuhd, buf, buf_size, &fi)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        pc->frame_bytes = buf_size;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* A parser callback must not return an AVERROR code, and its
+           outputs must always be set.  Drop one byte so the next call
+           searches for the following syncword, and emit no frame. */
+        pc->frame_bytes = buf_size > 0;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* Parser registration for AV_CODEC_ID_DTSUHD.  Defined const to match the
+   'extern const AVCodecParser ff_dtsuhd_parser' declaration in parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 65bc52fb24..4b7ec515fe 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  18
+#define LIBAVCODEC_VERSION_MINOR  19
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Roy Funderburk Aug. 17, 2023, 9:47 p.m. UTC | #33
Updated for master branch changes.
Parsing of DTS-UHD input files per ETSI TS 103 491 is added
as parser for codec id AV_CODEC_ID_DTSUHD.

Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 libavcodec/Makefile        |   1 +
 libavcodec/codec_desc.c    |   7 +
 libavcodec/codec_id.h      |   1 +
 libavcodec/dtsuhd_common.c | 982 +++++++++++++++++++++++++++++++++++++
 libavcodec/dtsuhd_common.h |  83 ++++
 libavcodec/dtsuhd_parser.c | 141 ++++++
 libavcodec/parsers.c       |   1 +
 libavcodec/version.h       |   2 +-
 8 files changed, 1217 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/dtsuhd_common.c
 create mode 100644 libavcodec/dtsuhd_common.h
 create mode 100644 libavcodec/dtsuhd_parser.c

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3c16b51462..583abd1f88 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1167,6 +1167,7 @@ OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
 OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
 OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 4406dd8318..e6af7f2e99 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3413,6 +3413,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("AC-4"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index a5a0cb8525..3e87aa1fe5 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -543,6 +543,7 @@ enum AVCodecID {
     AV_CODEC_ID_WAVARC,
     AV_CODEC_ID_RKA,
     AV_CODEC_ID_AC4,
+    AV_CODEC_ID_DTSUHD,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..3d6b4ab4e0
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,982 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+#define DTSUHD_CRC_SEED 0xFFFF
+
+/* Object representation types.  The enumerators are compared against the
+   3-bit rep_type field read in parse_object_metadata(). */
+enum RepType {
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+/* State of one metadata object inside an MD01 chunk. */
+typedef struct MDObject {
+    int started;  /* Object seen since last reset. */
+    int pres_index;       /* Presentation index handed to parse_md01(). */
+    int rep_type;         /* enum RepType value read by parse_object_metadata(). */
+    int ch_activity_mask; /* Channel activity mask set by parse_ch_mask_params(). */
+} MDObject;
+
+/* Parsing state kept for one MD01 (metadata) chunk id. */
+typedef struct MD01 {
+    GetBitContext gb;     /* Bit reader over 'buf' (see get_bits_md01()). */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id;         /* Id used by chunk_find_md01() to locate this entry. */
+    int object_list[256]; int object_list_count; /* Object ids listed by parse_md_chunk_list(). */
+    int packets_acquired;      /* Static metadata packets copied into 'buf' so far. */
+    int static_md_extracted;   /* Set once parse_static_md_params() ran to completion. */
+    int static_md_packets;     /* Number of packets making up the static metadata. */
+    int static_md_packet_size; /* Size of each packet, in bytes. */
+    int static_md_update_flag; /* When set, static metadata is re-parsed even if already extracted. */
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data */
+} MD01;
+
+/* One audio chunk navigation entry, maintained by parse_chunk_navi(). */
+typedef struct NAVI {
+    int bytes;   /* Audio chunk payload size; 0 also marks a reusable slot. */
+    int id;      /* Audio chunk id; navi_find_index() initialises it to 256. */
+    int index;   /* Audio chunk index used as lookup key. */
+    int present; /* Nonzero when the entry was referenced by the current frame. */
+} NAVI;
+
+/* Per audio presentation state filled in by parse_aud_pres_params(). */
+typedef struct UHDAudio {
+    int mask;       /* Bit mask passed to parse_explicit_object_lists(). */
+    int selectable; /* Nonzero if the presentation is flagged selectable. */
+} UHDAudio;
+
+/* Size and CRC flag of one chunk listed in the frame table of contents. */
+typedef struct UHDChunk {
+    int crc_flag; /* Nonzero: parse_chunks() verifies a CRC over the chunk. */
+    int bytes;    /* Chunk size in bytes. */
+} UHDChunk;
+
+/* Parsing handle: holds the state carried from frame to frame. */
+struct DTSUHD {
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    const AVCRC *crc;                     /* CRC table used for FTOC and chunk checks. */
+    GetBitContext gb;                     /* Bit reader over 'data'. */
+    MD01 *md01; int md01_count;           /* Array of MD01 chunk states. */
+    NAVI *navi; int navi_alloc, navi_count; /* Audio chunk navigation list (capacity, used). */
+    UHDAudio audio[256];                  /* Per audio presentation state. */
+    UHDChunk *chunk; int chunk_alloc, chunk_count; /* Chunks of the current frame (capacity, used). */
+    int chunk_bytes;          /* Sum of all chunk sizes of the current frame. */
+    int clock_rate;           /* Base clock in Hz, from the sync frame. */
+    int frame_bytes;          /* ftoc_bytes + chunk_bytes. */
+    int frame_duration;       /* Frame duration in clock_rate ticks. */
+    int frame_duration_code;  /* Raw 3-bit duration multiplier code. */
+    int ftoc_bytes;           /* Size of the frame table of contents. */
+    int major_version;        /* Bitstream major version (2 for full channel mix). */
+    int num_audio_pres;       /* Number of audio presentations. */
+    int sample_rate;          /* clock_rate << sample_rate_mod. */
+    int sample_rate_mod;      /* 2-bit sample rate multiplier exponent. */
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1; /* Current frame started with DTSUHD_SYNCWORD. */
+    unsigned saw_sync:1;      /* A sync frame has been seen on this handle. */
+};
+
+/* Fetch 'bits' bits of static metadata: from the accumulated MD01 buffer
+   when one has been allocated, otherwise straight from the frame reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Variable-length field reader, based on the code in Table 5-2.
+
+   A unary-style prefix (0, 10, 110 or 111) selects one of the four field
+   widths stored in table[0..3].  The specification's optional 'add'
+   behaviour (defaulting to true there; Table 7-30 passes an explicit false)
+   is folded into the table itself: table[4..6] hold the offsets that are
+   accumulated for every width that was passed over, and would be zero if
+   'add' was not wanted.
+
+   A selected width of zero yields 0 without reading any further bits.
+*/
+static int get_bits_var(GetBitContext *gb, const int table[])
+{
+    const unsigned prefix = show_bits(gb, 3); /* value range is [0, 7] */
+    int prefix_len;
+    int sel;
+    int offset = 0;
+    int k;
+
+    if (prefix < 4) {          /* 0xx */
+        sel        = 0;
+        prefix_len = 1;
+    } else if (prefix < 6) {   /* 10x */
+        sel        = 1;
+        prefix_len = 2;
+    } else {                   /* 110 or 111 */
+        sel        = prefix - 4;
+        prefix_len = 3;
+    }
+
+    skip_bits(gb, prefix_len);
+    if (!table[sel])
+        return 0;
+
+    /* Add the offset of every shorter field that the prefix skipped over. */
+    for (k = 0; k < sel; k++)
+        offset += table[4 + k];
+
+    return get_bits_long(gb, table[sel]) + offset;
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+
+   Grow the MD01 array by one zero-initialised entry tagged with 'id'.
+   Returns the new entry, or NULL if the array could not be grown.  On
+   failure the existing array and md01_count are left untouched, so the
+   handle stays consistent and chunk_reset() can still release every buffer.
+*/
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+    MD01 *entry;
+
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    h->md01_count++;
+
+    return entry;
+}
+
+/* Look up the MD01 state whose chunk_id equals 'id'; NULL when none exists. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    int pos = 0;
+
+    while (pos < h->md01_count) {
+        MD01 *candidate = &h->md01[pos++];
+
+        if (candidate->chunk_id == id)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3.  Drop every MD01 state together with its static data buffer. */
+static void chunk_reset(DTSUHD *h)
+{
+    int remaining = h->md01_count;
+
+    while (remaining-- > 0)
+        av_freep(&h->md01[remaining].buf);
+
+    h->md01_count = 0;
+    av_freep(&h->md01);
+}
+
+/* Pick the default object: within the first MD01 chunk that has any started
+   object belonging to a selectable presentation, return the one with the
+   lowest presentation index (first one wins on ties).  NULL if none. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    int m, k;
+
+    for (m = 0; m < h->md01_count; m++) {
+        MDObject *objects = h->md01[m].object;
+        MDObject *best = NULL;
+
+        for (k = 0; k < 257; k++) {
+            MDObject *cand = &objects[k];
+
+            if (!cand->started || !h->audio[cand->pres_index].selectable)
+                continue;
+            if (!best || cand->pres_index < best->pres_index)
+                best = cand;
+        }
+
+        if (best)
+            return best;
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   Every activity_map row whose activity bits intersect the object's
+   ch_activity_mask contributes its masks to 'info' (masks are OR-ed into
+   the existing values).  A NULL 'object' leaves 'info' untouched.
+*/
+static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        /* AV_CH_* are bit masks; AV_CHAN_* are channel indices and must not
+           be OR-ed together. */
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        for (i = 0; activity_map[i].activity_mask; i++) {
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                info->channel_mask |= activity_map[i].channel_mask;
+                info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        /* The channel count follows the ETSI mask, not the ffmpeg one. */
+        info->channel_count = av_popcount(info->channel_mask);
+        info->rep_type = object->rep_type;
+    }
+}
+
+/* Fill 'info' with what the MP4 Sample Entry box needs.
+   - The sample size is fixed at 16 bits.
+   - The coding name (SampleEntry sub-box name) is 'dtsx', or 'dtsy' once the
+     bitstream major version exceeds 2.
+   - If DecoderProfile == 2, then MaxPayloadCode will be zero.
+*/
+static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
+{
+    const int newer_than_v2 = h->major_version > 2;
+    const char *name = newer_than_v2 ? "dtsy" : "dtsx";
+
+    memset(info, 0, sizeof(*info));
+    memcpy(info->coding_name, name, 5); /* four characters plus the NUL */
+
+    /* Channel mask, channel count and representation type. */
+    extract_object_info(find_default_audio(h), info);
+
+    info->sample_size           = 16;
+    info->sample_rate           = h->sample_rate;
+    info->sample_rate_mod       = h->sample_rate_mod;
+    info->base_sample_freq_code = h->sample_rate == 48000;
+    info->frame_duration_code   = h->frame_duration_code;
+    info->decoder_profile_code  = h->major_version - 2;
+    info->max_payload_code      = newer_than_v2 ? 1 : 0;
+    info->num_pres_code         = h->num_audio_pres - 1;
+    info->valid                 = 1;
+}
+
+/* Table 6-17 p47.  For each of the low 'index' bits set in 'mask', consume
+   one variable-length field; on non-sync frames the field is preceded by a
+   presence bit.  The values are discarded.  Always returns 0. */
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const int table[7] = { 4, 8, 16, 32, 16, 256, 65536 };
+    GetBitContext *gb = &h->gb;
+    int bit;
+
+    for (bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+        if (!h->is_sync_frame && !get_bits1(gb))
+            continue;
+        get_bits_var(gb, table);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.
+   Parse the audio presentation parameters.  The presentation count, the
+   selectable flags and the dependency masks are only transmitted in sync
+   frames; non-sync frames reuse the values stored in h->audio[].
+   Returns 0 on success, 1 on error.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    int read_mask;
+    static const int table[7] = { 0, 2, 4, 5, 1, 4, 16 };
+
+    if (h->is_sync_frame) {
+        /* A full channel mix stream always carries exactly one presentation. */
+        if (h->full_channel_mix_flag)
+            h->num_audio_pres = 1;
+        else
+            h->num_audio_pres = get_bits_var(gb, table) + 1;
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* One bit per lower-numbered presentation.
+                   NOTE(review): the table above allows num_audio_pres up to
+                   53, so 'audio' can exceed the width get_bits() supports
+                   (1-25 bits, as far as I know) and the width of the int
+                   masks used here -- confirm whether the spec bounds this. */
+                read_mask = (audio > 0) ? get_bits(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                /* For every bit set in read_mask, one more bit follows that
+                   becomes the corresponding bit of the stored mask. */
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-12 p 40.  A flag bit selects 3-bit or 6-bit version fields.  The
+   first field, plus 2, is the major version; the second field of the same
+   width is skipped (presumably the minor version -- not used here). */
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    const int field_bits = get_bits1(gb) ? 3 : 6;
+    const int major_code = get_bits(gb, field_bits);
+
+    skip_bits(gb, field_bits);
+    h->major_version = major_code + 2;
+}
+
+/* Table 6-12 p 40.
+   Parse the stream parameters at the start of the FTOC and verify the FTOC
+   CRC where one is present.  Most fields are only transmitted in sync
+   frames; non-sync frames keep the values stored in the handle.
+   Returns 0 on success, 1 on CRC mismatch or bitstream error.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    /* Only non-sync frames of a full channel mix stream lack the FTOC CRC.
+       The CRC is computed over the first ftoc_bytes of the frame and must
+       come out as zero. */
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    if (has_ftoc_crc && av_crc(h->crc, DTSUHD_CRC_SEED, h->data, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        /* Frame duration = base duration * (duration code + 1). */
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        /* The last entry of both tables is 0 and is rejected here. */
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the size of every entry that is not marked present,
+   which makes its slot reusable by navi_find_index(). */
+static void navi_purge(DTSUHD *h)
+{
+    int n;
+
+    for (n = 0; n < h->navi_count; n++) {
+        NAVI *entry = &h->navi[n];
+
+        if (entry->present)
+            continue;
+        entry->bytes = 0;
+    }
+}
+
+/* Table 6-21 p50.  Wipe the used part of the navigation list and mark it
+   empty; the allocation itself is kept. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL)
+        memset(h->navi, 0, sizeof(*h->navi) * h->navi_count);
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Reset the 'present' flag of every navigation entry. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int n = 0;
+
+    while (n < h->navi_count) {
+        h->navi[n].present = 0;
+        n++;
+    }
+}
+
+/* Table 6-23 p51.  Find, or create, the navigation entry for audio chunk
+   index 'desired_index' and mark it present.
+
+   An existing entry with a matching index is reused as is.  Otherwise the
+   first slot that is neither present nor sized is recycled, or the list is
+   extended by one entry; the chosen entry is reset (bytes 0, id 256).
+
+   Returns 0 on success, with the position stored in *list_index.
+   Returns 1 if the list could not be grown; the existing list is then left
+   untouched so the handle stays usable.
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        /* Remember the lowest free slot in case no match is found. */
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            int navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            /* av_realloc_array() keeps the old block valid on failure, so
+               navi, navi_count and navi_alloc never disagree. */
+            NAVI *grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+
+            if (!grown)
+                return 1;
+            h->navi = grown;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.  Parse the chunk navigation information: the list of
+   (metadata) chunks with their sizes and CRC flags, followed by the list of
+   audio chunks.  Accumulates the size of everything that follows the FTOC in
+   h->chunk_bytes.
+
+   Returns 0 on success, 1 on allocation failure.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const int table2468[7] = { 2, 4, 6, 8, 4, 16, 64 };
+    static const int table_audio_chunk_sizes[7] = { 9, 11, 13, 16, 512, 2048, 8192 };
+    static const int table_chunk_sizes[7] = { 6, 9, 12, 15, 64, 512, 4096 };
+
+    h->chunk_bytes = 0;
+    /* A full channel mix stream has a single chunk, in sync frames only. */
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        /* av_realloc_array() keeps the old block valid on failure; drop the
+           count so it cannot exceed the (unchanged) capacity. */
+        UHDChunk *grown = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
+
+        if (!grown) {
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk = grown;
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468);
+
+    /* Sync frames rebuild the navigation list from scratch; other frames
+       only forget which entries were present. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        /* The chunk id is always sent in sync frames, never in non-sync
+           frames of a full channel mix stream, and optionally otherwise. */
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, table2468);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.  Read the list of object ids carried by an MD01 chunk into
+   md01->object_list.  A full channel mix stream always carries the single
+   object id 256.
+
+   Returns 0 on success, 1 if the transmitted count does not fit
+   md01->object_list (the variable-length count can encode values larger
+   than the array).
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    static const int table1[7] = { 3, 4, 6, 8, 8, 16, 64 };
+    const int max_objects = sizeof(md01->object_list) / sizeof(md01->object_list[0]);
+    int i;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        int count = get_bits_var(gb, table1);
+
+        if (count < 0 || count > max_objects) {
+            md01->object_list_count = 0;
+            return 1;
+        }
+        md01->object_list_count = count;
+        /* Each id is preceded by a flag selecting an 8-bit or 4-bit field. */
+        for (i = 0; i < md01->object_list_count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Discard one loudness parameter set: 6 bits (rLoudness),
+   followed by 2 bits when 'nominal_flag' is set, or by 5 + 4 bits when it
+   is clear. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    get_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        get_bits_md01(h, md01, 2);
+    } else {
+        get_bits_md01(h, md01, 5);
+        get_bits_md01(h, md01, 4);
+    }
+}
+
+/* Table 7-8.  Walk through the static metadata parameters; the values are
+   read only to advance the reader and are discarded.
+
+   With 'only_first' set, parsing stops after the loudness parameter sets
+   (the part available once the first static metadata packet has arrived)
+   and static_md_extracted is left unchanged.  Otherwise the remaining
+   parameters are consumed and static_md_extracted is set.
+
+   Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        get_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggests 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                get_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) { /* smooth md present */
+            /* 6 * 6 = 36 bits to discard.  A single get_bits() call cannot
+               read that many bits, so consume them in two halves. */
+            get_bits_md01(h, md01, 18);
+            get_bits_md01(h, md01, 18);
+        }
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* Skip whatever is left of the accumulated static metadata.  The
+           packet sizes are in bytes while the reader position is in bits. */
+        int remaining = md01->static_md_packets * md01->static_md_packet_size * 8 -
+                        get_bits_count(&md01->gb);
+
+        if (remaining > 0)
+            skip_bits(&md01->gb, remaining);
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.
+   Accumulate static metadata that is spread over several frames, one packet
+   per frame, into md01->buf, and parse it once enough has arrived.
+   A sync frame restarts the accumulation and (re)sizes the buffer.
+   Returns 0 on success, 1 on allocation or parse failure.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const int table1[7] = { 0, 6, 9, 12, 1, 64, 512 };
+    static const int table2[7] = { 5, 7, 9, 11, 32, 128, 512 };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            /* One zero-sized packet: nothing is copied and, unless a buffer
+               exists already, get_bits_md01() reads from the frame itself. */
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2) + 3;
+        }
+
+        /* The buffer only ever grows. */
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            /* NOTE(review): the buffer is later read through a GetBitContext
+               but is allocated without any padding bytes -- confirm whether
+               AV_INPUT_BUFFER_PADDING_SIZE is required here. */
+            if (av_reallocp(&md01->buf, n))
+                return 1;
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        /* The update flag is only transmitted for multi-packet metadata. */
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already collected. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        /* Full parse once the last packet arrived; partial parse (loudness
+           sets only) after the first packet of a multi-packet sequence. */
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Decide whether an object should be parsed: ids of 224 and above are
+   accepted without reading anything; otherwise a flag bit in the stream
+   decides.  For a rejected object the render data is skipped.
+   Returns 1 if suitable, 0 if not. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const int table[7] = { 8, 10, 12, 14, 256, 1024, 4096 };
+    GetBitContext *gb = &h->gb;
+    int skip_len;
+
+    if (object_id >= 224)
+        return 1;
+    if (get_bits1(gb))
+        return 1;
+
+    /*  Reject the render and skip the render data. */
+    skip_bits1(gb);
+    skip_len = get_bits_var(gb, table);
+    skip_bits(gb, skip_len);
+
+    return 0;
+}
+
+/* Table 7-26.  Determine the channel activity mask of an object.  Binaural
+   objects implicitly use table entry 1; otherwise a 4-bit selector either
+   picks a predefined mask (0..13) or announces an explicit 16-bit (14) or
+   32-bit (15) mask. */
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+    GetBitContext *gb = &h->gb;
+    int selector;
+
+    if (object->rep_type == REP_TYPE_BINAURAL)
+        selector = 1;
+    else
+        selector = get_bits(gb, 4);
+
+    switch (selector) {
+    case 14:
+        object->ch_activity_mask = get_bits(gb, 16);
+        break;
+    case 15:
+        object->ch_activity_mask = get_bits_long(gb, 32);
+        break;
+    default:
+        object->ch_activity_mask = mask_table[selector];
+        break;
+    }
+}
+
+/* Table 7-22.  Parse the leading part of an object's metadata, far enough
+   to learn its representation type and, for channel-mask based types, its
+   channel activity mask.  Nothing beyond that is read; the caller skips to
+   the end of the chunk.  Always returns 0. */
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    static const int table2[7] = { 1, 4, 4, 8, 2, 16, 16 };
+    static const int table3[7] = { 3, 3, 4, 8, 8, 8, 16 };
+    GetBitContext *gb = &h->gb;
+    const int has_object_header = object_id != 256;
+    int is_ch_mask_object;
+    int is_3d_object;
+
+    skip_bits(gb, has_object_header);
+
+    /* The representation is only described when the object starts. */
+    if (!start_frame_flag)
+        return 0;
+
+    object->rep_type = get_bits(gb, 3);
+
+    is_ch_mask_object = object->rep_type == REP_TYPE_BINAURAL ||
+                        object->rep_type == REP_TYPE_CH_MASK_BASED ||
+                        object->rep_type == REP_TYPE_MTRX2D_CH_MASK_BASED ||
+                        object->rep_type == REP_TYPE_MTRX3D_CH_MASK_BASED;
+    is_3d_object = object->rep_type == REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF ||
+                   object->rep_type == REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF;
+
+    if (!is_ch_mask_object)
+        return 0;
+
+    if (has_object_header) {
+        skip_bits(gb, 3);  /* Object Importance Level */
+        if (get_bits1(gb)) {
+            const int width = get_bits1(gb) ? 3 : 5;
+
+            skip_bits(gb, width);
+        }
+
+        get_bits_var(gb, table2);
+        get_bits_var(gb, table3);
+
+        /* Optional Loudness block. */
+        if (get_bits1(gb))
+            skip_bits(gb, 8);
+
+        /* Optional Object Interactive MD (Table 7-25).  The presence bit is
+           read regardless of interactive_obj_limits_present. */
+        if (get_bits1(gb) && h->interactive_obj_limits_present) {
+            if (get_bits1(gb))
+                skip_bits(gb, 5 + 6 * is_3d_object);
+        }
+    }
+
+    parse_ch_mask_params(h, md01, object);
+
+    /* Rest of the object is skipped by the caller. */
+    return 0;
+}
+
+/* Table 7-4.
+   Parse an MD01 chunk for presentation 'pres_index': optional scaling data
+   and multi-frame static metadata, then the metadata of the first object in
+   md01->object_list that is suitable for rendering.
+   Returns 0 on success, 1 on error.
+*/
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        /* Four optional 5-bit fields, each preceded by a presence bit. */
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    /* All object state is reset here, so every object handled below is
+       treated as newly started. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* Ids 224..255 carry no object metadata that is parsed here. */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* Only the first suitable object is examined. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2.  Step through the chunks announced by the chunk navigation
+   data.  Chunks flagged accordingly are CRC checked; chunks with id 1
+   (MD01) are parsed, every other chunk is skipped.  The reader is always
+   repositioned to the end of the chunk afterwards.
+   Returns 0 on success, 1 on error. */
+static int parse_chunks(DTSUHD *h)
+{
+    static const int table_aud_pres[7] = { 0, 2, 4, 4, 1, 4, 16 };
+    GetBitContext *gb = &h->gb;
+    int n;
+
+    for (n = 0; n < h->chunk_count; n++) {
+        const UHDChunk *chunk = &h->chunk[n];
+        const int start_bits = get_bits_count(gb);
+        const int end_bits = start_bits + chunk->bytes * 8;
+        const uint8_t *payload = h->data + start_bits / 8;
+        uint32_t id;
+
+        if (chunk->crc_flag && av_crc(h->crc, DTSUHD_CRC_SEED, payload, chunk->bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            MD01 *md01;
+            int pres_index = get_bits_var(gb, table_aud_pres);
+
+            /* h->audio[] has 256 entries. */
+            if (pres_index > 255)
+                return 1;
+
+            md01 = chunk_find_md01(h, id);
+            if (!md01)
+                md01 = chunk_append_md01(h, id);
+            if (!md01)
+                return 1;
+
+            if (parse_md_chunk_list(h, md01) || parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        skip_bits(gb, end_bits - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/** Allocate a parsing handle.
+    One handle parses exactly one DTS:X Profile 2 audio stream and must be
+    released with av_dtsuhd_destroy() afterwards.  Do not share a handle
+    between audio streams.
+
+  @return Parsing handle for use with other functions, or NULL on failure.
+*/
+DTSUHD *av_dtsuhd_create(void)
+{
+    DTSUHD *h = av_calloc(1, sizeof(*h));
+
+    if (!h)
+        return NULL;
+
+    h->crc = av_crc_get_table(AV_CRC_16_CCITT);
+    return h;
+}
+
+/** Release a parsing handle and everything it owns.
+
+  @param[in] h Handle allocated by av_dtsuhd_create(), may be NULL
+*/
+void av_dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_freep(&h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+    If the stream parameters of a sync frame fail to parse, the handle
+    forgets that it has seen a sync frame: following non-sync frames are
+    rejected with DTSUHD_NOSYNC until the next valid sync frame, because the
+    stored stream parameters can no longer be trusted.
+
+  @param[in] h Handle allocated by av_dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @param[out] di Results of descriptor parsing, may be NULL; only filled in
+                 for sync frames
+  @return 0 (DTSUHD_OK) on success, DTSUHDStatus enumeration on error
+*/
+int av_dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                    DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
+{
+    GetBitContext *gb;
+    int fraction = 1;
+    int i;
+    int syncword;
+    static const int table_payload[7] = { 5, 8, 10, 12, 32, 256, 1024 };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    init_get_bits(gb, data, data_bytes * 8);
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h)) {
+        /* A broken sync frame may have left clock_rate and friends unset or
+           half updated; require a fresh sync frame before going on. */
+        if (h->is_sync_frame)
+            h->saw_sync = 0;
+        return DTSUHD_INVALID_FRAME;
+    }
+
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    if (di && h->is_sync_frame) {
+        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+           following the FTOC CRC.
+        */
+        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
+        if (parse_chunks(h))
+            return DTSUHD_INVALID_FRAME;
+        update_descriptor(h, di);
+    }
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].present) {
+            if (h->navi[i].id == 3)
+                fraction = 2;
+            else if (h->navi[i].id == 4)
+                fraction = 4;
+        }
+    }
+
+    if (fi) {
+        fi->sync = h->is_sync_frame;
+        fi->frame_bytes = h->frame_bytes;
+        fi->sample_rate = h->sample_rate;
+        /* clock_rate is nonzero after any valid sync frame; the guard only
+           protects against dividing by zero on inconsistent state. */
+        fi->sample_count = h->clock_rate
+                         ? (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction)
+                         : 0;
+    }
+
+    return DTSUHD_OK;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that payload.
+
+    Each chunk starts with a 16-byte header: an 8-character tag followed by
+    a 64-bit big-endian payload size.  The STRMDATA payload itself need not
+    be contained in the buffer.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
+  @return STRMDATA payload offset, 0 if the buffer is not a DTSHDHDR file or
+          holds no STRMDATA chunk header, or AVERROR_INVALIDDATA if a chunk
+          has an implausible size
+*/
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    size_t size;
+    size_t pos = 0;
+
+    if (!data_start || data_size <= DTSUHD_CHUNK_HEADER || memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+    size = data_size;
+
+    /* Offsets are used instead of advancing a pointer so that an untrusted
+       chunk size can never move the cursor outside the buffer. */
+    while (size - pos >= DTSUHD_CHUNK_HEADER) {
+        const uint8_t *chunk = data_start + pos;
+        uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (chunk_size < 4 || chunk_size > ((uint64_t)1 << 61))
+            return AVERROR_INVALIDDATA;
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)pos + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* The next chunk header starts beyond the supplied data. */
+        if (chunk_size > size - pos - DTSUHD_CHUNK_HEADER)
+            break;
+
+        pos += (size_t)chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..10280cd203
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,83 @@
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from av_dtsuhd_frame */
+enum DTSUHDStatus {
+    DTSUHD_OK,            /* Frame parsed successfully. */
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Stream information returned from an audio frame parsed by av_dtsuhd_frame. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code; /* Written as a 1 bit field of the udts box. */
+    int channel_count; /* Number of channels reported for the stream. */
+    int decoder_profile_code; /* Written as a 6 bit field of the udts box. */
+    int frame_duration_code; /* Demuxer reports frame_size as 512 << code. */
+    int max_payload_code; /* Written as a 3 bit field of the udts box. */
+    int num_pres_code; /* Written as a 5 bit field of the udts box. */
+    int rep_type; /* Written as a 3 bit field of the udts box. */
+    int sample_rate; /* Samples per second. */
+    int sample_rate_mod; /* Written as a 2 bit field of the udts box. */
+    int sample_size;
+    int channel_mask; /* Channel mask as defined by ETSI TS 103 491. */
+    uint64_t ffmpeg_channel_mask; /* Same layout expressed with AV_CH_* bits. */
+} DTSUHDDescriptorInfo;
+
+/* Frame information returned from an audio frame parsed by av_dtsuhd_frame. */
+typedef struct DTSUHDFrameInfo {
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+struct DTSUHD *av_dtsuhd_create(void);
+void av_dtsuhd_destroy(DTSUHD*);
+int av_dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                    DTSUHDFrameInfo*, DTSUHDDescriptorInfo*);
+int av_dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                               size_t *strmdata_size);
+
+/* Return non-zero when the 32-bit value is one of the two frame markers. */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    if (syncword == DTSUHD_SYNCWORD)
+        return 1;
+    return syncword == DTSUHD_NONSYNCWORD;
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..e8058e1701
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;  /* Frame header parser created by av_dtsuhd_create(). */
+    int buf_offset;  /* Offset in buf of the data handed to av_dtsuhd_frame. */
+    int buf_bytes;   /* Number of valid bytes stored in buf. */
+    int frame_bytes; /* Bytes added to buf_offset at the start of the next call. */
+    uint8_t *buf;    /* Staging buffer of DTSUHD_BUFFER_SIZE bytes plus padding. */
+} DTSUHDParseContext;
+
+/* Allocate the frame header parser and the contiguous staging buffer. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = av_dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        /* av_parser_init() only frees priv_data when init fails, so whichever
+           allocation succeeded has to be released here. */
+        av_dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Release everything parser_init allocated. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    av_dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+    /* ff_parse_close() is deliberately not called: it interprets priv_data as
+       a ParseContext, which DTSUHDParseContext does not embed. */
+}
+
+// Keep data in contiguous buffer as required by av_dtsuhd_frame.
+// On return *buf / *buf_size describe the buffered data starting at the next
+// syncword candidate and *input_consumed is the number of input bytes copied.
+// Returns non-zero when input was copied but less than DTSUHD_MAX_FRAME_SIZE
+// bytes are buffered, i.e. the caller should wait for more input.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    // Step over the bytes accounted for by the previous call, never past the
+    // end of the valid data.
+    pc->buf_offset = FFMIN(pc->buf_offset + pc->frame_bytes, pc->buf_bytes);
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes from buf_offset onward are valid; moving buf_bytes
+        // bytes from buf_offset would read past them.
+        pc->buf_bytes -= pc->buf_offset;
+        memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes);
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/**
+ * Buffer the input and emit at most one DTS-UHD frame per call.
+ *
+ * @return number of input bytes consumed; never an AVERROR code, because
+ *         av_parser_parse2() asserts that a parser does not return one
+ */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+    DTSUHDFrameInfo fi;
+    int input_consumed = 0;
+
+    if (append_buffer(pc, &buf, &buf_size, &input_consumed)) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return input_consumed;
+    }
+
+    switch (av_dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) {
+    case DTSUHD_OK:
+        if (fi.sample_count)
+            s->duration = fi.sample_count;
+        if (fi.sample_rate)
+            avctx->sample_rate = fi.sample_rate;
+        buf_size = pc->frame_bytes = fi.frame_bytes;
+        break;
+    case DTSUHD_INCOMPLETE:
+        pc->frame_bytes = buf_size;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        /* Emit nothing and drop one byte so that the next call searches for
+           the following syncword instead of retrying the same data. */
+        pc->frame_bytes = buf_size > 0 ? 1 : 0;
+        buf = NULL;
+        buf_size = 0;
+        break;
+    }
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+
+    return input_consumed;
+}
+
+/* parsers.c declares this object "extern const"; the definition must match. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index 285f81a901..6030a68e73 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser;
 extern const AVCodecParser ff_dnxhd_parser;
 extern const AVCodecParser ff_dolby_e_parser;
 extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
 extern const AVCodecParser ff_dvaudio_parser;
 extern const AVCodecParser ff_dvbsub_parser;
 extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 728ab8839d..e0fe2eb7b8 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  23
+#define LIBAVCODEC_VERSION_MINOR  24
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Paul B Mahol Aug. 17, 2023, 10:31 p.m. UTC | #34
Is decoder part still missing or?
Roy Funderburk Aug. 17, 2023, 10:51 p.m. UTC | #35
On 8/17/23 3:31 PM, Paul B Mahol wrote:
> Is decoder part still missing or?

This is just intended to read audio from a .dtsx file and output an mp4/mov file. There will not be a .dtsx to PCM decoder.

Thanks,
-Roy
Roy Funderburk Jan. 16, 2024, 9:02 p.m. UTC | #36
Updated to latest master changes.
Signed-off-by: Roy Funderburk <Roy.Funderburk@xperi.com>
---
 Changelog                 |   1 +
 configure                 |   1 +
 doc/general_contents.texi |   1 +
 libavformat/Makefile      |   1 +
 libavformat/allformats.c  |   1 +
 libavformat/dtshddec.c    |   2 +-
 libavformat/dtsuhddec.c   | 216 ++++++++++++++++++++++++++++++++++++++
 libavformat/movenc.c      |  32 ++++++
 libavformat/version.h     |   2 +-
 9 files changed, 255 insertions(+), 2 deletions(-)
 create mode 100644 libavformat/dtsuhddec.c

diff --git a/Changelog b/Changelog
index 4e7c1ce2c1..d1f19d7047 100644
--- a/Changelog
+++ b/Changelog
@@ -19,6 +19,7 @@ version <next>:
 - VVC decoder
 - fsync filter
 - Raw Captions with Time (RCWT) closed caption muxer
+- DTS-UHD demuxer
 
 version 6.1:
 - libaribcaption decoder
diff --git a/configure b/configure
index c8ae0a061d..70f511827e 100755
--- a/configure
+++ b/configure
@@ -3517,6 +3517,7 @@ dash_demuxer_deps="libxml2"
 dirac_demuxer_select="dirac_parser"
 dts_demuxer_select="dca_parser"
 dtshd_demuxer_select="dca_parser"
+dtsuhd_demuxer_select="dtsuhd_parser"
 dv_demuxer_select="dvprofile"
 dv_muxer_select="dvprofile"
 dxa_demuxer_select="riffdec"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 8b48fed060..2c6d008039 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -613,6 +613,7 @@ library:
 @item raw DNxHD                 @tab X @tab X
 @item raw DTS                   @tab X @tab X
 @item raw DTS-HD                @tab   @tab X
+@item raw DTS-UHD               @tab   @tab X
 @item raw E-AC-3                @tab X @tab X
 @item raw EVC                   @tab X @tab X
 @item raw FLAC                  @tab X @tab X
diff --git a/libavformat/Makefile b/libavformat/Makefile
index dcc99eeac4..f98f157ab0 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -188,6 +188,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
 OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
 OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
 OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
+OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
 OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
 OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index b04b43cab3..5e0608de7f 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -146,6 +146,7 @@ extern const AVInputFormat  ff_dss_demuxer;
 extern const AVInputFormat  ff_dts_demuxer;
 extern const FFOutputFormat ff_dts_muxer;
 extern const AVInputFormat  ff_dtshd_demuxer;
+extern const AVInputFormat  ff_dtsuhd_demuxer;
 extern const AVInputFormat  ff_dv_demuxer;
 extern const FFOutputFormat ff_dv_muxer;
 extern const AVInputFormat  ff_dvbsub_demuxer;
diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
index a3dea0668f..6e9e78a335 100644
--- a/libavformat/dtshddec.c
+++ b/libavformat/dtshddec.c
@@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext {
 static int dtshd_probe(const AVProbeData *p)
 {
     if (AV_RB64(p->buf) == DTSHDHDR)
-        return AVPROBE_SCORE_MAX;
+        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature.
     return 0;
 }
 
diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
new file mode 100644
index 0000000000..d840c0a033
--- /dev/null
+++ b/libavformat/dtsuhddec.c
@@ -0,0 +1,216 @@
+/*
+ * DTS-UHD audio demuxer
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Report DTS-UHD audio stream configuration and extract raw packet data.
+ */
+
+#include "internal.h"
+#include "libavcodec/dtsuhd_common.h"
+#include "libavcodec/put_bits.h"
+#include "libavutil/intreadwrite.h"
+
+#define DTSUHD_BUFFER_SIZE (1024 * 1024)
+
+typedef struct DTSUHDDemuxContext {
+    size_t data_end; /* File offset at which read_packet stops reading. */
+    struct DTSUHD *dtsuhd; /* Frame header parser used by read_header. */
+    uint8_t *buf; /* DTSUHD_BUFFER_SIZE bytes read from the start of the file. */
+} DTSUHDDemuxContext;
+
+/* Score the probe buffer: AVPROBE_SCORE_MAX - 3 as soon as one syncword
+   position parses as a frame, 0 otherwise. */
+static int probe(const AVProbeData *p)
+{
+    int pos = av_dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
+    struct DTSUHD *parser = av_dtsuhd_create();
+    int result = 0;
+
+    if (parser && pos >= 0) {
+        while (pos + 4 < p->buf_size) {
+            const uint8_t *candidate = p->buf + pos;
+
+            if (dtsuhd_is_syncword(AV_RB32(candidate)) &&
+                av_dtsuhd_frame(parser, candidate, p->buf_size - pos, NULL, NULL) == DTSUHD_OK) {
+                result = AVPROBE_SCORE_MAX - 3;
+                break;
+            }
+            pos++;
+        }
+    }
+
+    av_dtsuhd_destroy(parser);
+    return result;
+}
+
+/* Free the header buffer and the frame parser; safe to call repeatedly. */
+static av_cold int read_close(AVFormatContext *s)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+
+    av_dtsuhd_destroy(ctx->dtsuhd);
+    ctx->dtsuhd = NULL;
+    av_freep(&ctx->buf);
+
+    return 0;
+}
+
+/* Return the first offset at or after data_start holding a syncword, or the
+   offset at which the scan stopped when none was found.
+   NOTE(review): the scan is bounded by the buffer capacity, not by the number
+   of bytes actually read into it; callers must validate the result. */
+static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
+{
+    int pos;
+
+    for (pos = data_start; pos + 4 < DTSUHD_BUFFER_SIZE; pos++) {
+        if (dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + pos)))
+            break;
+    }
+
+    return pos;
+}
+
+static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di) /* Build a udts box from di and store it as par->extradata; returns 0 or a negative error. */
+{
+    PutBitContext pbc;
+    int ret;
+    int size;
+    uint8_t udts[32]; /* Scratch space in which the box is assembled. */
+
+    init_put_bits(&pbc, udts, sizeof(udts));
+    put_bits32(&pbc, 0); // udts box size, patched below once the length is known
+    put_bits32(&pbc, AV_RB32("udts")); // udts box signature
+    put_bits(&pbc, 6, di->decoder_profile_code);
+    put_bits(&pbc, 2, di->frame_duration_code);
+    put_bits(&pbc, 3, di->max_payload_code);
+    put_bits(&pbc, 5, di->num_pres_code);
+    put_bits32(&pbc,  di->channel_mask);
+    put_bits(&pbc, 1, di->base_sample_freq_code);
+    put_bits(&pbc, 2, di->sample_rate_mod);
+    put_bits(&pbc, 3, di->rep_type);
+    put_bits(&pbc, 3, 0);
+    put_bits(&pbc, 1, 0);
+    put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for each presentation.
+    flush_put_bits(&pbc); // byte align
+    size = put_bytes_output(&pbc);
+    AV_WB32(udts, size); /* Overwrite the placeholder size at the start of the box. */
+
+    ret = ff_alloc_extradata(par, size);
+    if (ret < 0)
+        return ret;
+
+    memcpy(par->extradata, udts, size);
+
+    return 0;
+}
+
+/**
+ * Read the start of the file, locate the first audio frame and describe the
+ * single audio stream from that frame's descriptor.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise; on failure the
+ *         buffers allocated here have already been released
+ */
+static int read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    DTSUHDDescriptorInfo di;
+    DTSUHDFrameInfo fi;
+    int buf_bytes;
+    int ret;
+    int data_start;
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
+        return AVERROR(EIO);
+
+    /* Zeroed so that the syncword scan, which covers the whole buffer, never
+       reads uninitialised bytes when the file is shorter than the buffer. */
+    dtsuhd->buf = av_mallocz(DTSUHD_BUFFER_SIZE);
+    dtsuhd->dtsuhd = av_dtsuhd_create();
+    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
+    if (buf_bytes < 0) {
+        ret = buf_bytes;
+        goto fail;
+    }
+
+    data_start = av_dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end);
+    if (data_start < 0) {
+        ret = data_start;
+        goto fail;
+    }
+
+    dtsuhd->data_end += data_start;
+    if (data_start == 0)
+        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file.
+
+    data_start = find_first_syncword(dtsuhd, data_start);
+    /* Without this check the length passed below could go negative and be
+       converted to a huge size_t. */
+    if (data_start + 4 > buf_bytes) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (avio_seek(pb, data_start, SEEK_SET) < 0) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    ret = av_dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
+        buf_bytes - data_start, &fi, &di);
+    if (ret != DTSUHD_OK || !di.valid) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffstream(st)->need_parsing          = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type            = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id              = s->iformat->raw_codec_id;
+    st->codecpar->ch_layout.order       = AV_CHANNEL_ORDER_NATIVE;
+    st->codecpar->ch_layout.nb_channels = di.channel_count;
+    st->codecpar->ch_layout.u.mask      = di.ffmpeg_channel_mask;
+    st->codecpar->codec_tag             = AV_RL32(di.coding_name);
+    st->codecpar->frame_size            = 512 << di.frame_duration_code;
+    st->codecpar->sample_rate           = di.sample_rate;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    st->codecpar->channels       = di.channel_count;
+    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ret = write_extradata(st->codecpar, &di);
+    if (ret < 0)
+        goto fail;
+
+    if (st->codecpar->sample_rate)
+        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+
+fail:
+    /* The demuxer does not request automatic cleanup after a failed
+       read_header, so release the buffers here. read_close resets the
+       pointers, which keeps a later second call harmless. */
+    read_close(s);
+    return ret;
+}
+
+/* Read up to DTSUHD_MAX_FRAME_SIZE raw bytes, stopping at data_end. */
+static int read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+    int64_t left = ctx->data_end - avio_tell(s->pb);
+    int64_t size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE);
+    int ret;
+
+    if (size <= 0)
+        return AVERROR_EOF;
+
+    ret = av_get_packet(s->pb, pkt, size);
+    if (ret >= 0)
+        pkt->stream_index = 0;
+
+    return ret;
+}
+
+/* allformats.c declares this object "extern const"; the definition must match. */
+const AVInputFormat ff_dtsuhd_demuxer = {
+    .name           = "dtsuhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
+    .priv_data_size = sizeof(DTSUHDDemuxContext),
+    .read_probe     = probe,
+    .read_header    = read_header,
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "dtsx",
+    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
+};
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c95410f5c1..238c82b7ee 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -762,6 +762,24 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
     return update_size(pb, pos);
 }
 
+/* Write the track's stored udts box; returns the number of bytes written,
+   0 if vos_data does not hold a udts box, or a negative error code. */
+static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
+{
+    if (track->vos_len < 12) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before DTS-UHD packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Write vos_data only if it is a udts box (type tag at bytes 4..7). */
+    if (memcmp(track->vos_data + 4, "udts", 4))
+        return 0;
+
+    avio_write(pb, track->vos_data, track->vos_len);
+    return track->vos_len;
+}
+
 static int mov_pcm_le_gt16(enum AVCodecID codec_id)
 {
     return codec_id == AV_CODEC_ID_PCM_S24LE ||
@@ -1372,6 +1390,8 @@ static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         ret = mov_write_dops_tag(s, pb, track);
     else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
         ret = mov_write_dmlp_tag(s, pb, track);
+    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
+        ret = mov_write_udts_tag(pb, track);
     else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
         if (track->par->ch_layout.nb_channels > 1)
             ret = mov_write_chnl_tag(s, pb, track);
@@ -2823,6 +2843,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
          track->par->codec_id == AV_CODEC_ID_TRUEHD ||
          track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
+         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
          track->par->codec_tag == MKTAG('r','t','p',' ')) &&
         track->has_keyframes && track->has_keyframes < track->entry)
         mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -5739,6 +5760,14 @@ static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
     return;
 }
 
+/* Flag the sample as a sync sample when the packet starts with 0x40411BF2,
+   the value dtsuhd_common.h defines as DTSUHD_SYNCWORD. */
+static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    if (pkt->size <= 4 || AV_RB32(pkt->data) != 0x40411BF2)
+        return;
+
+    trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+    trk->has_keyframes++;
+}
+
 static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
 {
     MOVMuxContext *mov = s->priv_data;
@@ -6391,6 +6420,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         mov_parse_vc1_frame(pkt, trk);
     } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
         mov_parse_truehd_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
+        mov_parse_dtsuhd_frame(pkt, trk);
     } else if (pkt->flags & AV_PKT_FLAG_KEY) {
         if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
             trk->entry > 0) { // force sync sample for the first key frame
@@ -7850,6 +7881,7 @@ static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
     { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
     { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
+    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
     { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
     { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
     { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
diff --git a/libavformat/version.h b/libavformat/version.h
index de9cc8e31d..683184d5da 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #include "version_major.h"
 
-#define LIBAVFORMAT_VERSION_MINOR  20
+#define LIBAVFORMAT_VERSION_MINOR  21
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index a40f32c23f..f683b49bb2 100644
--- a/Changelog
+++ b/Changelog
@@ -3,6 +3,7 @@  releases are sorted from youngest to oldest.
   version <next>:
  - libaribcaption decoder
+- DTS-UHD demuxer
   version 6.0:
  - Radiance HDR image support
diff --git a/configure b/configure
index 033db7442d..557821ceef 100755
--- a/configure
+++ b/configure
@@ -3425,6 +3425,7 @@  dash_demuxer_deps="libxml2"
  dirac_demuxer_select="dirac_parser"
  dts_demuxer_select="dca_parser"
  dtshd_demuxer_select="dca_parser"
+dtsuhd_demuxer_select="dtsuhd_parser"
  dv_demuxer_select="dvprofile"
  dv_muxer_select="dvprofile"
  dxa_demuxer_select="riffdec"
diff --git a/doc/general_contents.texi b/doc/general_contents.texi
index 2eeebd847d..e1ba9c4597 100644
--- a/doc/general_contents.texi
+++ b/doc/general_contents.texi
@@ -597,6 +597,7 @@  library:
  @item raw DNxHD                 @tab X @tab X
  @item raw DTS                   @tab X @tab X
  @item raw DTS-HD                @tab   @tab X
+@item raw DTS-UHD               @tab   @tab X
  @item raw E-AC-3                @tab X @tab X
  @item raw FLAC                  @tab X @tab X
  @item raw GSM                   @tab   @tab X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index aa10fbfcf8..f57564e9eb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1155,6 +1155,7 @@  OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
  OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o dnxhddata.o
  OBJS-$(CONFIG_DOLBY_E_PARSER)          += dolby_e_parser.o dolby_e_parse.o
  OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DTSUHD_PARSER)           += dtsuhd_parser.o dtsuhd_common.o
  OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
  OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
  OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index efdcb59bc9..a58315f46b 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3369,6 +3369,13 @@  static const AVCodecDescriptor codec_descriptors[] = {
          .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"),
          .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
      },
+    {
+        .id        = AV_CODEC_ID_DTSUHD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dtsuhd",
+        .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio Format)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
       /* subtitle codecs */
      {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 64df9699f4..6d8b145ee3 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -538,6 +538,7 @@  enum AVCodecID {
      AV_CODEC_ID_FTR,
      AV_CODEC_ID_WAVARC,
      AV_CODEC_ID_RKA,
+    AV_CODEC_ID_DTSUHD,
       /* subtitle codecs */
      AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c
new file mode 100644
index 0000000000..110cb0c371
--- /dev/null
+++ b/libavcodec/dtsuhd_common.c
@@ -0,0 +1,991 @@ 
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse DTS-UHD audio frame headers, report frame sizes and configuration.
+ * Specification: ETSI TS 103 491 V1.2.1
+ */
+
+#include <string.h>
+
+#include "dtsuhd_common.h"
+#include "get_bits.h"
+#include "libavutil/channel_layout.h"
+
+#define DTSUHD_ALLOC_INCREMENT 16
+#define DTSUHD_CHUNK_HEADER    16
+
+enum RepType { /* NOTE(review): presumably the values held in MDObject.rep_type - confirm */
+    REP_TYPE_CH_MASK_BASED,
+    REP_TYPE_MTRX2D_CH_MASK_BASED,
+    REP_TYPE_MTRX3D_CH_MASK_BASED,
+    REP_TYPE_BINAURAL,
+    REP_TYPE_AMBISONIC,
+    REP_TYPE_AUDIO_TRACKS,
+    REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF,
+    REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF,
+};
+
+typedef struct MDObject { /* One slot of MD01.object. */
+    int started;  /* Object seen since last reset. */
+    int pres_index; /* Index into DTSUHD.audio, as used by find_default_audio(). */
+    int rep_type;
+    int ch_activity_mask;
+} MDObject;
+
+typedef struct MD01 { /* One element of the DTSUHD.md01 array. */
+    GetBitContext gb; /* Bit reader used by get_bits_md01() when buf is set. */
+    MDObject object[257]; /* object id max value is 256 */
+    int chunk_id; /* Identifier matched by chunk_find_md01(). */
+    int object_list[256]; int object_list_count;
+    int packets_acquired;
+    int static_md_extracted;
+    int static_md_packets;
+    int static_md_packet_size;
+    int static_md_update_flag;
+    uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static data; freed by chunk_reset() */
+} MD01;
+
+typedef struct NAVI { /* One element of the DTSUHD.navi array. */
+    int bytes;
+    int id;
+    int index;
+    int present;
+} NAVI;
+
+typedef struct UHDAudio { /* One element of DTSUHD.audio, indexed by MDObject.pres_index. */
+    int mask;
+    int selectable; /* Tested by find_default_audio() when choosing an object. */
+} UHDAudio;
+
+typedef struct UHDChunk { /* One element of the DTSUHD.chunk array. */
+    int crc_flag;
+    int bytes;
+} UHDChunk;
+
+struct DTSUHD { /* Frame parser state; opaque to users of dtsuhd_common.h. */
+    const uint8_t *data; int data_bytes;  /* Original audio frame buffer. */
+    GetBitContext gb; /* Bit reader that get_bits_md01() falls back to. */
+    MD01 *md01; int md01_count; /* Grown by chunk_append_md01(), freed by chunk_reset(). */
+    NAVI *navi; int navi_alloc, navi_count;
+    UHDAudio audio[256];
+    UHDChunk *chunk; int chunk_alloc, chunk_count;
+    int chunk_bytes;
+    int clock_rate;
+    int frame_bytes;
+    int frame_duration;
+    int frame_duration_code;
+    int ftoc_bytes;
+    int major_version;
+    int num_audio_pres;
+    int sample_rate;
+    int sample_rate_mod;
+    unsigned full_channel_mix_flag:1;
+    unsigned interactive_obj_limits_present:1;
+    unsigned is_sync_frame:1;
+    unsigned saw_sync:1;
+};
+
+/* Read 'bits' bits from the MD01 bit reader when the chunk has its own
+   buffer, otherwise from the frame bit reader. */
+static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits)
+{
+    GetBitContext *src = md01->buf ? &md01->gb : &h->gb;
+
+    return get_bits(src, bits);
+}
+
+/* Variable length field reader, based on the code in Table 5-2.
+
+   A prefix of one to three bits selects an entry of 'table', which gives the
+   number of value bits that follow. With 'add' set, 1 << table[i] is added
+   for every entry before the selected one. The specification's pseudo code
+   defaults 'add' to true; Table 7-30 passes an explicit false and most other
+   calls do not pass the extractAndAdd parameter.
+*/
+static int get_bits_var(GetBitContext *gb, const uint8_t table[], int add)
+{
+    static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 };
+    static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 };
+    const int code = show_bits(gb, 3); /* value range is [0, 7] */
+    const int index = index_table[code];
+    int value = 0;
+
+    skip_bits(gb, bits_used[code]);
+    if (table[index] == 0)
+        return value;
+
+    if (add) {
+        int i = 0;
+
+        while (i < index)
+            value += 1 << table[i++];
+    }
+    value += get_bits_long(gb, table[index]);
+
+    return value;
+}
+
+/* Implied by Table 6-2, MD01 chunk objects appended in for loop.
+   Returns the new zeroed entry with chunk_id set, or NULL when out of
+   memory, in which case the existing entries are left untouched. */
+static MD01 *chunk_append_md01(DTSUHD *h, int id)
+{
+    MD01 *entry;
+    /* av_realloc_array() keeps the old array on failure. av_reallocp_array()
+       would free it while md01_count still counted the lost entries. */
+    MD01 *grown = av_realloc_array(h->md01, h->md01_count + 1, sizeof(*h->md01));
+
+    if (!grown)
+        return NULL;
+    h->md01 = grown;
+
+    entry = h->md01 + h->md01_count++;
+    memset(entry, 0, sizeof(*entry));
+    entry->chunk_id = id;
+    return entry;
+}
+
+/* Look up an MD01 chunk by chunk id; NULL when no entry matches. */
+static MD01 *chunk_find_md01(DTSUHD *h, int id)
+{
+    MD01 *entry = h->md01;
+    int remaining = h->md01_count;
+
+    for (; remaining > 0; remaining--, entry++) {
+        if (entry->chunk_id == id)
+            return entry;
+    }
+
+    return NULL;
+}
+
+/* Table 6-3: drop every MD01 chunk together with its accumulated buffer. */
+static void chunk_reset(DTSUHD *h)
+{
+    int i = h->md01_count;
+
+    while (i-- > 0)
+        av_freep(&h->md01[i].buf);
+
+    av_freep(&h->md01);
+    h->md01_count = 0;
+}
+
+/* Within the first MD01 chunk that has any started object whose presentation
+   is selectable, return the one with the lowest pres_index (the first such
+   object on ties). NULL when no chunk has one. */
+static MDObject *find_default_audio(DTSUHD *h)
+{
+    int i, j;
+
+    for (i = 0; i < h->md01_count; i++) {
+        MDObject *objects = h->md01[i].object;
+        int best = -1;
+
+        for (j = 0; j < 257; j++) {
+            const MDObject *candidate = objects + j;
+
+            if (!candidate->started || !h->audio[candidate->pres_index].selectable)
+                continue;
+            if (best < 0 || candidate->pres_index < objects[best].pres_index)
+                best = j;
+        }
+
+        if (best >= 0)
+            return objects + best;
+    }
+
+    return NULL;
+}
+
+/* Save channel mask, count, and rep type to descriptor info.
+   ETSI TS 103 491 Table 7-28 channel activity mask bits
+   mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4.  Convert activity mask and
+   representation type to channel mask and channel counts.
+
+   For every activity_map row whose activity bit(s) intersect
+   object->ch_activity_mask, the row's ETSI and FFmpeg masks are OR-ed into
+   'info'.  channel_count is the population count of the resulting ETSI mask.
+   A NULL 'object' leaves 'info' untouched.
+*/
+static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo *info)
+{
+    int i;
+    static const struct {
+        uint32_t activity_mask;
+        uint32_t channel_mask; // Mask as defined by ETSI TS 103 491
+        uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg
+    } activity_map[] = {
+        // act mask | chan mask | ffmpeg channel mask
+        { 0x000001, 0x00000001, AV_CH_FRONT_CENTER },
+        { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT },
+        { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT },
+        { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY },
+        { 0x000010, 0x00000040, AV_CH_BACK_CENTER },
+        { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT },
+        { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER },
+        { 0x000100, 0x00080000, AV_CH_TOP_CENTER },
+        { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER },
+        // AV_CH_* are bit masks; AV_CHAN_* are enum AVChannel indices and
+        // must not be OR-ed into a channel mask.
+        { 0x000400, 0x00060000, AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT },
+        { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT },
+        { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 },
+        { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | AV_CH_TOP_SIDE_RIGHT },
+        { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER },
+        { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER },
+        { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | AV_CH_BOTTOM_FRONT_RIGHT },
+        // NOTE(review): this row matches two activity bits (0x040000 and
+        // 0x100000); kept as submitted - confirm against Table 7-28.
+        { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | AV_CH_TOP_FRONT_RIGHT },
+        { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | AV_CH_TOP_BACK_RIGHT },
+        { 0 } // Terminator
+    };
+
+    if (object) {
+        for (i = 0; activity_map[i].activity_mask; i++) {
+            if (activity_map[i].activity_mask & object->ch_activity_mask) {
+                info->channel_mask |= activity_map[i].channel_mask;
+                info->ffmpeg_channel_mask |= activity_map[i].ffmpeg_channel_mask;
+            }
+        }
+        info->channel_count = av_popcount(info->channel_mask);
+        info->rep_type = object->rep_type;
+    }
+}
+
+/* Fill 'info' with the values needed for the MP4 Sample Entry box.
+   The sample size is fixed at 16 bits.  The coding name (the SampleEntry
+   sub-box name) is 'dtsx', or 'dtsy' once the bitstream major version
+   exceeds 2; max_payload_code follows the same test, so it is zero for
+   major version 2.
+*/
+static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info)
+{
+    const int beyond_v2 = h->major_version > 2;
+
+    memset(info, 0, sizeof(*info));
+
+    /* Five bytes: four characters plus the terminating NUL. */
+    memcpy(info->coding_name, beyond_v2 ? "dtsy" : "dtsx", 5);
+
+    /* Channel mask, channel count and representation type come from the
+       default audio object, when there is one. */
+    extract_object_info(find_default_audio(h), info);
+
+    info->sample_size           = 16;
+    info->sample_rate           = h->sample_rate;
+    info->sample_rate_mod       = h->sample_rate_mod;
+    info->base_sample_freq_code = h->sample_rate == 48000;
+    info->frame_duration_code   = h->frame_duration_code;
+    info->decoder_profile_code  = h->major_version - 2;
+    info->max_payload_code      = beyond_v2;
+    info->num_pres_code         = h->num_audio_pres - 1;
+    info->valid                 = 1;
+}
+
+/* Table 6-17 p47.  For each of the low 'index' bits set in 'mask', consume
+   one variable-length field; on non-sync frames the field is preceded by a
+   one-bit presence flag.  The decoded values are discarded.  Always
+   returns 0. */
+static int parse_explicit_object_lists(DTSUHD *h, int mask, int index)
+{
+    static const uint8_t table[4] = { 4, 8, 16, 32 };
+    GetBitContext *gb = &h->gb;
+    int bit;
+
+    for (bit = 0; bit < index; bit++) {
+        if (!((mask >> bit) & 0x01))
+            continue;
+        /* The presence flag is only read on non-sync frames. */
+        if (!h->is_sync_frame && !get_bits1(gb))
+            continue;
+        get_bits_var(gb, table, 1);
+    }
+
+    return 0;
+}
+
+/* Table 6-15 p44, Table 6-16 p45.
+   Parse the audio presentation parameters from h->gb.  On sync frames the
+   presentation count, the per-presentation selectable flags and dependency
+   masks are re-read; on non-sync frames the values stored in h->audio are
+   reused.  Returns 0 on success, 1 if parse_explicit_object_lists() fails.
+*/
+static int parse_aud_pres_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio;
+    int i;
+    int read_mask;
+    static const uint8_t table[4] = { 0, 2, 4, 5 };
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->num_audio_pres = 1;
+        else
+            h->num_audio_pres = get_bits_var(gb, table, 1) + 1;
+        /* NOTE(review): the capacity of h->audio is not visible here;
+           confirm it covers the largest num_audio_pres this can decode. */
+        memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres);
+    }
+
+    for (audio = 0; audio < h->num_audio_pres; audio++) {
+        if (h->is_sync_frame) {
+            if (h->full_channel_mix_flag)
+                h->audio[audio].selectable = 1;
+            else
+                h->audio[audio].selectable = get_bits1(gb);
+        }
+
+        if (h->audio[audio].selectable) {
+            if (h->is_sync_frame) {
+                /* One bit per lower-numbered presentation.
+                   NOTE(review): get_bits() is documented for 1-25 bits;
+                   'audio' looks able to exceed that for large presentation
+                   counts - confirm the bound or use get_bits_long(). */
+                read_mask = (audio > 0) ? get_bits(gb, audio) : 0;
+                h->audio[audio].mask = 0;
+                /* For every set bit of read_mask one more flag bit follows
+                   and is stored at the same position in the mask. */
+                for (i = 0; read_mask; i++, read_mask >>= 1) {
+                    if (read_mask & 0x01)
+                        h->audio[audio].mask |= get_bits1(gb) << i;
+                }
+            }
+
+            if (parse_explicit_object_lists(h, h->audio[audio].mask, audio))
+                return 1;
+        } else {
+            h->audio[audio].mask = 0;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-9 p 38.  Run a 4-bit table-driven CRC-16 (initial value 0xFFFF)
+   over 'bytes' bytes of h->data starting at bit offset 'bit'.  The span
+   includes the stored CRC, so a good span leaves a zero remainder.
+   Returns 0 when the remainder is zero, non-zero otherwise. */
+static int check_crc(DTSUHD *h, int bit, int bytes)
+{
+    static const uint16_t lookup[16] = {
+        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
+        0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF
+    };
+    GetBitContext reader;
+    uint16_t crc = 0xFFFF;
+    int n;
+
+    init_get_bits(&reader, h->data, h->data_bytes * 8);
+    skip_bits(&reader, bit);
+
+    /* Two nibbles per byte, high nibble first. */
+    for (n = 0; n < bytes; n++) {
+        crc = (crc << 4) ^ lookup[(crc >> 12) ^ get_bits(&reader, 4)];
+        crc = (crc << 4) ^ lookup[(crc >> 12) ^ get_bits(&reader, 4)];
+    }
+
+    return crc != 0;
+}
+
+/* Table 6-12 p 40.  A one-bit selector chooses 3- or 6-bit version fields.
+   The first field plus 2 becomes h->major_version; a second field of the
+   same width (presumably the minor version) is skipped. */
+static void decode_version(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    const int field_bits = get_bits1(gb) ? 3 : 6;
+    const int major_field = get_bits(gb, field_bits);
+
+    skip_bits(gb, field_bits);
+    h->major_version = major_field + 2;
+}
+
+/* Table 6-12 p 40.
+   Parse the stream parameters at the start of the FTOC.  On sync frames
+   this reads the full-channel-mix flag, version, frame duration, clock
+   rate, optional timestamp and sample rate modifier into 'h'; non-sync
+   frames reuse the stored values.  Returns 0 on success, 1 on FTOC CRC
+   mismatch or when a reserved duration/clock-rate code is seen.
+*/
+static int parse_stream_params(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int has_ftoc_crc;
+    /* Index 3 of both tables maps to 0, which is rejected below. */
+    static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 };
+    static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 };
+
+    if (h->is_sync_frame)
+        h->full_channel_mix_flag = get_bits1(gb);
+
+    /* The FTOC CRC is checked on every frame except non-sync frames of a
+       full-channel-mix stream. */
+    has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame;
+    if (has_ftoc_crc && check_crc(h, 0, h->ftoc_bytes))
+        return 1;
+
+    if (h->is_sync_frame) {
+        if (h->full_channel_mix_flag)
+            h->major_version = 2;
+        else
+            decode_version(h);
+
+        /* Frame duration = base duration * (frame_duration_code + 1). */
+        h->frame_duration = table_base_duration[get_bits(gb, 2)];
+        h->frame_duration_code = get_bits(gb, 3);
+        h->frame_duration *= (h->frame_duration_code + 1);
+        h->clock_rate = table_clock_rate[get_bits(gb, 2)];
+        /* NOTE(review): h->clock_rate has already been overwritten when this
+           returns, so a rejected sync frame can leave it at 0. */
+        if (h->frame_duration == 0 || h->clock_rate == 0)
+            return 1; /* bitstream error */
+
+        /* A set flag is followed by a 36-bit field that is skipped. */
+        skip_bits(gb, 36 * get_bits1(gb));  /* bTimeStampPresent */
+        h->sample_rate_mod = get_bits(gb, 2);
+        /* Sample rate = clock rate * 2^sample_rate_mod. */
+        h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod);
+
+        if (h->full_channel_mix_flag) {
+            h->interactive_obj_limits_present = 0;
+        } else {
+            skip_bits1(gb);  /* reserved flag. */
+            h->interactive_obj_limits_present = get_bits1(gb);
+        }
+    }
+
+    return 0;
+}
+
+/* Table 6-24 p52.  Zero the byte count of every NAVI entry that was not
+   marked present, which makes that slot reusable by navi_find_index(). */
+static void navi_purge(DTSUHD *h)
+{
+    int slot = 0;
+
+    while (slot < h->navi_count) {
+        if (h->navi[slot].present == 0)
+            h->navi[slot].bytes = 0;
+        slot++;
+    }
+}
+
+/* Table 6-21 p50.  Zero the NAVI entries in use and empty the list; the
+   allocation itself is kept for reuse. */
+static void navi_clear(DTSUHD *h)
+{
+    if (h->navi != NULL) {
+        const size_t used_bytes = sizeof(h->navi[0]) * h->navi_count;
+
+        memset(h->navi, 0, used_bytes);
+    }
+
+    h->navi_count = 0;
+}
+
+/* Table 6-22 p51.  Clear the 'present' flag of every NAVI entry while
+   keeping the entries themselves. */
+static void navi_clear_present(DTSUHD *h)
+{
+    int remaining = h->navi_count;
+
+    while (remaining-- > 0)
+        h->navi[remaining].present = 0;
+}
+
+/* Table 6-23 p51.  Find the NAVI entry whose index equals 'desired_index'
+   and mark it present.  If none exists, claim the first free slot (not
+   present, zero bytes) or append one, growing the array if needed, and
+   initialise it with id 256 and zero bytes.
+   Returns 0 on success with the slot number in *list_index, or 1 if the
+   array could not be grown (the existing list is left intact).
+*/
+static int navi_find_index(DTSUHD *h, int desired_index, int *list_index)
+{
+    int avail_index = h->navi_count;
+    int i;
+    int navi_alloc;
+
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].index == desired_index) {
+            *list_index = i;
+            h->navi[i].present = 1;
+            return 0;
+        }
+
+        /* Remember the lowest-numbered reusable slot. */
+        if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && (avail_index > i))
+            avail_index = i;
+    }
+
+    if (avail_index >= h->navi_count) {
+        if (h->navi_count >= h->navi_alloc) {
+            void *grown;
+
+            navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT;
+            /* av_realloc_array() leaves the old array valid on failure;
+               av_reallocp_array() would free it while navi_count and
+               navi_alloc still describe it. */
+            grown = av_realloc_array(h->navi, navi_alloc, sizeof(*h->navi));
+            if (!grown)
+                return 1;
+            h->navi = grown;
+            h->navi_alloc = navi_alloc;
+        }
+        h->navi_count++;
+    }
+
+    *list_index = avail_index;
+    h->navi[avail_index].bytes = 0;
+    h->navi[avail_index].present = 1;
+    h->navi[avail_index].id = 256;
+    h->navi[avail_index].index = desired_index;
+
+    return 0;
+}
+
+/* Table 6-20 p48.  Parse the chunk and audio-chunk navigation tables.
+   Fills h->chunk[] with each metadata chunk's size and CRC flag, updates
+   the NAVI list with each audio chunk's id and size, and accumulates the
+   total payload size in h->chunk_bytes.
+   Returns 0 on success, 1 on allocation failure.
+*/
+static int parse_chunk_navi(DTSUHD *h)
+{
+    GetBitContext *gb = &h->gb;
+    int audio_chunks = 1;
+    int bytes;
+    int i;
+    int id;
+    int id_present;
+    int index;
+    int list_index;
+    static const uint8_t table2468[4] = { 2, 4, 6, 8 };
+    static const uint8_t table_audio_chunk_sizes[4] = { 9, 11, 13, 16 };
+    static const uint8_t table_chunk_sizes[4] = { 6, 9, 12, 15 };
+
+    h->chunk_bytes = 0;
+    /* Full-channel-mix streams carry one chunk on sync frames and none on
+       non-sync frames. */
+    if (h->full_channel_mix_flag)
+        h->chunk_count = h->is_sync_frame;
+    else
+        h->chunk_count = get_bits_var(gb, table2468, 1);
+
+    if (h->chunk_count >= h->chunk_alloc) {
+        int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT;
+        /* av_realloc_array() keeps the old array on failure, so
+           h->chunk and h->chunk_alloc never disagree. */
+        void *grown = av_realloc_array(h->chunk, chunk_alloc, sizeof(*h->chunk));
+
+        if (!grown) {
+            h->chunk_count = 0;
+            return 1;
+        }
+        h->chunk = grown;
+        h->chunk_alloc = chunk_alloc;
+    }
+
+    for (i = 0; i < h->chunk_count; i++) {
+        h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, table_chunk_sizes, 1);
+        if (h->full_channel_mix_flag)
+            h->chunk[i].crc_flag = 0;
+        else
+            h->chunk[i].crc_flag = get_bits1(gb);
+    }
+
+    if (!h->full_channel_mix_flag)
+        audio_chunks = get_bits_var(gb, table2468, 1);
+
+    /* Sync frames rebuild the NAVI list; other frames only re-mark which
+       entries are present. */
+    if (h->is_sync_frame)
+        navi_clear(h);
+    else
+        navi_clear_present(h);
+
+    for (i = 0; i < audio_chunks; i++) {
+        if (h->full_channel_mix_flag)
+            index = 0;
+        else
+            index = get_bits_var(gb, table2468, 1);
+
+        if (navi_find_index(h, index, &list_index))
+            return 1;
+
+        if (h->is_sync_frame)
+            id_present = 1;
+        else if (h->full_channel_mix_flag)
+            id_present = 0;
+        else
+            id_present = get_bits1(gb);
+
+        if (id_present) {
+            id = get_bits_var(gb, table2468, 1);
+            h->navi[list_index].id = id;
+        }
+
+        bytes = get_bits_var(gb, table_audio_chunk_sizes, 1);
+        h->chunk_bytes += bytes;
+        h->navi[list_index].bytes = bytes;
+    }
+
+    /* Entries not refreshed by this frame lose their byte count. */
+    navi_purge(h);
+
+    return 0;
+}
+
+
+/* Table 6-6.  Read the list of object IDs carried by an MD01 chunk.
+   Full-channel-mix streams carry the single implicit object 256; otherwise
+   a variable-length count is followed by that many IDs, each preceded by a
+   one-bit selector choosing an 8-bit or 4-bit ID.  Always returns 0.
+   NOTE(review): the capacity of md01->object_list is not visible here;
+   confirm it covers the largest count get_bits_var() can return.
+*/
+static int parse_md_chunk_list(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    /* static, like the other lookup tables in this file, so it is not
+       rebuilt on the stack on every call. */
+    static const uint8_t table1[4] = { 3, 4, 6, 8 };
+    int i;
+
+    if (h->full_channel_mix_flag) {
+        md01->object_list_count = 1;
+        md01->object_list[0] = 256;
+    } else {
+        md01->object_list_count = get_bits_var(gb, table1, 1);
+        for (i = 0; i < md01->object_list_count; i++)
+            md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4);
+    }
+
+    return 0;
+}
+
+/* Table 7-9.  Consume one loudness parameter set without storing it:
+   6 bits (rLoudness), then 2 more bits when 'nominal_flag' is set, or a
+   5-bit and a 4-bit field when it is not. */
+static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag)
+{
+    get_bits_md01(h, md01, 6); /* rLoudness */
+
+    if (nominal_flag) {
+        get_bits_md01(h, md01, 2);
+        return;
+    }
+
+    get_bits_md01(h, md01, 5);
+    get_bits_md01(h, md01, 4);
+}
+
+/* Table 7-8.  Walk the static metadata parameters; every value is read and
+   discarded, only the bit position matters.
+   With 'only_first' set, parsing stops after the loudness sets (used when
+   only the first packet has been acquired).  Otherwise the remaining
+   fields are consumed, the md01 reader is moved to the end of the static
+   metadata payload and md01->static_md_extracted is set.
+   Always returns 0.
+*/
+static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first)
+{
+    int i;
+    int loudness_sets = 1;
+    int nominal_flag = 1;
+
+    if (h->full_channel_mix_flag == 0)
+        nominal_flag = get_bits_md01(h, md01, 1);
+
+    if (nominal_flag) {
+        if (h->full_channel_mix_flag == 0)
+            loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1;
+    } else {
+        loudness_sets = get_bits_md01(h, md01, 4) + 1;
+    }
+
+    for (i = 0; i < loudness_sets; i++)
+        skip_mp_param_set(h, md01, nominal_flag);
+
+    if (only_first)
+        return 0;
+
+    if (nominal_flag == 0)
+        get_bits_md01(h, md01, 1);
+
+    for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */
+        if (get_bits_md01(h, md01, 1)) {
+            if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */
+                get_bits_md01(h, md01, 15);
+        }
+        if (get_bits_md01(h, md01, 1)) /* smooth md present */
+            get_bits_md01(h, md01, 6 * 6);
+    }
+
+    if (h->full_channel_mix_flag == 0) {
+        /* static_md_packets * static_md_packet_size is a byte count (it
+           sizes md01->buf), while get_bits_count() reports bits; convert
+           before subtracting so the reader lands on the payload end. */
+        int payload_bits = md01->static_md_packets * md01->static_md_packet_size * 8;
+
+        skip_bits(&md01->gb, payload_bits - get_bits_count(&md01->gb));
+    }
+    md01->static_md_extracted = 1;
+
+    return 0;
+}
+
+/* Table 7-7.  Collect the static metadata, which may be spread over several
+   frames as fixed-size packets.  A sync frame (re)reads the packet count and
+   size and restarts acquisition; each call then copies one packet from
+   h->gb into md01->buf.  The static parameters are parsed once the first
+   packet (partial parse) or the last packet (full parse) has arrived.
+   Returns 0 on success, 1 on allocation or parse failure.
+   NOTE(review): md01->buf is read through a GetBitContext but is allocated
+   without AV_INPUT_BUFFER_PADDING_SIZE - confirm whether padding is needed.
+*/
+static int parse_multi_frame_md(DTSUHD *h, MD01 *md01)
+{
+    GetBitContext *gb = &h->gb;
+    int i, n;
+    static const uint8_t table1[4] = { 0, 6, 9, 12 };
+    static const uint8_t table2[4] = { 5, 7, 9, 11 };
+
+    if (h->is_sync_frame) {
+        md01->packets_acquired = 0;
+        if (h->full_channel_mix_flag) {
+            md01->static_md_packets = 1;
+            md01->static_md_packet_size = 0;
+        } else {
+            md01->static_md_packets = get_bits_var(gb, table1, 1) + 1;
+            md01->static_md_packet_size = get_bits_var(gb, table2, 1) + 3;
+        }
+
+        /* Total payload size in bytes. */
+        n = md01->static_md_packets * md01->static_md_packet_size;
+        if (n > md01->buf_bytes) {
+            if (av_reallocp(&md01->buf, n)) {
+                /* av_reallocp() freed the buffer and set it to NULL; forget
+                   the old size so a later frame cannot write through it. */
+                md01->buf_bytes = 0;
+                return 1;
+            }
+            md01->buf_bytes = n;
+        }
+
+        init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8);
+        if (md01->static_md_packets > 1)
+            md01->static_md_update_flag = get_bits1(gb);
+        else
+            md01->static_md_update_flag = 1;
+    }
+
+    if (md01->packets_acquired < md01->static_md_packets) {
+        /* Append this frame's packet behind the ones already stored. */
+        n = md01->packets_acquired * md01->static_md_packet_size;
+        for (i = 0; i < md01->static_md_packet_size; i++)
+            md01->buf[n + i] = get_bits(gb, 8);
+        md01->packets_acquired++;
+
+        if (md01->packets_acquired == md01->static_md_packets) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 0))
+                    return 1;
+        } else if (md01->packets_acquired == 1) {
+            if (md01->static_md_update_flag || !md01->static_md_extracted)
+                if (parse_static_md_params(h, md01, 1))
+                    return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Table 7-18.  OBJGROUPIDSTART=224 Sec 7.8.7 p75.
+   Returns 1 if the object should be rendered, 0 if not.  IDs of 224 and
+   above are accepted without reading anything; lower IDs are accepted
+   when the next bit is set.  For a rejected object the render data is
+   skipped: one flag bit, then a variable-length bit count and that many
+   bits. */
+static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id)
+{
+    static const uint8_t table[4] = { 8, 10, 12, 14 };
+    GetBitContext *gb = &h->gb;
+
+    if (object_id < 224 && !get_bits1(gb)) {
+        /*  Reject the render and skip the render data. */
+        skip_bits1(gb);
+        skip_bits(gb, get_bits_var(gb, table, 1));
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Table 7-26.  Determine the channel activity mask of a channel-mask based
+   object and store it in object->ch_activity_mask.  Binaural objects use
+   table entry 1 without reading anything; otherwise a 4-bit index selects
+   a predefined mask (0-13, Table 7-27) or an explicit 16-bit (14) or
+   32-bit (15) mask read from the bitstream.
+*/
+static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object)
+{
+    GetBitContext *gb = &h->gb;
+    const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 1 : get_bits(gb, 4);
+    static const int mask_table[14] = { /* Table 7-27 */
+        0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, 0x00002F,
+        0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, 0x000843,
+    };
+
+    if (ch_index == 14)
+        object->ch_activity_mask = get_bits(gb, 16);
+    else if (ch_index == 15)
+        /* get_bits() is limited to 25 bits; a 32-bit field needs
+           get_bits_long(). */
+        object->ch_activity_mask = get_bits_long(gb, 32);
+    else
+        object->ch_activity_mask = mask_table[ch_index];
+}
+
+/* Table 7-22.
+   Parse the leading part of one object's metadata.  Only the representation
+   type and, for channel-mask based objects, the channel activity mask are
+   stored in 'object'; the other fields are read and discarded, and the rest
+   of the object is left for the caller to skip.  Nothing beyond one leading
+   bit is read unless 'start_frame_flag' is set.  Always returns 0.
+*/
+static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object,
+                                 int start_frame_flag, int object_id)
+{
+    GetBitContext *gb = &h->gb;
+    int ch_mask_object_flag = 0;
+    int object_3d_metadata_flag = 0;
+    static const uint8_t table2[4] = { 1, 4, 4, 8 };
+    static const uint8_t table3[4] = { 3, 3, 4, 8 };
+
+    /* One leading bit is present for every object except the implicit
+       object 256. */
+    skip_bits(gb, object_id != 256);
+
+    if (start_frame_flag) {
+        object->rep_type = get_bits(gb, 3);
+        /* Classify the representation type; types not listed leave both
+           flags at 0. */
+        switch (object->rep_type) {
+            case REP_TYPE_BINAURAL:
+            case REP_TYPE_CH_MASK_BASED:
+            case REP_TYPE_MTRX2D_CH_MASK_BASED:
+            case REP_TYPE_MTRX3D_CH_MASK_BASED:
+                ch_mask_object_flag = 1;
+                break;
+
+            case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF:
+            case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF:
+                object_3d_metadata_flag = 1;
+                break;
+        }
+
+        if (ch_mask_object_flag) {
+            if (object_id != 256) {
+                skip_bits(gb, 3);  /* Object Importance Level */
+                if (get_bits1(gb))
+                    skip_bits(gb, get_bits1(gb) ? 3 : 5);
+
+                get_bits_var(gb, table2, 1);
+                get_bits_var(gb, table3, 1);
+
+                /* Skip optional Loudness block. */
+                if (get_bits1(gb))
+                    skip_bits(gb, 8);
+
+                /* Skip optional Object Interactive MD (Table 7-25). */
+                /* object_3d_metadata_flag is always 0 on this path, so at
+                   most 5 bits are skipped here. */
+                if (get_bits1(gb) && h->interactive_obj_limits_present) {
+                    if (get_bits1(gb))
+                        skip_bits(gb, 5 + 6 * object_3d_metadata_flag);
+                }
+            }
+
+            parse_ch_mask_params(h, md01, object);
+        }
+    }
+
+    /* Skip rest of object */
+    return 0;
+}
+
+/* Table 7-4.
+   Parse one MD01 metadata chunk for presentation 'pres_index': optional
+   scaling data and multi-frame static metadata, followed by the metadata
+   of the first object in md01->object_list that is suitable for rendering.
+   Returns 0 on success, 1 if a sub-parser fails.
+*/
+static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index)
+{
+    GetBitContext *gb = &h->gb;
+    uint32_t i;
+    uint32_t id;
+    uint32_t start_flag;
+
+    if (h->audio[pres_index].selectable) {
+        /* Four optional 5-bit fields, each preceded by a presence bit. */
+        for (i = 0; i < 4; i++)  /* Table 7-5.  Scaling data. */
+            skip_bits(gb, 5 * get_bits1(gb));
+
+        if (get_bits1(gb) && parse_multi_frame_md(h, md01))
+            return 1;
+    }
+
+    /* Table 7-16: Object metadata. */
+    /* All per-object state is reset on every call, so 'started' below is
+       always 0 the first time an object is visited within this call. */
+    memset(md01->object, 0, sizeof(md01->object));
+    if (!h->full_channel_mix_flag)
+        skip_bits(gb, 11 * get_bits1(gb));
+
+    for (i = 0; i < md01->object_list_count; i++) {
+        id = md01->object_list[i];
+        if (!is_suitable_for_render(h, md01, id))
+            continue;
+
+        md01->object[id].pres_index = pres_index;
+        start_flag = 0;
+        if (!md01->object[id].started) {
+            skip_bits(gb, id != 256);
+            start_flag = md01->object[id].started = 1;
+        }
+
+        /* IDs 224..255 are not passed to parse_object_metadata(). */
+        if ((id < 224 || id > 255) &&
+            parse_object_metadata(h, md01, md01->object + id, start_flag, id)) {
+            return 1;
+        }
+
+        /* Only the first suitable object is examined. */
+        break;
+    }
+
+    return 0;
+}
+
+/* Table 6-2.  Walk the metadata chunks listed by parse_chunk_navi().
+   Each chunk is CRC-checked when its crc_flag is set; chunks with ID 1
+   (MD01) are parsed, every other chunk is skipped.  The reader is always
+   moved to the end of the chunk afterwards.
+   Returns 0 on success, 1 on CRC mismatch, allocation failure or a
+   presentation index above 255.
+*/
+static int parse_chunks(DTSUHD *h)
+{
+    static const uint8_t table_aud_pres[4] = { 0, 2, 4, 4 };
+    GetBitContext *gb = &h->gb;
+    int n;
+
+    for (n = 0; n < h->chunk_count; n++) {
+        const int chunk_end = get_bits_count(gb) + h->chunk[n].bytes * 8;
+        uint32_t id;
+
+        if (h->chunk[n].crc_flag && check_crc(h, get_bits_count(gb), h->chunk[n].bytes))
+            return 1;
+
+        id = get_bits(gb, 8);
+        if (id == 1) {
+            MD01 *md01;
+            int pres_index = get_bits_var(gb, table_aud_pres, 1);
+
+            if (pres_index > 255)
+                return 1;
+
+            md01 = chunk_find_md01(h, id);
+            if (md01 == NULL)
+                md01 = chunk_append_md01(h, id);
+            if (md01 == NULL ||
+                parse_md_chunk_list(h, md01) ||
+                parse_md01(h, md01, pres_index))
+                return 1;
+        }
+
+        /* Jump to the end of this chunk regardless of how much was read. */
+        skip_bits(gb, chunk_end - get_bits_count(gb));
+    }
+
+    return 0;
+}
+
+/** Allocate a zero-initialized parsing handle.  A handle parses exactly one
+    DTS:X Profile 2 audio stream and must then be released with
+    dtsuhd_destroy(); do not reuse it for a second stream.
+
+  @return Parsing handle for use with the other functions, or NULL on
+          allocation failure.
+*/
+DTSUHD *dtsuhd_create(void)
+{
+    DTSUHD *handle = av_calloc(1, sizeof(DTSUHD));
+
+    return handle;
+}
+
+/** Release a parsing handle together with its MD01 chunks, chunk table and
+    NAVI list.  Passing NULL is allowed and does nothing.
+
+  @param[in] h Handle allocated by dtsuhd_create()
+*/
+void dtsuhd_destroy(DTSUHD *h)
+{
+    if (!h)
+        return;
+
+    chunk_reset(h);
+    av_freep(&h->chunk);
+    av_freep(&h->navi);
+    av_free(h);
+}
+
+/** Parse a single DTS:X Profile 2 frame.
+    The frame must start at the first byte of the data buffer, and enough
+    of the frame must be present to decode the majority of the FTOC.
+    From Table 6-11 p40.
+
+    A sync frame must be the first frame provided, before any non-sync frames.
+    Signatures: sync=0x40411BF2, non-sync=0x71C442E8.
+
+  @param[in] h Handle allocated by dtsuhd_create()
+  @param[in] data First byte of a buffer containing the frame to parse
+  @param[in] data_bytes Number of valid bytes in 'data'
+  @param[out] fi Results of frame parsing, may be NULL
+  @param[out] di Results of descriptor parsing, may be NULL; only filled in
+                 for sync frames
+  @return DTSUHD_OK (0) on success, another DTSUHDStatus value on error
+*/
+int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes,
+                 DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di)
+{
+    GetBitContext *gb;
+    int fraction = 1;
+    int i;
+    int syncword;
+    static const uint8_t table_payload[4] = { 5, 8, 10, 12 };
+
+    if (!h || !data)
+        return DTSUHD_NULL;
+
+    if (data_bytes < 4)
+        return DTSUHD_INCOMPLETE; /* Data buffer does not contain the signature */
+
+    h->data = data;
+    h->data_bytes = data_bytes;
+    gb = &h->gb;
+    init_get_bits(gb, data, data_bytes * 8);
+
+    syncword = get_bits_long(gb, 32);
+    h->is_sync_frame = syncword == DTSUHD_SYNCWORD;
+    h->saw_sync |= h->is_sync_frame;
+    if (!h->saw_sync || (!h->is_sync_frame && syncword != DTSUHD_NONSYNCWORD))
+        return DTSUHD_NOSYNC;  /* Invalid frame or have not parsed sync frame. */
+
+    h->ftoc_bytes = get_bits_var(gb, table_payload, 1) + 1;
+    if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes)
+        return DTSUHD_INCOMPLETE;  /* Data buffer does not contain entire FTOC */
+
+    if (parse_stream_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    /* saw_sync is latched before the sync frame is validated, so a rejected
+       sync frame followed by a non-sync frame can arrive here with no usable
+       stream parameters.  Refuse it rather than divide by a zero clock rate
+       below. */
+    if (!h->clock_rate || !h->sample_rate)
+        return DTSUHD_NOSYNC;
+
+    if (parse_aud_pres_params(h))
+        return DTSUHD_INVALID_FRAME;
+
+    if (parse_chunk_navi(h))  /* AudioChunkTypes and payload sizes. */
+        return DTSUHD_INVALID_FRAME;
+
+    /* At this point in the parsing, we can calculate the size of the frame. */
+    h->frame_bytes = h->ftoc_bytes + h->chunk_bytes;
+    if (h->frame_bytes > data_bytes)
+        return DTSUHD_INCOMPLETE;
+
+    if (di && h->is_sync_frame) {
+        /* Skip PBRSmoothParams (Table 6-26) and align to the chunks immediately
+           following the FTOC CRC.
+        */
+        skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb));
+        if (parse_chunks(h))
+            return DTSUHD_INVALID_FRAME;
+        update_descriptor(h, di);
+    }
+
+    /* 6.3.6.9: audio frame duration may be a fraction of metadata frame duration. */
+    for (i = 0; i < h->navi_count; i++) {
+        if (h->navi[i].present) {
+            if (h->navi[i].id == 3)
+                fraction = 2;
+            else if (h->navi[i].id == 4)
+                fraction = 4;
+        }
+    }
+
+    if (fi) {
+        fi->sync = h->is_sync_frame;
+        fi->frame_bytes = h->frame_bytes;
+        fi->sample_rate = h->sample_rate;
+        /* frame_duration is in clock_rate ticks; rescale to samples. */
+        fi->sample_count = (h->frame_duration * fi->sample_rate) / (h->clock_rate * fraction);
+        fi->duration = (double)fi->sample_count / fi->sample_rate;
+    }
+
+    return DTSUHD_OK;
+}
+
+/** Return the offset of the first UHD audio frame.
+    When supplied a buffer containing DTSHDHDR file content, the DTSHD
+    headers are skipped and the offset to the first byte of the STRMDATA
+    chunk payload is returned, along with the size of that chunk.
+
+    Each chunk starts with an 8-byte tag followed by a 64-bit big-endian
+    payload size; chunks are walked until STRMDATA is found or a chunk
+    would extend past the supplied data.
+
+  @param[in] data_start DTS:X Profile 2 file content to parse
+  @param[in] data_size Number of valid bytes in 'data_start'
+  @param[out] strmdata_size Size of STRMDATA payload, may be NULL
+  @return STRMDATA payload offset or 0 if not a valid DTS:X Profile 2 file
+*/
+int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, size_t *strmdata_size)
+{
+    uint64_t offset = 0;
+    uint64_t size;
+
+    if (!data_start || data_size <= 0 || data_size <= DTSUHD_CHUNK_HEADER ||
+        memcmp(data_start, "DTSHDHDR", 8))
+        return 0;
+    size = data_size;
+
+    /* Walk by offset rather than by pointer: the chunk size comes from the
+       file and must never be added to a pointer unchecked. */
+    while (offset + DTSUHD_CHUNK_HEADER + 4 <= size) {
+        const uint8_t *chunk = data_start + offset;
+        uint64_t chunk_size = AV_RB64(chunk + 8);
+
+        if (!memcmp(chunk, "STRMDATA", 8)) {
+            if (strmdata_size)
+                *strmdata_size = chunk_size;
+            return (int)offset + DTSUHD_CHUNK_HEADER;
+        }
+
+        /* The next header would lie beyond the supplied data. */
+        if (chunk_size > size - offset - DTSUHD_CHUNK_HEADER)
+            break;
+        offset += chunk_size + DTSUHD_CHUNK_HEADER;
+    }
+
+    return 0;
+}
diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h
new file mode 100644
index 0000000000..8b4e8ce2aa
--- /dev/null
+++ b/libavcodec/dtsuhd_common.h
@@ -0,0 +1,84 @@ 
+/*
+ * DTS-UHD common audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DTSUHD_COMMON_H
+#define AVCODEC_DTSUHD_COMMON_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* 32-bit signatures that open a non-sync and a sync frame respectively. */
+#define DTSUHD_NONSYNCWORD 0x71C442E8
+#define DTSUHD_SYNCWORD    0x40411BF2
+
+/* Byte count the parser buffers before it attempts to parse a frame. */
+#define DTSUHD_MAX_FRAME_SIZE 0x1000
+
+/* Return codes from dtsuhd_frame.  DTSUHD_OK is 0; every other value
+   reports a failure. */
+enum DTSUHDStatus {
+    DTSUHD_OK,
+    DTSUHD_INCOMPLETE,    /* Entire frame not in buffer. */
+    DTSUHD_INVALID_FRAME, /* Error parsing frame. */
+    DTSUHD_NOSYNC,        /* No sync frame prior to non-sync frame. */
+    DTSUHD_NULL,          /* Function parameter may not be NULL. */
+};
+
+/* Return stream information from an audio frame parsed by dtsuhd_frame.
+   Filled in only for sync frames; the fields feed the MP4 Sample Entry box. */
+typedef struct DTSUHDDescriptorInfo {
+    unsigned valid:1; /* True if descriptor info is valid. */
+    char coding_name[5]; /* Four character, null term SampleEntry box name. */
+    int base_sample_freq_code; /* 1 when the sample rate is exactly 48000. */
+    int channel_count;         /* Number of bits set in channel_mask. */
+    int decoder_profile_code;  /* Bitstream major version minus 2. */
+    int frame_duration_code;   /* 3-bit duration code from the stream parameters. */
+    int max_payload_code;      /* 1 when the major version exceeds 2, else 0. */
+    int num_pres_code;         /* Number of audio presentations minus 1. */
+    int rep_type;              /* Representation type of the default audio object. */
+    int sample_rate;           /* Samples per second. */
+    int sample_rate_mod;       /* Sample rate = clock rate << sample_rate_mod. */
+    int sample_size;           /* Always 16. */
+    int channel_mask;          /* Channel mask as defined by ETSI TS 103 491. */
+    uint64_t ffmpeg_channel_mask; /* Same channels as an FFmpeg AV_CH_* mask. */
+} DTSUHDDescriptorInfo;
+
+/* Return frame information from an audio frame parsed by dtsuhd_frame.
+   Written only when dtsuhd_frame returns DTSUHD_OK. */
+typedef struct DTSUHDFrameInfo {
+    double duration;  /* Duration of frame in seconds (seconds per frame). */
+    int frame_bytes;  /* Size of entire frame in bytes. */
+    int sample_count; /* Number of samples in frame (samples per frame). */
+    int sample_rate;  /* Sample rate of frame (samples per second). */
+    unsigned sync:1;  /* True if frame is a sync frame. */
+} DTSUHDFrameInfo;
+
+/* Opaque parsing handle; one handle per audio stream. */
+struct DTSUHD;
+typedef struct DTSUHD DTSUHD;
+
+/* Allocate a handle; returns NULL on failure. */
+struct DTSUHD *dtsuhd_create(void);
+/* Free a handle and everything it owns; NULL is accepted. */
+void dtsuhd_destroy(DTSUHD*);
+/* Parse one frame starting at 'data'; returns a DTSUHDStatus value.  Either
+   output pointer may be NULL. */
+int dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData,
+                 DTSUHDFrameInfo*, DTSUHDDescriptorInfo*);
+/* Locate the STRMDATA payload in DTSHDHDR file content; returns its offset,
+   or 0 when the content is not recognised. */
+int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size,
+                            size_t *strmdata_size);
+
+/* Return non-zero when 'syncword' is either of the two DTS-UHD frame
+   signatures (sync or non-sync). */
+static inline int dtsuhd_is_syncword(uint32_t syncword)
+{
+    switch (syncword) {
+    case DTSUHD_SYNCWORD:
+    case DTSUHD_NONSYNCWORD:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+#endif /* AVCODEC_DTSUHD_COMMON_H */
diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c
new file mode 100644
index 0000000000..4c553b8e4f
--- /dev/null
+++ b/libavcodec/dtsuhd_parser.c
@@ -0,0 +1,141 @@ 
+/*
+ * DTS-UHD audio frame parsing code
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Parse raw DTS-UHD audio frame input and return individual audio frames.
+ */
+
+#include "dtsuhd_common.h"
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+/* Capacity of the parser's contiguous staging buffer, excluding padding. */
+#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128)
+
+/* Parser private state: a staging buffer that keeps input contiguous for
+   dtsuhd_frame(), plus the frame parsing handle. */
+typedef struct DTSUHDParseContext {
+    DTSUHD *dtsuhd;  /* Frame parsing handle from dtsuhd_create(). */
+    int buf_offset;  /* Offset in buf of the next unparsed byte. */
+    int buf_bytes;   /* Number of valid bytes stored in buf. */
+    int frame_bytes; /* Bytes handed out by the previous call; added to
+                        buf_offset at the start of the next call. */
+    uint8_t *buf;    /* DTSUHD_BUFFER_SIZE bytes plus input padding. */
+} DTSUHDParseContext;
+
+/* Allocate the frame parsing handle and the staging buffer.
+   Returns 0 on success or AVERROR(ENOMEM); on failure nothing is left
+   allocated. */
+static av_cold int parser_init(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    pc->dtsuhd = dtsuhd_create();
+    pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, 1);
+    if (!pc->dtsuhd || !pc->buf) {
+        /* parser_close is not guaranteed to run after a failed init
+           (av_parser_init() only frees priv_data on error), so release
+           whichever allocation succeeded here. */
+        dtsuhd_destroy(pc->dtsuhd);
+        pc->dtsuhd = NULL;
+        av_freep(&pc->buf);
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/* Release the frame parsing handle and the staging buffer. */
+static void parser_close(AVCodecParserContext *s)
+{
+    DTSUHDParseContext *pc = s->priv_data;
+
+    dtsuhd_destroy(pc->dtsuhd);
+    pc->dtsuhd = NULL;
+    av_freep(&pc->buf);
+    /* ff_parse_close() is deliberately not called: it interprets priv_data
+       as a ParseContext and frees its first pointer member, but this
+       context does not embed a ParseContext. */
+}
+
+// Keep data in contiguous buffer as required by dtsuhd_frame.
+// Drops the frame handed out by the previous call, appends as much of the
+// new input as fits, and advances to the next syncword.  On return *buf and
+// *buf_size describe the buffered data starting at that syncword and
+// *input_consumed is the number of input bytes copied.
+// Returns non-zero when the caller should wait for more input: some input
+// was consumed but fewer than DTSUHD_MAX_FRAME_SIZE bytes are buffered.
+static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int *buf_size, int *input_consumed)
+{
+    int copy_bytes;
+
+    pc->buf_offset += pc->frame_bytes;
+    pc->frame_bytes = 0;
+
+    // Buffer almost full, move partial frame to start of buffer for more space.
+    if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) {
+        // Only the bytes from buf_offset onward are still valid; moving
+        // buf_bytes bytes would read past the data and possibly past the
+        // allocation.
+        int remaining = pc->buf_bytes - pc->buf_offset;
+
+        memmove(pc->buf, pc->buf + pc->buf_offset, remaining);
+        pc->buf_bytes = remaining;
+        pc->buf_offset = 0;
+    }
+
+    copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, *buf_size));
+
+    // Append input buffer to our context.
+    if (copy_bytes) {
+        memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes);
+        pc->buf_bytes += copy_bytes;
+    }
+
+    // Ensure buffer starts with a syncword
+    while (pc->buf_offset + 4 < pc->buf_bytes && !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset)))
+        pc->buf_offset++;
+
+    *input_consumed = copy_bytes;
+    *buf = pc->buf + pc->buf_offset;
+    *buf_size = pc->buf_bytes - pc->buf_offset;
+
+    return copy_bytes && pc->buf_bytes - pc->buf_offset < DTSUHD_MAX_FRAME_SIZE;
+}
+
+/* Parser callback: stage the input, then try to cut one frame out of the
+   staged data.  Returns the number of input bytes consumed, or
+   AVERROR_INVALIDDATA when the staged data cannot be parsed.  *poutbuf and
+   *poutbuf_size describe the frame, or are NULL/0 when none is ready. */
+static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    DTSUHDParseContext *ctx = s->priv_data;
+    DTSUHDFrameInfo info;
+    int consumed = 0;
+    int status;
+
+    /* Not enough data staged yet: report what was consumed, emit nothing. */
+    if (append_buffer(ctx, &buf, &buf_size, &consumed)) {
+        *poutbuf_size = 0;
+        *poutbuf = NULL;
+        return consumed;
+    }
+
+    status = dtsuhd_frame(ctx->dtsuhd, buf, buf_size, &info, NULL);
+    if (status == DTSUHD_OK) {
+        if (info.sample_count)
+            s->duration = info.sample_count;
+        if (info.sample_rate)
+            avctx->sample_rate = info.sample_rate;
+        ctx->frame_bytes = info.frame_bytes;
+        buf_size = info.frame_bytes;
+    } else if (status == DTSUHD_INCOMPLETE) {
+        /* The staged remainder is dropped on the next call. */
+        ctx->frame_bytes = buf_size;
+        buf_size = 0;
+        buf = NULL;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    *poutbuf_size = buf_size;
+    *poutbuf      = buf;
+
+    return consumed;
+}
+
+/* Parser registration for AV_CODEC_ID_DTSUHD.  Must be const to match the
+   'extern const AVCodecParser ff_dtsuhd_parser' declaration in parsers.c. */
+const AVCodecParser ff_dtsuhd_parser = {
+    .codec_ids      = { AV_CODEC_ID_DTSUHD },
+    .priv_data_size = sizeof(DTSUHDParseContext),
+    .parser_init    = parser_init,
+    .parser_parse   = parser_parse,
+    .parser_close   = parser_close,
+};
diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c
index d355808018..d724c8b402 100644
--- a/libavcodec/parsers.c
+++ b/libavcodec/parsers.c
@@ -37,6 +37,7 @@  extern const AVCodecParser ff_dirac_parser;
  extern const AVCodecParser ff_dnxhd_parser;
  extern const AVCodecParser ff_dolby_e_parser;
  extern const AVCodecParser ff_dpx_parser;
+extern const AVCodecParser ff_dtsuhd_parser;
  extern const AVCodecParser ff_dvaudio_parser;
  extern const AVCodecParser ff_dvbsub_parser;
  extern const AVCodecParser ff_dvdsub_parser;
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 048649689b..42cf19348f 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -186,6 +186,7 @@  OBJS-$(CONFIG_DSICIN_DEMUXER)            += dsicin.o
  OBJS-$(CONFIG_DSS_DEMUXER)               += dss.o
  OBJS-$(CONFIG_DTSHD_DEMUXER)             += dtshddec.o
  OBJS-$(CONFIG_DTS_DEMUXER)               += dtsdec.o rawdec.o
+OBJS-$(CONFIG_DTSUHD_DEMUXER)            += dtsuhddec.o
  OBJS-$(CONFIG_DTS_MUXER)                 += rawenc.o
  OBJS-$(CONFIG_DV_MUXER)                  += dvenc.o
  OBJS-$(CONFIG_DVBSUB_DEMUXER)            += dvbsub.o rawdec.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index cb5b69e9cd..1b48ce6073 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -144,6 +144,7 @@  extern const AVInputFormat  ff_dss_demuxer;
  extern const AVInputFormat  ff_dts_demuxer;
  extern const FFOutputFormat ff_dts_muxer;
  extern const AVInputFormat  ff_dtshd_demuxer;
+extern const AVInputFormat  ff_dtsuhd_demuxer;
  extern const AVInputFormat  ff_dv_demuxer;
  extern const FFOutputFormat ff_dv_muxer;
  extern const AVInputFormat  ff_dvbsub_demuxer;
diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c
index a3dea0668f..6e9e78a335 100644
--- a/libavformat/dtshddec.c
+++ b/libavformat/dtshddec.c
@@ -46,7 +46,7 @@  typedef struct DTSHDDemuxContext {
  static int dtshd_probe(const AVProbeData *p)
  {
      if (AV_RB64(p->buf) == DTSHDHDR)
-        return AVPROBE_SCORE_MAX;
+        return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have this signature.
      return 0;
  }
  diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c
new file mode 100644
index 0000000000..e15176382d
--- /dev/null
+++ b/libavformat/dtsuhddec.c
@@ -0,0 +1,214 @@ 
+/*
+ * DTS-UHD audio demuxer
+ * Copyright (c) 2023 Xperi Corporation / DTS, Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Report DTS-UHD audio stream configuration and extract raw packet data.
+ */
+
+#include "internal.h"
+#include "libavcodec/dtsuhd_common.h"
+#include "libavcodec/put_bits.h"
+#include "libavutil/intreadwrite.h"
+
+#define DTSUHD_BUFFER_SIZE (1024 * 1024)
+
+typedef struct DTSUHDDemuxContext {
+    size_t data_end;
+    struct DTSUHD *dtsuhd;
+    uint8_t *buf;
+} DTSUHDDemuxContext;
+
+/**
+ * Probe callback: look for a syncword that starts a frame dtsuhd_frame()
+ * accepts.
+ *
+ * The scan starts at the offset returned by dtsuhd_strmdata_payload() and
+ * moves forward one byte at a time.
+ *
+ * @param p probe data (buffer and size)
+ * @return AVPROBE_SCORE_MAX - 3 when a frame parses successfully, 0 otherwise
+ *         (including when the temporary DTSUHD handle cannot be allocated)
+ */
+static int probe(const AVProbeData *p)
+{
+    int offset = dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL);
+    int score = 0;
+    struct DTSUHD *h = dtsuhd_create();
+
+    // dtsuhd_create() can fail (read_header maps NULL to ENOMEM); do not pass
+    // a NULL handle on to dtsuhd_frame() / dtsuhd_destroy().
+    if (!h)
+        return 0;
+
+    for (; offset + 4 < p->buf_size; offset++) {
+        if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) {
+            if (DTSUHD_OK == dtsuhd_frame(h, p->buf + offset, p->buf_size - offset, NULL, NULL)) {
+                // Slightly below MAX; the DTS-HD probe was lowered to MAX - 4
+                // because .dtsx files share its header signature.
+                score = AVPROBE_SCORE_MAX - 3;
+                break;
+            }
+        }
+    }
+
+    dtsuhd_destroy(h);
+    return score;
+}
+
+/**
+ * Demuxer close callback: release the DTSUHD handle and the scan buffer held
+ * in the private context.
+ *
+ * @return always 0
+ */
+static av_cold int read_close(AVFormatContext *s)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+
+    // Clear the handle pointer after destroying it so it cannot be reused.
+    dtsuhd_destroy(ctx->dtsuhd);
+    ctx->dtsuhd = NULL;
+
+    // av_freep() also resets ctx->buf to NULL.
+    av_freep(&ctx->buf);
+
+    return 0;
+}
+
+/**
+ * Advance from data_start to the first position in dtsuhd->buf where
+ * dtsuhd_is_syncword() accepts the 32-bit big-endian word.
+ *
+ * The scan is bounded by DTSUHD_BUFFER_SIZE (the buffer capacity), not by the
+ * number of bytes the caller actually filled, so the whole buffer must be
+ * readable.
+ *
+ * @return offset of the first syncword found, or the offset at which the scan
+ *         stopped because fewer than five bytes of capacity remained
+ */
+static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start)
+{
+    int pos;
+
+    for (pos = data_start; pos + 4 < DTSUHD_BUFFER_SIZE; pos++) {
+        if (dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + pos)))
+            break;
+    }
+
+    return pos;
+}
+
+/**
+ * Serialise the stream descriptor as a 'udts' box and store it as the
+ * codec parameters' extradata.
+ *
+ * @param par codec parameters that receive the extradata
+ * @param di  descriptor fields to serialise
+ * @return 0 on success, or the negative error from ff_alloc_extradata()
+ */
+static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo *di)
+{
+    static const char signature[4] = { 'u', 'd', 't', 's' };
+    uint8_t udts[32];
+    PutBitContext pbc;
+    int size;
+    int ret;
+    int i;
+
+    init_put_bits(&pbc, udts, sizeof(udts));
+
+    // Box header: size placeholder (patched below) followed by the signature.
+    put_bits32(&pbc, 0);
+    for (i = 0; i < 4; i++)
+        put_bits(&pbc, 8, signature[i]);
+
+    // Descriptor fields, in bitstream order.
+    put_bits(&pbc, 6, di->decoder_profile_code);
+    put_bits(&pbc, 2, di->frame_duration_code);
+    put_bits(&pbc, 3, di->max_payload_code);
+    put_bits(&pbc, 5, di->num_pres_code);
+    put_bits32(&pbc,  di->channel_mask);
+    put_bits(&pbc, 1, di->base_sample_freq_code);
+    put_bits(&pbc, 2, di->sample_rate_mod);
+    put_bits(&pbc, 3, di->rep_type);
+
+    // Two fields written as zero (3 bits, then 1 bit).
+    put_bits(&pbc, 3, 0);
+    put_bits(&pbc, 1, 0);
+
+    // ID Tag present for each presentation: one zero bit per presentation.
+    put_bits64(&pbc, di->num_pres_code + 1, 0);
+
+    // Pad to a byte boundary, then write the final box size into the header.
+    flush_put_bits(&pbc);
+    size = put_bits_count(&pbc) / 8;
+    AV_WB32(udts, size);
+
+    ret = ff_alloc_extradata(par, size);
+    if (ret < 0)
+        return ret;
+
+    memcpy(par->extradata, udts, size);
+
+    return 0;
+}
+
+/**
+ * Demuxer header callback: locate the first DTS-UHD frame, parse it to obtain
+ * the stream descriptor, and set up the single audio stream.
+ *
+ * On any failure the scan buffer and DTSUHD handle allocated here are
+ * released before returning, since the demuxer does not request automatic
+ * cleanup through read_close.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    DTSUHDDemuxContext *dtsuhd = s->priv_data;
+    DTSUHDDescriptorInfo di;
+    DTSUHDFrameInfo fi;
+    int buf_bytes;
+    int ret = DTSUHD_INVALID_FRAME;
+    int data_start;
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL))
+        return AVERROR(EIO);
+
+    // Zero-initialised: find_first_syncword() scans up to the buffer capacity
+    // and must not read uninitialised memory when the file is shorter.
+    dtsuhd->buf = av_mallocz(DTSUHD_BUFFER_SIZE);
+    dtsuhd->dtsuhd = dtsuhd_create();
+    if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE);
+    if (buf_bytes < 0) {
+        ret = buf_bytes;
+        goto fail;
+    }
+
+    data_start = dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, &dtsuhd->data_end);
+    dtsuhd->data_end += data_start;
+    if (data_start == 0)
+        dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, decode frames to end of file.
+
+    data_start = find_first_syncword(dtsuhd, data_start);
+    // A syncword needs four bytes of real data; reject positions past what
+    // was actually read so dtsuhd_frame() never gets a negative length.
+    if (data_start + 4 > buf_bytes) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // Position the stream at the first frame for read_packet().
+    if (avio_seek(pb, data_start, SEEK_SET) < 0) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    ret = dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start,
+        buf_bytes - data_start, &fi, &di);
+    if (ret != DTSUHD_OK || !di.valid) {
+        av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. File may be invalid.\n");
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+    st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codecpar->codec_id = s->iformat->raw_codec_id;
+    st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
+    st->codecpar->ch_layout.nb_channels = di.channel_count;
+    st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask;
+    st->codecpar->codec_tag = AV_RL32(di.coding_name);
+    st->codecpar->frame_size = 512 << di.frame_duration_code;
+    st->codecpar->sample_rate = di.sample_rate;
+
+#if FF_API_OLD_CHANNEL_LAYOUT
+FF_DISABLE_DEPRECATION_WARNINGS
+    st->codecpar->channels = di.channel_count;
+    st->codecpar->channel_layout = di.ffmpeg_channel_mask;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ret = write_extradata(st->codecpar, &di);
+    if (ret < 0)
+        goto fail;
+
+    if (st->codecpar->sample_rate)
+        avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+
+    return 0;
+
+fail:
+    av_freep(&dtsuhd->buf);
+    if (dtsuhd->dtsuhd) {
+        dtsuhd_destroy(dtsuhd->dtsuhd);
+        dtsuhd->dtsuhd = NULL;
+    }
+    return ret;
+}
+
+/**
+ * Demuxer packet callback: read the next chunk of raw stream data.
+ *
+ * Each packet holds at most DTSUHD_MAX_FRAME_SIZE bytes and never extends
+ * past data_end; the stream is marked for full raw parsing in read_header,
+ * so packets are not split at frame boundaries here.
+ *
+ * @return the av_get_packet() result (bytes read) on success, AVERROR_EOF
+ *         once data_end is reached, or a negative error code
+ */
+static int read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    DTSUHDDemuxContext *ctx = s->priv_data;
+    int64_t remaining = ctx->data_end - avio_tell(s->pb);
+    int64_t chunk = FFMIN(remaining, DTSUHD_MAX_FRAME_SIZE);
+    int nread;
+
+    if (chunk <= 0)
+        return AVERROR_EOF;
+
+    nread = av_get_packet(s->pb, pkt, chunk);
+    if (nread < 0)
+        return nread;
+
+    pkt->stream_index = 0;
+
+    return nread;
+}
+
+// Demuxer registration for raw / chunked DTS-UHD (.dtsx) files. Defined const
+// so the definition agrees with the "extern const AVInputFormat
+// ff_dtsuhd_demuxer" declaration added to libavformat/allformats.c.
+const AVInputFormat ff_dtsuhd_demuxer = {
+    .name           = "dtsuhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("DTS-UHD"),
+    .priv_data_size = sizeof(DTSUHDDemuxContext),
+    .read_probe     = probe,
+    .read_header    = read_header,
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "dtsx",
+    .raw_codec_id   = AV_CODEC_ID_DTSUHD,
+};
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c370922c7d..e727407694 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -762,6 +762,24 @@  static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
      return update_size(pb, pos);
  }
  +static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track)
+{
+    /*
+     * Write the 'udts' box for a DTS-UHD track.
+     *
+     * track->vos_data is expected to already hold a complete box: a 4-byte
+     * size followed by the "udts" signature. Returns the number of bytes
+     * written, 0 when vos_data does not carry the "udts" signature, or
+     * AVERROR(EINVAL) when fewer than 12 bytes of vos_data are available.
+     */
+    if (track->vos_len < 12) {
+        av_log(pb, AV_LOG_ERROR,
+               "Cannot write moov atom before DTS-UHD packets."
+               " Set the delay_moov flag to fix this.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Write vos_data verbatim if it is a udts box (signature at offset 4). */
+    if (memcmp(track->vos_data + 4, "udts", 4) == 0) {
+        avio_write(pb, track->vos_data, track->vos_len);
+        return track->vos_len;
+    }
+
+    return 0;
+}
+
  static int mov_pcm_le_gt16(enum AVCodecID codec_id)
  {
      return codec_id == AV_CODEC_ID_PCM_S24LE ||
@@ -1367,6 +1385,8 @@  static int mov_write_audio_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
          ret = mov_write_dops_tag(s, pb, track);
      else if (track->par->codec_id == AV_CODEC_ID_TRUEHD)
          ret = mov_write_dmlp_tag(s, pb, track);
+    else if (track->par->codec_id == AV_CODEC_ID_DTSUHD)
+        ret = mov_write_udts_tag(pb, track);
      else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) {
          if (track->par->ch_layout.nb_channels > 1)
              ret = mov_write_chnl_tag(s, pb, track);
@@ -2781,6 +2801,7 @@  static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
      if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
           track->par->codec_id == AV_CODEC_ID_TRUEHD ||
           track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO ||
+         track->par->codec_id == AV_CODEC_ID_DTSUHD ||
           track->par->codec_tag == MKTAG('r','t','p',' ')) &&
          track->has_keyframes && track->has_keyframes < track->entry)
          mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE);
@@ -5673,6 +5694,14 @@  static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
      }
  }
  +static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk)
+{
+    /*
+     * Flag the current cluster entry as a sync sample and count it as a
+     * keyframe when the packet starts with the 32-bit big-endian value
+     * 0x40411BF2.
+     * NOTE(review): 0x40411BF2 is presumably the DTS-UHD sync-frame syncword;
+     * confirm against dtsuhd_is_syncword() / the DTS-UHD specification.
+     */
+    if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) {
+        trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
+        trk->has_keyframes++;
+    }
+ }
+
  static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk)
  {
      int length;
@@ -6343,6 +6372,8 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
          mov_parse_vc1_frame(pkt, trk);
      } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
          mov_parse_truehd_frame(pkt, trk);
+    } else if (par->codec_id == AV_CODEC_ID_DTSUHD) {
+        mov_parse_dtsuhd_frame(pkt, trk);
      } else if (pkt->flags & AV_PKT_FLAG_KEY) {
          if (mov->mode == MODE_MOV && par->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
              trk->entry > 0) { // force sync sample for the first key frame
@@ -7800,6 +7831,7 @@  static const AVCodecTag codec_mp4_tags[] = {
      { AV_CODEC_ID_AC3,             MKTAG('a', 'c', '-', '3') },
      { AV_CODEC_ID_EAC3,            MKTAG('e', 'c', '-', '3') },
      { AV_CODEC_ID_DTS,             MKTAG('m', 'p', '4', 'a') },
+    { AV_CODEC_ID_DTSUHD,          MKTAG('d', 't', 's', 'x') },
      { AV_CODEC_ID_TRUEHD,          MKTAG('m', 'l', 'p', 'a') },
      { AV_CODEC_ID_FLAC,            MKTAG('f', 'L', 'a', 'C') },
      { AV_CODEC_ID_OPUS,            MKTAG('O', 'p', 'u', 's') },
diff --git a/libavformat/version.h b/libavformat/version.h
index cc56b7cf5c..384cbd49cc 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@ 
   #include "version_major.h"
  -#define LIBAVFORMAT_VERSION_MINOR   4
+#define LIBAVFORMAT_VERSION_MINOR   5
  #define LIBAVFORMAT_VERSION_MICRO 101
   #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
-- 
2.17.1