diff mbox series

[FFmpeg-devel,v3,2/2] avformat/movenc: add support for TTML muxing

Message ID 20210726134104.42737-3-jeebjp@gmail.com
State New
Headers show
Series TTML in MP4, part 1
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Jan Ekström July 26, 2021, 1:41 p.m. UTC
From: Jan Ekström <jan.ekstrom@24i.com>

Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
methods. This initial version also foregoes fragmentation support
in case the built-in sample squashing is to be utilized, as this
eases the initial review.

Additionally, add basic tests for both muxing modes in MP4.

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
---
 libavformat/Makefile             |   2 +-
 libavformat/isom.h               |   3 +
 libavformat/movenc.c             | 179 ++++++++++++++++++++++++++++++-
 libavformat/movenc.h             |   5 +
 libavformat/movenc_ttml.c        | 178 ++++++++++++++++++++++++++++++
 libavformat/movenc_ttml.h        |  31 ++++++
 tests/fate/subtitles.mak         |   4 +
 tests/ref/fate/sub-ttml-mp4-dfxp |  44 ++++++++
 tests/ref/fate/sub-ttml-mp4-stpp |  44 ++++++++
 9 files changed, 487 insertions(+), 3 deletions(-)
 create mode 100644 libavformat/movenc_ttml.c
 create mode 100644 libavformat/movenc_ttml.h
 create mode 100644 tests/ref/fate/sub-ttml-mp4-dfxp
 create mode 100644 tests/ref/fate/sub-ttml-mp4-stpp

Comments

Martin Storsjö Aug. 2, 2021, 12:46 p.m. UTC | #1
On Mon, 26 Jul 2021, Jan Ekström wrote:

> From: Jan Ekström <jan.ekstrom@24i.com>
>
> Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
> methods. This initial version also foregoes fragmentation support
> in case the built-in sample squashing is to be utilized, as this
> eases the initial review.
>
> Additionally, add basic tests for both muxing modes in MP4.
>
> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
> ---
> libavformat/Makefile             |   2 +-
> libavformat/isom.h               |   3 +
> libavformat/movenc.c             | 179 ++++++++++++++++++++++++++++++-
> libavformat/movenc.h             |   5 +
> libavformat/movenc_ttml.c        | 178 ++++++++++++++++++++++++++++++
> libavformat/movenc_ttml.h        |  31 ++++++
> tests/fate/subtitles.mak         |   4 +
> tests/ref/fate/sub-ttml-mp4-dfxp |  44 ++++++++
> tests/ref/fate/sub-ttml-mp4-stpp |  44 ++++++++
> 9 files changed, 487 insertions(+), 3 deletions(-)
> create mode 100644 libavformat/movenc_ttml.c
> create mode 100644 libavformat/movenc_ttml.h
> create mode 100644 tests/ref/fate/sub-ttml-mp4-dfxp
> create mode 100644 tests/ref/fate/sub-ttml-mp4-stpp

Thanks for addressing my comments. No further comments from me on this, 
but Andreas had more valuable comments on it than me, so I'd let him 
follow up and finish the review of this.

// Martin
Andreas Rheinhardt Aug. 5, 2021, 7:32 p.m. UTC | #2
Jan Ekström:
> From: Jan Ekström <jan.ekstrom@24i.com>
> 
> Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
> methods. This initial version also foregoes fragmentation support
> in case the built-in sample squashing is to be utilized, as this
> eases the initial review.
> 
> Additionally, add basic tests for both muxing modes in MP4.
> 
> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
> ---
>  libavformat/Makefile             |   2 +-
>  libavformat/isom.h               |   3 +
>  libavformat/movenc.c             | 179 ++++++++++++++++++++++++++++++-
>  libavformat/movenc.h             |   5 +
>  libavformat/movenc_ttml.c        | 178 ++++++++++++++++++++++++++++++
>  libavformat/movenc_ttml.h        |  31 ++++++
>  tests/fate/subtitles.mak         |   4 +
>  tests/ref/fate/sub-ttml-mp4-dfxp |  44 ++++++++
>  tests/ref/fate/sub-ttml-mp4-stpp |  44 ++++++++
>  9 files changed, 487 insertions(+), 3 deletions(-)
>  create mode 100644 libavformat/movenc_ttml.c
>  create mode 100644 libavformat/movenc_ttml.h
>  create mode 100644 tests/ref/fate/sub-ttml-mp4-dfxp
>  create mode 100644 tests/ref/fate/sub-ttml-mp4-stpp
> 
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 813ddd3c20..7e0f587b41 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -337,7 +337,7 @@ OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o \
>                                              qtpalette.o replaygain.o
>  OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o av1.o avc.o hevc.o vpcc.o \
>                                              movenchint.o mov_chan.o rtp.o \
> -                                            movenccenc.o rawutils.o
> +                                            movenccenc.o movenc_ttml.o rawutils.o
>  OBJS-$(CONFIG_MP2_MUXER)                 += rawenc.o
>  OBJS-$(CONFIG_MP3_DEMUXER)               += mp3dec.o replaygain.o
>  OBJS-$(CONFIG_MP3_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index ac1b3f3d56..34a58c79b7 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -387,4 +387,7 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
>      return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
>  }
>  
> +#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
> +#define MOV_MP4_TTML_TAG  MKTAG('s', 't', 'p', 'p')
> +
>  #endif /* AVFORMAT_ISOM_H */
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index c85efe8748..f3e295ad80 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -56,6 +56,8 @@
>  #include "hevc.h"
>  #include "rtpenc.h"
>  #include "mov_chan.h"
> +#include "movenc_ttml.h"
> +#include "ttmlenc.h"
>  #include "vpcc.h"
>  
>  static const AVOption options[] = {
> @@ -119,6 +121,7 @@ static const AVClass mov_isobmff_muxer_class = {
>  };
>  
>  static int get_moov_size(AVFormatContext *s);
> +static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt);
>  
>  static int utf8len(const uint8_t *b)
>  {
> @@ -1787,7 +1790,29 @@ static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
>  
>      if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
>          mov_write_esds_tag(pb, track);
> -    else if (track->par->extradata_size)
> +    else if (track->par->codec_id == AV_CODEC_ID_TTML) {
> +        switch (track->par->codec_tag) {
> +        case MOV_ISMV_TTML_TAG:
> +            // ISMV dfxp requires no extradata.
> +            break;
> +        case MOV_MP4_TTML_TAG:
> +            // As specified in 14496-30, XMLSubtitleSampleEntry
> +            // Namespace
> +            avio_put_str(pb, "http://www.w3.org/ns/ttml");
> +            // Empty schema_location
> +            avio_w8(pb, 0);
> +            // Empty auxiliary_mime_types
> +            avio_w8(pb, 0);
> +            break;
> +        default:
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "Unknown codec tag '%s' utilized for TTML stream with "
> +                   "index %d (track id %d)!\n",
> +                   av_fourcc2str(track->par->codec_tag), track->st->index,
> +                   track->track_id);
> +            return AVERROR(EINVAL);
> +        }
> +    } else if (track->par->extradata_size)
>          avio_write(pb, track->par->extradata, track->par->extradata_size);
>  
>      if (track->mode == MODE_MP4 &&
> @@ -2661,6 +2686,14 @@ static int mov_write_nmhd_tag(AVIOContext *pb)
>      return 12;
>  }
>  
> +static int mov_write_sthd_tag(AVIOContext *pb)
> +{
> +    avio_wb32(pb, 12);
> +    ffio_wfourcc(pb, "sthd");
> +    avio_wb32(pb, 0);
> +    return 12;
> +}
> +
>  static int mov_write_tcmi_tag(AVIOContext *pb, MOVTrack *track)
>  {
>      int64_t pos = avio_tell(pb);
> @@ -2787,6 +2820,8 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>                      hdlr_type = "sbtl";
>                  } else if (track->tag == MKTAG('m','p','4','s')) {
>                      hdlr_type = "subp";
> +                } else if (track->tag == MOV_MP4_TTML_TAG) {
> +                    hdlr_type = "subt";
>                  } else {
>                      hdlr_type = "text";
>                  }
> @@ -2865,6 +2900,8 @@ static int mov_write_minf_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>      else if (track->par->codec_type == AVMEDIA_TYPE_SUBTITLE) {
>          if (track->tag == MKTAG('t','e','x','t') || is_clcp_track(track)) {
>              mov_write_gmhd_tag(pb, track);
> +        } else if (track->tag == MOV_MP4_TTML_TAG) {
> +            mov_write_sthd_tag(pb);
>          } else {
>              mov_write_nmhd_tag(pb);
>          }
> @@ -5253,6 +5290,68 @@ static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
>      return 0;
>  }
>  
> +static int mov_write_squashed_packet(AVFormatContext *s, MOVTrack *track)
> +{
> +    MOVMuxContext *mov = s->priv_data;
> +    AVPacket *squashed_packet = mov->pkt;
> +    int ret = AVERROR_BUG;
> +
> +    switch (track->st->codecpar->codec_id) {
> +    case AV_CODEC_ID_TTML: {
> +        int had_packets = !!track->squashed_packet_queue;
> +
> +        if ((ret = ff_mov_generate_squashed_ttml_packet(s, track, squashed_packet)) < 0) {
> +            goto finish_squash;
> +        }
> +
> +        // We have generated a padding packet (no actual input packets in
> +        // queue) and its duration is zero. Skipping writing it.
> +        if (!had_packets && squashed_packet->duration == 0) {
> +            goto finish_squash;
> +        }
> +
> +        track->end_reliable = 1;
> +        break;
> +    }
> +    default:
> +        ret = AVERROR(EINVAL);
> +        goto finish_squash;
> +    }
> +
> +    squashed_packet->stream_index = track->st->index;
> +
> +    ret = mov_write_single_packet(s, squashed_packet);
> +
> +finish_squash:
> +    av_packet_unref(squashed_packet);
> +
> +    return ret;
> +}
> +
> +static int mov_write_squashed_packets(AVFormatContext *s)
> +{
> +    MOVMuxContext *mov = s->priv_data;
> +
> +    for (int i = 0; i < s->nb_streams; i++) {
> +        MOVTrack *track = &mov->tracks[i];
> +        int ret = AVERROR_BUG;
> +
> +        if (track->squash_fragment_samples_to_one && !track->entry) {
> +            if ((ret = mov_write_squashed_packet(s, track)) < 0) {
> +                av_log(s, AV_LOG_ERROR,
> +                       "Failed to write squashed packet for %s stream with "
> +                       "index %d and track id %d. Error: %s\n",
> +                       avcodec_get_name(track->st->codecpar->codec_id),
> +                       track->st->index, track->track_id,
> +                       av_err2str(ret));
> +                return ret;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int mov_flush_fragment(AVFormatContext *s, int force)
>  {
>      MOVMuxContext *mov = s->priv_data;
> @@ -5264,6 +5363,11 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
>      if (!(mov->flags & FF_MOV_FLAG_FRAGMENT))
>          return 0;
>  
> +    // Check if we have any tracks that require squashing.
> +    // In that case, we'll have to write the packet here.
> +    if ((ret = mov_write_squashed_packets(s)) < 0)
> +        return ret;
> +
>      // Try to fill in the duration of the last packet in each stream
>      // from queued packets in the interleave queues. If the flushing
>      // of fragments was triggered automatically by an AVPacket, we
> @@ -5739,7 +5843,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>      trk->cluster[trk->entry].entries          = samples_in_chunk;
>      trk->cluster[trk->entry].dts              = pkt->dts;
>      trk->cluster[trk->entry].pts              = pkt->pts;
> -    if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
> +    if (!trk->squash_fragment_samples_to_one &&
> +        !trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
>          if (!trk->frag_discont) {
>              /* First packet of a new fragment. We already wrote the duration
>               * of the last packet of the previous fragment based on track_duration,
> @@ -6032,6 +6137,33 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>              }
>          }
>  
> +        if (trk->squash_fragment_samples_to_one) {
> +            /*
> +             * If the track has to have its samples squashed into one sample,
> +             * we just take it into the track's queue.
> +             * This will then be utilized as the samples get written in either
> +             * mov_flush_fragment or when the mux is finalized in
> +             * mov_write_trailer.
> +             */
> +            int ret = AVERROR_BUG;
> +
> +            if (pkt->pts == AV_NOPTS_VALUE) {
> +                av_log(s, AV_LOG_ERROR,
> +                       "Packets without a valid presentation timestamp are "
> +                       "not supported with packet squashing!\n");
> +                return AVERROR(EINVAL);
> +            }
> +
> +            if ((ret = avpriv_packet_list_put(&trk->squashed_packet_queue,
> +                                              &trk->squashed_packet_queue_end,
> +                                              pkt, av_packet_ref, 0)) < 0) {
> +                return ret;
> +            }
> +
> +            return 0;
> +        }
> +
> +
>          if (trk->mode == MODE_MOV && trk->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>              AVPacket *opkt = pkt;
>              int reshuffle_ret, ret;
> @@ -6310,6 +6442,11 @@ static void mov_free(AVFormatContext *s)
>  
>          ff_mov_cenc_free(&mov->tracks[i].cenc);
>          ffio_free_dyn_buf(&mov->tracks[i].mdat_buf);
> +
> +        if (mov->tracks[i].squashed_packet_queue) {
> +            avpriv_packet_list_free(&(mov->tracks[i].squashed_packet_queue),
> +                                    &(mov->tracks[i].squashed_packet_queue_end));
> +        }
>      }
>  
>      av_freep(&mov->tracks);
> @@ -6700,6 +6837,36 @@ static int mov_init(AVFormatContext *s)
>              }
>          } else if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
>              track->timescale = st->time_base.den;
> +
> +            if (track->par->codec_id == AV_CODEC_ID_TTML) {
> +                /* 14496-30 requires us to use a single sample per fragment
> +                   for TTML, for which we define a per-track flag.
> +
> +                   We set the flag in case we are receiving TTML paragraphs
> +                   from the input, in other words in case we are not doing
> +                   stream copy. */
> +                track->squash_fragment_samples_to_one =
> +                    ff_is_ttml_stream_paragraph_based(track->par);
> +
> +                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
> +                    track->squash_fragment_samples_to_one) {
> +                    av_log(s, AV_LOG_ERROR,
> +                           "Fragmentation is not currently supported for "
> +                           "TTML in MP4/ISMV (track synchronization between "
> +                           "subtitles and other media is not yet implemented)!\n");
> +                    return AVERROR_PATCHWELCOME;
> +                }
> +
> +                if (track->mode != MODE_ISM &&
> +                    track->par->codec_tag == MOV_ISMV_TTML_TAG &&
> +                    s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
> +                    av_log(s, AV_LOG_ERROR,
> +                           "ISMV style TTML support with the 'dfxp' tag in "
> +                           "non-ISMV formats is not officially supported. Add "
> +                           "'-strict unofficial' if you want to use it.\n");
> +                    return AVERROR_EXPERIMENTAL;
> +                }
> +            }
>          } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
>              track->timescale = st->time_base.den;
>          } else {
> @@ -7046,6 +7213,11 @@ static int mov_write_trailer(AVFormatContext *s)
>          }
>      }
>  
> +    // Check if we have any tracks that require squashing.
> +    // In that case, we'll have to write the packet here.
> +    if ((res = mov_write_squashed_packets(s)) < 0)
> +        return res;
> +
>      // If there were no chapters when the header was written, but there
>      // are chapters now, write them in the trailer.  This only works
>      // when we are not doing fragments.
> @@ -7190,6 +7362,8 @@ static const AVCodecTag codec_mp4_tags[] = {
>      { AV_CODEC_ID_MOV_TEXT,        MKTAG('t', 'x', '3', 'g') },
>      { AV_CODEC_ID_BIN_DATA,        MKTAG('g', 'p', 'm', 'd') },
>      { AV_CODEC_ID_MPEGH_3D_AUDIO,  MKTAG('m', 'h', 'm', '1') },
> +    { AV_CODEC_ID_TTML,            MOV_MP4_TTML_TAG          },
> +    { AV_CODEC_ID_TTML,            MOV_ISMV_TTML_TAG         },
>      { AV_CODEC_ID_NONE,               0 },
>  };
>  #if CONFIG_MP4_MUXER || CONFIG_PSP_MUXER
> @@ -7198,6 +7372,7 @@ static const AVCodecTag *const mp4_codec_tags_list[] = { codec_mp4_tags, NULL };
>  
>  static const AVCodecTag codec_ism_tags[] = {
>      { AV_CODEC_ID_WMAPRO      , MKTAG('w', 'm', 'a', ' ') },
> +    { AV_CODEC_ID_TTML        , MOV_ISMV_TTML_TAG         },
>      { AV_CODEC_ID_NONE        ,    0 },
>  };
>  
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index af1ea0bce6..95db1bf46d 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -26,6 +26,7 @@
>  
>  #include "avformat.h"
>  #include "movenccenc.h"
> +#include "libavcodec/packet_internal.h"
>  
>  #define MOV_FRAG_INFO_ALLOC_INCREMENT 64
>  #define MOV_INDEX_CLUSTER_SIZE 1024
> @@ -164,6 +165,10 @@ typedef struct MOVTrack {
>      int pal_done;
>  
>      int is_unaligned_qt_rgb;
> +
> +    unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
> +
> +    PacketList *squashed_packet_queue, *squashed_packet_queue_end;
>  } MOVTrack;
>  
>  typedef enum {
> diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
> new file mode 100644
> index 0000000000..bf4a6fd89e
> --- /dev/null
> +++ b/libavformat/movenc_ttml.c
> @@ -0,0 +1,178 @@
> +/*
> + * MP4, ISMV Muxer TTML helpers
> + * Copyright (c) 2021 24i
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "avformat.h"
> +#include "avio_internal.h"
> +#include "isom.h"
> +#include "movenc.h"
> +#include "movenc_ttml.h"
> +#include "libavcodec/packet_internal.h"
> +
> +static const unsigned char empty_ttml_document[] =
> +    "<tt xml:lang=\"\" xmlns=\"http://www.w3.org/ns/ttml\" />";
> +
> +static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
> +{
> +    AVStream *movenc_stream = track->st, *ttml_stream = NULL;
> +    AVFormatContext *ttml_ctx = NULL;
> +    int ret = AVERROR_BUG;
> +    if ((ret = avformat_alloc_output_context2(&ttml_ctx, NULL,
> +                                              "ttml", NULL)) < 0)
> +        return ret;
> +
> +    if ((ret = avio_open_dyn_buf(&ttml_ctx->pb)) < 0)
> +        goto fail;
> +
> +    if (!(ttml_stream = avformat_new_stream(ttml_ctx, NULL))) {
> +        ret = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    if ((ret = avcodec_parameters_copy(ttml_stream->codecpar,
> +                                       movenc_stream->codecpar)) < 0)
> +        goto fail;
> +
> +    ttml_stream->time_base = movenc_stream->time_base;
> +
> +    *out_ctx = ttml_ctx;
> +
> +    return 0;
> +
> +fail:
> +    ffio_free_dyn_buf(&ttml_ctx->pb);
> +    avformat_free_context(ttml_ctx);

If you used out_ctx directly (i.e. did not use ttml_ctx at all), you could
remove the cleanup code in this function and instead reuse the cleanup code
in ff_mov_generate_squashed_ttml_packet(). (But you would then have to add a
check for whether ttml_ctx exists in ff_mov_generate_squashed_ttml_packet().)


> +
> +    return ret;
> +}
> +
> +static int mov_write_ttml_document_from_queue(AVFormatContext *s,
> +                                              AVFormatContext *ttml_ctx,
> +                                              MOVTrack *track,
> +                                              AVPacket *pkt,
> +                                              int64_t *out_start_ts,
> +                                              int64_t *out_duration)
> +{
> +    int ret = AVERROR_BUG;
> +    int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
> +                       0 : (track->start_dts + track->track_duration);
> +    int64_t end_ts   = start_ts;
> +
> +    if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
> +        return ret;
> +    }
> +
> +    while (!avpriv_packet_list_get(&track->squashed_packet_queue,
> +                                   &track->squashed_packet_queue_end,
> +                                   pkt)) {
> +        end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
> +
> +        // in case of the 'dfxp' muxing mode, each written document is offset
> +        // to its containing sample's beginning.
> +        if (track->par->codec_tag == MOV_ISMV_TTML_TAG) {
> +            pkt->dts = pkt->pts = (pkt->pts - start_ts);
> +        }
> +
> +        pkt->stream_index = 0;
> +
> +        av_packet_rescale_ts(pkt, track->st->time_base,
> +                             ttml_ctx->streams[pkt->stream_index]->time_base);
> +
> +        if ((ret = av_write_frame(ttml_ctx, pkt)) < 0) {
> +            goto cleanup;
> +        }
> +
> +        av_packet_unref(pkt);
> +    }
> +
> +    if ((ret = av_write_trailer(ttml_ctx)) < 0)
> +        goto cleanup;
> +
> +    *out_start_ts = start_ts;
> +    *out_duration = end_ts - start_ts;
> +
> +    ret = 0;
> +
> +cleanup:
> +    return ret;
> +}
> +
> +int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
> +                                         MOVTrack *track, AVPacket *pkt)
> +{
> +    AVFormatContext *ttml_ctx = NULL;
> +    // values for the generated AVPacket
> +    int64_t start_ts = 0;
> +    int64_t duration = 0;
> +
> +    int ret = AVERROR_BUG;
> +
> +    if ((ret = mov_init_ttml_writer(track, &ttml_ctx)) < 0) {
> +        av_log(s, AV_LOG_ERROR, "Failed to initialize the TTML writer: %s\n",
> +               av_err2str(ret));
> +        return ret;
> +    }
> +
> +    if (!track->squashed_packet_queue) {
> +        // empty queue, write minimal empty document with zero duration
> +        avio_write(ttml_ctx->pb, empty_ttml_document,
> +                   sizeof(empty_ttml_document) - 1);
> +        start_ts = 0;
> +        duration = 0;
> +        goto generate_packet;
> +    }
> +
> +    if ((ret = mov_write_ttml_document_from_queue(s, ttml_ctx, track, pkt,
> +                                                  &start_ts,
> +                                                  &duration)) < 0) {
> +        av_log(s, AV_LOG_ERROR,
> +               "Failed to generate a squashed TTML packet from the packet "
> +               "queue: %s\n",
> +               av_err2str(ret));
> +        goto cleanup;
> +    }
> +
> +generate_packet:
> +    {
> +        // Generate an AVPacket from the data written into the dynamic buffer.
> +        uint8_t *buf = NULL;
> +        int buf_len = avio_close_dyn_buf(ttml_ctx->pb, &buf);
> +        ttml_ctx->pb = NULL;
> +
> +        if ((ret = av_packet_from_data(pkt, buf, buf_len)) < 0) {
> +            av_log(s, AV_LOG_ERROR,
> +                   "Failed to create a TTML AVPacket from AVIO data: %s\n",
> +                   av_err2str(ret));
> +            av_freep(&buf);
> +            goto cleanup;
> +        }
> +
> +        pkt->pts = pkt->dts = start_ts;
> +        pkt->duration = duration;
> +        pkt->flags |= AV_PKT_FLAG_KEY;
> +    }
> +
> +    ret = 0;
> +
> +cleanup:
> +    ffio_free_dyn_buf(&ttml_ctx->pb);
> +    avformat_free_context(ttml_ctx);
> +    return ret;
> +}
> diff --git a/libavformat/movenc_ttml.h b/libavformat/movenc_ttml.h
> new file mode 100644
> index 0000000000..c71ecd0997
> --- /dev/null
> +++ b/libavformat/movenc_ttml.h
> @@ -0,0 +1,31 @@
> +/*
> + * MP4, ISMV Muxer TTML helpers
> + * Copyright (c) 2021 24i
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFORMAT_MOVENC_TTML_H
> +#define AVFORMAT_MOVENC_TTML_H
> +
> +#include "avformat.h"
> +#include "movenc.h"
> +
> +int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
> +                                         MOVTrack *track, AVPacket *pkt);
> +
> +#endif /* AVFORMAT_MOVENC_TTML_H */
> diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
> index ee65afe35b..880109f201 100644
> --- a/tests/fate/subtitles.mak
> +++ b/tests/fate/subtitles.mak
> @@ -109,6 +109,10 @@ fate-sub-dvb: CMD = framecrc -i $(TARGET_SAMPLES)/sub/dvbsubtest_filter.ts -map
>  FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL SRT_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER) += fate-sub-ttmlenc
>  fate-sub-ttmlenc: CMD = fmtstdout ttml -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
>  
> +FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL SRT_DEMUXER MOV_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER MOV_MUXER) += fate-sub-ttml-mp4-stpp fate-sub-ttml-mp4-dfxp
> +fate-sub-ttml-mp4-stpp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
> +fate-sub-ttml-mp4-dfxp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000 -tag:s dfxp -strict unofficial" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
> +
>  FATE_SUBTITLES-$(call ENCMUX, ASS, ASS) += $(FATE_SUBTITLES_ASS-yes)
>  FATE_SUBTITLES += $(FATE_SUBTITLES-yes)
>  
> diff --git a/tests/ref/fate/sub-ttml-mp4-dfxp b/tests/ref/fate/sub-ttml-mp4-dfxp
> new file mode 100644
> index 0000000000..0172e5b7e6
> --- /dev/null
> +++ b/tests/ref/fate/sub-ttml-mp4-dfxp
> @@ -0,0 +1,44 @@
> +2e7e01c821c111466e7a2844826b7f6d *tests/data/fate/sub-ttml-mp4-dfxp.mp4
> +8519 tests/data/fate/sub-ttml-mp4-dfxp.mp4
> +#tb 0: 1/1000
> +#media_type 0: data
> +#codec_id 0: none
> +0,          0,          0,    68500,     7866, 0x456c36b7
> +{
> +    "packets": [
> +        {
> +            "codec_type": "data",
> +            "stream_index": 0,
> +            "pts": 0,
> +            "pts_time": "0.000000",
> +            "dts": 0,
> +            "dts_time": "0.000000",
> +            "duration": 68500,
> +            "duration_time": "68.500000",
> +            "size": "7866",
> +            "pos": "44",
> +            "flags": "K_"
> +        }
> +    ],
> +    "programs": [
> +
> +    ],
> +    "streams": [
> +        {
> +            "index": 0,
> +            "codec_type": "data",
> +            "codec_tag_string": "dfxp",
> +            "codec_tag": "0x70786664",
> +            "time_base": "1/1000",
> +            "start_time": "0.000000",
> +            "duration_ts": 68500,
> +            "duration": "68.500000",
> +            "nb_frames": "1",
> +            "nb_read_packets": "1",
> +            "tags": {
> +                "language": "und",
> +                "handler_name": "SubtitleHandler"
> +            }
> +        }
> +    ]
> +}
> diff --git a/tests/ref/fate/sub-ttml-mp4-stpp b/tests/ref/fate/sub-ttml-mp4-stpp
> new file mode 100644
> index 0000000000..a5165b568d
> --- /dev/null
> +++ b/tests/ref/fate/sub-ttml-mp4-stpp
> @@ -0,0 +1,44 @@
> +cbd2c7ff864a663b0d893deac5a0caec *tests/data/fate/sub-ttml-mp4-stpp.mp4
> +8547 tests/data/fate/sub-ttml-mp4-stpp.mp4
> +#tb 0: 1/1000
> +#media_type 0: data
> +#codec_id 0: none
> +0,          0,          0,    68500,     7866, 0x456c36b7
> +{
> +    "packets": [
> +        {
> +            "codec_type": "data",
> +            "stream_index": 0,
> +            "pts": 0,
> +            "pts_time": "0.000000",
> +            "dts": 0,
> +            "dts_time": "0.000000",
> +            "duration": 68500,
> +            "duration_time": "68.500000",
> +            "size": "7866",
> +            "pos": "44",
> +            "flags": "K_"
> +        }
> +    ],
> +    "programs": [
> +
> +    ],
> +    "streams": [
> +        {
> +            "index": 0,
> +            "codec_type": "data",
> +            "codec_tag_string": "stpp",
> +            "codec_tag": "0x70707473",
> +            "time_base": "1/1000",
> +            "start_time": "0.000000",
> +            "duration_ts": 68500,
> +            "duration": "68.500000",
> +            "nb_frames": "1",
> +            "nb_read_packets": "1",
> +            "tags": {
> +                "language": "und",
> +                "handler_name": "SubtitleHandler"
> +            }
> +        }
> +    ]
> +}
>
Jan Ekström Aug. 9, 2021, 1:11 p.m. UTC | #3
On Thu, Aug 5, 2021 at 10:33 PM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
>
> Jan Ekström:
> > From: Jan Ekström <jan.ekstrom@24i.com>
> >
> > Includes basic support for both the ISMV ('dfxp') and MP4 ('stpp')
> > methods. This initial version also foregoes fragmentation support
> > in case the built-in sample squashing is to be utilized, as this
> > eases the initial review.
> >
> > Additionally, add basic tests for both muxing modes in MP4.
> >
> > Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
> > ---
> >  libavformat/Makefile             |   2 +-
> >  libavformat/isom.h               |   3 +
> >  libavformat/movenc.c             | 179 ++++++++++++++++++++++++++++++-
> >  libavformat/movenc.h             |   5 +
> >  libavformat/movenc_ttml.c        | 178 ++++++++++++++++++++++++++++++
> >  libavformat/movenc_ttml.h        |  31 ++++++
> >  tests/fate/subtitles.mak         |   4 +
> >  tests/ref/fate/sub-ttml-mp4-dfxp |  44 ++++++++
> >  tests/ref/fate/sub-ttml-mp4-stpp |  44 ++++++++
> >  9 files changed, 487 insertions(+), 3 deletions(-)
> >  create mode 100644 libavformat/movenc_ttml.c
> >  create mode 100644 libavformat/movenc_ttml.h
> >  create mode 100644 tests/ref/fate/sub-ttml-mp4-dfxp
> >  create mode 100644 tests/ref/fate/sub-ttml-mp4-stpp
> >
> > diff --git a/libavformat/Makefile b/libavformat/Makefile
> > index 813ddd3c20..7e0f587b41 100644
> > --- a/libavformat/Makefile
> > +++ b/libavformat/Makefile
> > @@ -337,7 +337,7 @@ OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o \
> >                                              qtpalette.o replaygain.o
> >  OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o av1.o avc.o hevc.o vpcc.o \
> >                                              movenchint.o mov_chan.o rtp.o \
> > -                                            movenccenc.o rawutils.o
> > +                                            movenccenc.o movenc_ttml.o rawutils.o
> >  OBJS-$(CONFIG_MP2_MUXER)                 += rawenc.o
> >  OBJS-$(CONFIG_MP3_DEMUXER)               += mp3dec.o replaygain.o
> >  OBJS-$(CONFIG_MP3_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
> > diff --git a/libavformat/isom.h b/libavformat/isom.h
> > index ac1b3f3d56..34a58c79b7 100644
> > --- a/libavformat/isom.h
> > +++ b/libavformat/isom.h
> > @@ -387,4 +387,7 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
> >      return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
> >  }
> >
> > +#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
> > +#define MOV_MP4_TTML_TAG  MKTAG('s', 't', 'p', 'p')
> > +
> >  #endif /* AVFORMAT_ISOM_H */
> > diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > index c85efe8748..f3e295ad80 100644
> > --- a/libavformat/movenc.c
> > +++ b/libavformat/movenc.c
> > @@ -56,6 +56,8 @@
> >  #include "hevc.h"
> >  #include "rtpenc.h"
> >  #include "mov_chan.h"
> > +#include "movenc_ttml.h"
> > +#include "ttmlenc.h"
> >  #include "vpcc.h"
> >
> >  static const AVOption options[] = {
> > @@ -119,6 +121,7 @@ static const AVClass mov_isobmff_muxer_class = {
> >  };
> >
> >  static int get_moov_size(AVFormatContext *s);
> > +static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt);
> >
> >  static int utf8len(const uint8_t *b)
> >  {
> > @@ -1787,7 +1790,29 @@ static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
> >
> >      if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
> >          mov_write_esds_tag(pb, track);
> > -    else if (track->par->extradata_size)
> > +    else if (track->par->codec_id == AV_CODEC_ID_TTML) {
> > +        switch (track->par->codec_tag) {
> > +        case MOV_ISMV_TTML_TAG:
> > +            // ISMV dfxp requires no extradata.
> > +            break;
> > +        case MOV_MP4_TTML_TAG:
> > +            // As specified in 14496-30, XMLSubtitleSampleEntry
> > +            // Namespace
> > +            avio_put_str(pb, "http://www.w3.org/ns/ttml");
> > +            // Empty schema_location
> > +            avio_w8(pb, 0);
> > +            // Empty auxiliary_mime_types
> > +            avio_w8(pb, 0);
> > +            break;
> > +        default:
> > +            av_log(NULL, AV_LOG_ERROR,
> > +                   "Unknown codec tag '%s' utilized for TTML stream with "
> > +                   "index %d (track id %d)!\n",
> > +                   av_fourcc2str(track->par->codec_tag), track->st->index,
> > +                   track->track_id);
> > +            return AVERROR(EINVAL);
> > +        }
> > +    } else if (track->par->extradata_size)
> >          avio_write(pb, track->par->extradata, track->par->extradata_size);
> >
> >      if (track->mode == MODE_MP4 &&
> > @@ -2661,6 +2686,14 @@ static int mov_write_nmhd_tag(AVIOContext *pb)
> >      return 12;
> >  }
> >
> > +static int mov_write_sthd_tag(AVIOContext *pb)
> > +{
> > +    avio_wb32(pb, 12);
> > +    ffio_wfourcc(pb, "sthd");
> > +    avio_wb32(pb, 0);
> > +    return 12;
> > +}
> > +
> >  static int mov_write_tcmi_tag(AVIOContext *pb, MOVTrack *track)
> >  {
> >      int64_t pos = avio_tell(pb);
> > @@ -2787,6 +2820,8 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >                      hdlr_type = "sbtl";
> >                  } else if (track->tag == MKTAG('m','p','4','s')) {
> >                      hdlr_type = "subp";
> > +                } else if (track->tag == MOV_MP4_TTML_TAG) {
> > +                    hdlr_type = "subt";
> >                  } else {
> >                      hdlr_type = "text";
> >                  }
> > @@ -2865,6 +2900,8 @@ static int mov_write_minf_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> >      else if (track->par->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> >          if (track->tag == MKTAG('t','e','x','t') || is_clcp_track(track)) {
> >              mov_write_gmhd_tag(pb, track);
> > +        } else if (track->tag == MOV_MP4_TTML_TAG) {
> > +            mov_write_sthd_tag(pb);
> >          } else {
> >              mov_write_nmhd_tag(pb);
> >          }
> > @@ -5253,6 +5290,68 @@ static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
> >      return 0;
> >  }
> >
> > +static int mov_write_squashed_packet(AVFormatContext *s, MOVTrack *track)
> > +{
> > +    MOVMuxContext *mov = s->priv_data;
> > +    AVPacket *squashed_packet = mov->pkt;
> > +    int ret = AVERROR_BUG;
> > +
> > +    switch (track->st->codecpar->codec_id) {
> > +    case AV_CODEC_ID_TTML: {
> > +        int had_packets = !!track->squashed_packet_queue;
> > +
> > +        if ((ret = ff_mov_generate_squashed_ttml_packet(s, track, squashed_packet)) < 0) {
> > +            goto finish_squash;
> > +        }
> > +
> > +        // We have generated a padding packet (no actual input packets in
> > +        // queue) and its duration is zero. Skipping writing it.
> > +        if (!had_packets && squashed_packet->duration == 0) {
> > +            goto finish_squash;
> > +        }
> > +
> > +        track->end_reliable = 1;
> > +        break;
> > +    }
> > +    default:
> > +        ret = AVERROR(EINVAL);
> > +        goto finish_squash;
> > +    }
> > +
> > +    squashed_packet->stream_index = track->st->index;
> > +
> > +    ret = mov_write_single_packet(s, squashed_packet);
> > +
> > +finish_squash:
> > +    av_packet_unref(squashed_packet);
> > +
> > +    return ret;
> > +}
> > +
> > +static int mov_write_squashed_packets(AVFormatContext *s)
> > +{
> > +    MOVMuxContext *mov = s->priv_data;
> > +
> > +    for (int i = 0; i < s->nb_streams; i++) {
> > +        MOVTrack *track = &mov->tracks[i];
> > +        int ret = AVERROR_BUG;
> > +
> > +        if (track->squash_fragment_samples_to_one && !track->entry) {
> > +            if ((ret = mov_write_squashed_packet(s, track)) < 0) {
> > +                av_log(s, AV_LOG_ERROR,
> > +                       "Failed to write squashed packet for %s stream with "
> > +                       "index %d and track id %d. Error: %s\n",
> > +                       avcodec_get_name(track->st->codecpar->codec_id),
> > +                       track->st->index, track->track_id,
> > +                       av_err2str(ret));
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static int mov_flush_fragment(AVFormatContext *s, int force)
> >  {
> >      MOVMuxContext *mov = s->priv_data;
> > @@ -5264,6 +5363,11 @@ static int mov_flush_fragment(AVFormatContext *s, int force)
> >      if (!(mov->flags & FF_MOV_FLAG_FRAGMENT))
> >          return 0;
> >
> > +    // Check if we have any tracks that require squashing.
> > +    // In that case, we'll have to write the packet here.
> > +    if ((ret = mov_write_squashed_packets(s)) < 0)
> > +        return ret;
> > +
> >      // Try to fill in the duration of the last packet in each stream
> >      // from queued packets in the interleave queues. If the flushing
> >      // of fragments was triggered automatically by an AVPacket, we
> > @@ -5739,7 +5843,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >      trk->cluster[trk->entry].entries          = samples_in_chunk;
> >      trk->cluster[trk->entry].dts              = pkt->dts;
> >      trk->cluster[trk->entry].pts              = pkt->pts;
> > -    if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
> > +    if (!trk->squash_fragment_samples_to_one &&
> > +        !trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
> >          if (!trk->frag_discont) {
> >              /* First packet of a new fragment. We already wrote the duration
> >               * of the last packet of the previous fragment based on track_duration,
> > @@ -6032,6 +6137,33 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >              }
> >          }
> >
> > +        if (trk->squash_fragment_samples_to_one) {
> > +            /*
> > +             * If the track has to have its samples squashed into one sample,
> > +             * we just take it into the track's queue.
> > +             * This will then be utilized as the samples get written in either
> > +             * mov_flush_fragment or when the mux is finalized in
> > +             * mov_write_trailer.
> > +             */
> > +            int ret = AVERROR_BUG;
> > +
> > +            if (pkt->pts == AV_NOPTS_VALUE) {
> > +                av_log(s, AV_LOG_ERROR,
> > +                       "Packets without a valid presentation timestamp are "
> > +                       "not supported with packet squashing!\n");
> > +                return AVERROR(EINVAL);
> > +            }
> > +
> > +            if ((ret = avpriv_packet_list_put(&trk->squashed_packet_queue,
> > +                                              &trk->squashed_packet_queue_end,
> > +                                              pkt, av_packet_ref, 0)) < 0) {
> > +                return ret;
> > +            }
> > +
> > +            return 0;
> > +        }
> > +
> > +
> >          if (trk->mode == MODE_MOV && trk->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> >              AVPacket *opkt = pkt;
> >              int reshuffle_ret, ret;
> > @@ -6310,6 +6442,11 @@ static void mov_free(AVFormatContext *s)
> >
> >          ff_mov_cenc_free(&mov->tracks[i].cenc);
> >          ffio_free_dyn_buf(&mov->tracks[i].mdat_buf);
> > +
> > +        if (mov->tracks[i].squashed_packet_queue) {
> > +            avpriv_packet_list_free(&(mov->tracks[i].squashed_packet_queue),
> > +                                    &(mov->tracks[i].squashed_packet_queue_end));
> > +        }
> >      }
> >
> >      av_freep(&mov->tracks);
> > @@ -6700,6 +6837,36 @@ static int mov_init(AVFormatContext *s)
> >              }
> >          } else if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
> >              track->timescale = st->time_base.den;
> > +
> > +            if (track->par->codec_id == AV_CODEC_ID_TTML) {
> > +                /* 14496-30 requires us to use a single sample per fragment
> > +                   for TTML, for which we define a per-track flag.
> > +
> > +                   We set the flag in case we are receiving TTML paragraphs
> > +                   from the input, in other words in case we are not doing
> > +                   stream copy. */
> > +                track->squash_fragment_samples_to_one =
> > +                    ff_is_ttml_stream_paragraph_based(track->par);
> > +
> > +                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
> > +                    track->squash_fragment_samples_to_one) {
> > +                    av_log(s, AV_LOG_ERROR,
> > +                           "Fragmentation is not currently supported for "
> > +                           "TTML in MP4/ISMV (track synchronization between "
> > +                           "subtitles and other media is not yet implemented)!\n");
> > +                    return AVERROR_PATCHWELCOME;
> > +                }
> > +
> > +                if (track->mode != MODE_ISM &&
> > +                    track->par->codec_tag == MOV_ISMV_TTML_TAG &&
> > +                    s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
> > +                    av_log(s, AV_LOG_ERROR,
> > +                           "ISMV style TTML support with the 'dfxp' tag in "
> > +                           "non-ISMV formats is not officially supported. Add "
> > +                           "'-strict unofficial' if you want to use it.\n");
> > +                    return AVERROR_EXPERIMENTAL;
> > +                }
> > +            }
> >          } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
> >              track->timescale = st->time_base.den;
> >          } else {
> > @@ -7046,6 +7213,11 @@ static int mov_write_trailer(AVFormatContext *s)
> >          }
> >      }
> >
> > +    // Check if we have any tracks that require squashing.
> > +    // In that case, we'll have to write the packet here.
> > +    if ((res = mov_write_squashed_packets(s)) < 0)
> > +        return res;
> > +
> >      // If there were no chapters when the header was written, but there
> >      // are chapters now, write them in the trailer.  This only works
> >      // when we are not doing fragments.
> > @@ -7190,6 +7362,8 @@ static const AVCodecTag codec_mp4_tags[] = {
> >      { AV_CODEC_ID_MOV_TEXT,        MKTAG('t', 'x', '3', 'g') },
> >      { AV_CODEC_ID_BIN_DATA,        MKTAG('g', 'p', 'm', 'd') },
> >      { AV_CODEC_ID_MPEGH_3D_AUDIO,  MKTAG('m', 'h', 'm', '1') },
> > +    { AV_CODEC_ID_TTML,            MOV_MP4_TTML_TAG          },
> > +    { AV_CODEC_ID_TTML,            MOV_ISMV_TTML_TAG         },
> >      { AV_CODEC_ID_NONE,               0 },
> >  };
> >  #if CONFIG_MP4_MUXER || CONFIG_PSP_MUXER
> > @@ -7198,6 +7372,7 @@ static const AVCodecTag *const mp4_codec_tags_list[] = { codec_mp4_tags, NULL };
> >
> >  static const AVCodecTag codec_ism_tags[] = {
> >      { AV_CODEC_ID_WMAPRO      , MKTAG('w', 'm', 'a', ' ') },
> > +    { AV_CODEC_ID_TTML        , MOV_ISMV_TTML_TAG         },
> >      { AV_CODEC_ID_NONE        ,    0 },
> >  };
> >
> > diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> > index af1ea0bce6..95db1bf46d 100644
> > --- a/libavformat/movenc.h
> > +++ b/libavformat/movenc.h
> > @@ -26,6 +26,7 @@
> >
> >  #include "avformat.h"
> >  #include "movenccenc.h"
> > +#include "libavcodec/packet_internal.h"
> >
> >  #define MOV_FRAG_INFO_ALLOC_INCREMENT 64
> >  #define MOV_INDEX_CLUSTER_SIZE 1024
> > @@ -164,6 +165,10 @@ typedef struct MOVTrack {
> >      int pal_done;
> >
> >      int is_unaligned_qt_rgb;
> > +
> > +    unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
> > +
> > +    PacketList *squashed_packet_queue, *squashed_packet_queue_end;
> >  } MOVTrack;
> >
> >  typedef enum {
> > diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
> > new file mode 100644
> > index 0000000000..bf4a6fd89e
> > --- /dev/null
> > +++ b/libavformat/movenc_ttml.c
> > @@ -0,0 +1,178 @@
> > +/*
> > + * MP4, ISMV Muxer TTML helpers
> > + * Copyright (c) 2021 24i
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include "avformat.h"
> > +#include "avio_internal.h"
> > +#include "isom.h"
> > +#include "movenc.h"
> > +#include "movenc_ttml.h"
> > +#include "libavcodec/packet_internal.h"
> > +
> > +static const unsigned char empty_ttml_document[] =
> > +    "<tt xml:lang=\"\" xmlns=\"http://www.w3.org/ns/ttml\" />";
> > +
> > +static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
> > +{
> > +    AVStream *movenc_stream = track->st, *ttml_stream = NULL;
> > +    AVFormatContext *ttml_ctx = NULL;
> > +    int ret = AVERROR_BUG;
> > +    if ((ret = avformat_alloc_output_context2(&ttml_ctx, NULL,
> > +                                              "ttml", NULL)) < 0)
> > +        return ret;
> > +
> > +    if ((ret = avio_open_dyn_buf(&ttml_ctx->pb)) < 0)
> > +        goto fail;
> > +
> > +    if (!(ttml_stream = avformat_new_stream(ttml_ctx, NULL))) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> > +
> > +    if ((ret = avcodec_parameters_copy(ttml_stream->codecpar,
> > +                                       movenc_stream->codecpar)) < 0)
> > +        goto fail;
> > +
> > +    ttml_stream->time_base = movenc_stream->time_base;
> > +
> > +    *out_ctx = ttml_ctx;
> > +
> > +    return 0;
> > +
> > +fail:
> > +    ffio_free_dyn_buf(&ttml_ctx->pb);
> > +    avformat_free_context(ttml_ctx);
>
> If you used out_ctx directly (i.e. don't use ttml_ctx at all), you could
> remove the cleanup code in this function and instead reuse the cleanup
> code in ff_mov_generate_squashed_ttml_packet().
> (But you will have to add a check for whether ttml_ctx exists in
> ff_mov_generate_squashed_ttml_packet().)
>
>

I'm kind of on the fence about this. I agree with trying to deduplicate
logic, but functions being self-contained is also nice.

I thus requested some comments from other people, and Anton
noted that he preferred the self-containment of this function, albeit
only slightly.

Personally I'm mostly worn out with this stuff, so "meh". If you feel
strongly about this deduplication, I can add it to the next (and
hopefully final) revision of this patch set.

Jan
diff mbox series

Patch

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 813ddd3c20..7e0f587b41 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -337,7 +337,7 @@  OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o mov_chan.o mov_esds.o \
                                             qtpalette.o replaygain.o
 OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o av1.o avc.o hevc.o vpcc.o \
                                             movenchint.o mov_chan.o rtp.o \
-                                            movenccenc.o rawutils.o
+                                            movenccenc.o movenc_ttml.o rawutils.o
 OBJS-$(CONFIG_MP2_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_MP3_DEMUXER)               += mp3dec.o replaygain.o
 OBJS-$(CONFIG_MP3_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
diff --git a/libavformat/isom.h b/libavformat/isom.h
index ac1b3f3d56..34a58c79b7 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -387,4 +387,7 @@  static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
     return ff_get_pcm_codec_id(bps, flags & 1, flags & 2, flags & 4 ? -1 : 0);
 }
 
+#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
+#define MOV_MP4_TTML_TAG  MKTAG('s', 't', 'p', 'p')
+
 #endif /* AVFORMAT_ISOM_H */
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index c85efe8748..f3e295ad80 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -56,6 +56,8 @@ 
 #include "hevc.h"
 #include "rtpenc.h"
 #include "mov_chan.h"
+#include "movenc_ttml.h"
+#include "ttmlenc.h"
 #include "vpcc.h"
 
 static const AVOption options[] = {
@@ -119,6 +121,7 @@  static const AVClass mov_isobmff_muxer_class = {
 };
 
 static int get_moov_size(AVFormatContext *s);
+static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt);
 
 static int utf8len(const uint8_t *b)
 {
@@ -1787,7 +1790,29 @@  static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
 
     if (track->par->codec_id == AV_CODEC_ID_DVD_SUBTITLE)
         mov_write_esds_tag(pb, track);
-    else if (track->par->extradata_size)
+    else if (track->par->codec_id == AV_CODEC_ID_TTML) {
+        switch (track->par->codec_tag) {
+        case MOV_ISMV_TTML_TAG:
+            // ISMV dfxp requires no extradata.
+            break;
+        case MOV_MP4_TTML_TAG:
+            // As specified in 14496-30, XMLSubtitleSampleEntry
+            // Namespace
+            avio_put_str(pb, "http://www.w3.org/ns/ttml");
+            // Empty schema_location
+            avio_w8(pb, 0);
+            // Empty auxiliary_mime_types
+            avio_w8(pb, 0);
+            break;
+        default:
+            av_log(NULL, AV_LOG_ERROR,
+                   "Unknown codec tag '%s' utilized for TTML stream with "
+                   "index %d (track id %d)!\n",
+                   av_fourcc2str(track->par->codec_tag), track->st->index,
+                   track->track_id);
+            return AVERROR(EINVAL);
+        }
+    } else if (track->par->extradata_size)
         avio_write(pb, track->par->extradata, track->par->extradata_size);
 
     if (track->mode == MODE_MP4 &&
@@ -2661,6 +2686,14 @@  static int mov_write_nmhd_tag(AVIOContext *pb)
     return 12;
 }
 
+static int mov_write_sthd_tag(AVIOContext *pb)
+{
+    avio_wb32(pb, 12);
+    ffio_wfourcc(pb, "sthd");
+    avio_wb32(pb, 0);
+    return 12;
+}
+
 static int mov_write_tcmi_tag(AVIOContext *pb, MOVTrack *track)
 {
     int64_t pos = avio_tell(pb);
@@ -2787,6 +2820,8 @@  static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
                     hdlr_type = "sbtl";
                 } else if (track->tag == MKTAG('m','p','4','s')) {
                     hdlr_type = "subp";
+                } else if (track->tag == MOV_MP4_TTML_TAG) {
+                    hdlr_type = "subt";
                 } else {
                     hdlr_type = "text";
                 }
@@ -2865,6 +2900,8 @@  static int mov_write_minf_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
     else if (track->par->codec_type == AVMEDIA_TYPE_SUBTITLE) {
         if (track->tag == MKTAG('t','e','x','t') || is_clcp_track(track)) {
             mov_write_gmhd_tag(pb, track);
+        } else if (track->tag == MOV_MP4_TTML_TAG) {
+            mov_write_sthd_tag(pb);
         } else {
             mov_write_nmhd_tag(pb);
         }
@@ -5253,6 +5290,68 @@  static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
     return 0;
 }
 
+static int mov_write_squashed_packet(AVFormatContext *s, MOVTrack *track)
+{
+    MOVMuxContext *mov = s->priv_data;
+    AVPacket *squashed_packet = mov->pkt;
+    int ret = AVERROR_BUG;
+
+    switch (track->st->codecpar->codec_id) {
+    case AV_CODEC_ID_TTML: {
+        int had_packets = !!track->squashed_packet_queue;
+
+        if ((ret = ff_mov_generate_squashed_ttml_packet(s, track, squashed_packet)) < 0) {
+            goto finish_squash;
+        }
+
+        // We have generated a padding packet (no actual input packets in
+        // queue) and its duration is zero. Skipping writing it.
+        if (!had_packets && squashed_packet->duration == 0) {
+            goto finish_squash;
+        }
+
+        track->end_reliable = 1;
+        break;
+    }
+    default:
+        ret = AVERROR(EINVAL);
+        goto finish_squash;
+    }
+
+    squashed_packet->stream_index = track->st->index;
+
+    ret = mov_write_single_packet(s, squashed_packet);
+
+finish_squash:
+    av_packet_unref(squashed_packet);
+
+    return ret;
+}
+
+static int mov_write_squashed_packets(AVFormatContext *s)
+{
+    MOVMuxContext *mov = s->priv_data;
+
+    for (int i = 0; i < s->nb_streams; i++) {
+        MOVTrack *track = &mov->tracks[i];
+        int ret = AVERROR_BUG;
+
+        if (track->squash_fragment_samples_to_one && !track->entry) {
+            if ((ret = mov_write_squashed_packet(s, track)) < 0) {
+                av_log(s, AV_LOG_ERROR,
+                       "Failed to write squashed packet for %s stream with "
+                       "index %d and track id %d. Error: %s\n",
+                       avcodec_get_name(track->st->codecpar->codec_id),
+                       track->st->index, track->track_id,
+                       av_err2str(ret));
+                return ret;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int mov_flush_fragment(AVFormatContext *s, int force)
 {
     MOVMuxContext *mov = s->priv_data;
@@ -5264,6 +5363,11 @@  static int mov_flush_fragment(AVFormatContext *s, int force)
     if (!(mov->flags & FF_MOV_FLAG_FRAGMENT))
         return 0;
 
+    // Check if we have any tracks that require squashing.
+    // In that case, we'll have to write the packet here.
+    if ((ret = mov_write_squashed_packets(s)) < 0)
+        return ret;
+
     // Try to fill in the duration of the last packet in each stream
     // from queued packets in the interleave queues. If the flushing
     // of fragments was triggered automatically by an AVPacket, we
@@ -5739,7 +5843,8 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     trk->cluster[trk->entry].entries          = samples_in_chunk;
     trk->cluster[trk->entry].dts              = pkt->dts;
     trk->cluster[trk->entry].pts              = pkt->pts;
-    if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
+    if (!trk->squash_fragment_samples_to_one &&
+        !trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
         if (!trk->frag_discont) {
             /* First packet of a new fragment. We already wrote the duration
              * of the last packet of the previous fragment based on track_duration,
@@ -6032,6 +6137,33 @@  static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
 
+        if (trk->squash_fragment_samples_to_one) {
+            /*
+             * If the track has to have its samples squashed into one sample,
+             * we just take it into the track's queue.
+             * This will then be utilized as the samples get written in either
+             * mov_flush_fragment or when the mux is finalized in
+             * mov_write_trailer.
+             */
+            int ret = AVERROR_BUG;
+
+            if (pkt->pts == AV_NOPTS_VALUE) {
+                av_log(s, AV_LOG_ERROR,
+                       "Packets without a valid presentation timestamp are "
+                       "not supported with packet squashing!\n");
+                return AVERROR(EINVAL);
+            }
+
+            if ((ret = avpriv_packet_list_put(&trk->squashed_packet_queue,
+                                              &trk->squashed_packet_queue_end,
+                                              pkt, av_packet_ref, 0)) < 0) {
+                return ret;
+            }
+
+            return 0;
+        }
+
+
         if (trk->mode == MODE_MOV && trk->par->codec_type == AVMEDIA_TYPE_VIDEO) {
             AVPacket *opkt = pkt;
             int reshuffle_ret, ret;
@@ -6310,6 +6442,11 @@  static void mov_free(AVFormatContext *s)
 
         ff_mov_cenc_free(&mov->tracks[i].cenc);
         ffio_free_dyn_buf(&mov->tracks[i].mdat_buf);
+
+        if (mov->tracks[i].squashed_packet_queue) {
+            avpriv_packet_list_free(&(mov->tracks[i].squashed_packet_queue),
+                                    &(mov->tracks[i].squashed_packet_queue_end));
+        }
     }
 
     av_freep(&mov->tracks);
@@ -6700,6 +6837,36 @@  static int mov_init(AVFormatContext *s)
             }
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
             track->timescale = st->time_base.den;
+
+            if (track->par->codec_id == AV_CODEC_ID_TTML) {
+                /* 14496-30 requires us to use a single sample per fragment
+                   for TTML, for which we define a per-track flag.
+
+                   We set the flag in case we are receiving TTML paragraphs
+                   from the input, in other words in case we are not doing
+                   stream copy. */
+                track->squash_fragment_samples_to_one =
+                    ff_is_ttml_stream_paragraph_based(track->par);
+
+                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
+                    track->squash_fragment_samples_to_one) {
+                    av_log(s, AV_LOG_ERROR,
+                           "Fragmentation is not currently supported for "
+                           "TTML in MP4/ISMV (track synchronization between "
+                           "subtitles and other media is not yet implemented)!\n");
+                    return AVERROR_PATCHWELCOME;
+                }
+
+                if (track->mode != MODE_ISM &&
+                    track->par->codec_tag == MOV_ISMV_TTML_TAG &&
+                    s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
+                    av_log(s, AV_LOG_ERROR,
+                           "ISMV style TTML support with the 'dfxp' tag in "
+                           "non-ISMV formats is not officially supported. Add "
+                           "'-strict unofficial' if you want to use it.\n");
+                    return AVERROR_EXPERIMENTAL;
+                }
+            }
         } else if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
             track->timescale = st->time_base.den;
         } else {
@@ -7046,6 +7213,11 @@  static int mov_write_trailer(AVFormatContext *s)
         }
     }
 
+    // Check if we have any tracks that require squashing.
+    // In that case, we'll have to write the packet here.
+    if ((res = mov_write_squashed_packets(s)) < 0)
+        return res;
+
     // If there were no chapters when the header was written, but there
     // are chapters now, write them in the trailer.  This only works
     // when we are not doing fragments.
@@ -7190,6 +7362,8 @@  static const AVCodecTag codec_mp4_tags[] = {
     { AV_CODEC_ID_MOV_TEXT,        MKTAG('t', 'x', '3', 'g') },
     { AV_CODEC_ID_BIN_DATA,        MKTAG('g', 'p', 'm', 'd') },
     { AV_CODEC_ID_MPEGH_3D_AUDIO,  MKTAG('m', 'h', 'm', '1') },
+    { AV_CODEC_ID_TTML,            MOV_MP4_TTML_TAG          },
+    { AV_CODEC_ID_TTML,            MOV_ISMV_TTML_TAG         },
     { AV_CODEC_ID_NONE,               0 },
 };
 #if CONFIG_MP4_MUXER || CONFIG_PSP_MUXER
@@ -7198,6 +7372,7 @@  static const AVCodecTag *const mp4_codec_tags_list[] = { codec_mp4_tags, NULL };
 
 static const AVCodecTag codec_ism_tags[] = {
     { AV_CODEC_ID_WMAPRO      , MKTAG('w', 'm', 'a', ' ') },
+    { AV_CODEC_ID_TTML        , MOV_ISMV_TTML_TAG         },
     { AV_CODEC_ID_NONE        ,    0 },
 };
 
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index af1ea0bce6..95db1bf46d 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -26,6 +26,7 @@ 
 
 #include "avformat.h"
 #include "movenccenc.h"
+#include "libavcodec/packet_internal.h"
 
 #define MOV_FRAG_INFO_ALLOC_INCREMENT 64
 #define MOV_INDEX_CLUSTER_SIZE 1024
@@ -164,6 +165,10 @@  typedef struct MOVTrack {
     int pal_done;
 
     int is_unaligned_qt_rgb;
+
+    unsigned int squash_fragment_samples_to_one; //< flag to note formats where all samples for a fragment are to be squashed
+
+    PacketList *squashed_packet_queue, *squashed_packet_queue_end;
 } MOVTrack;
 
 typedef enum {
diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
new file mode 100644
index 0000000000..bf4a6fd89e
--- /dev/null
+++ b/libavformat/movenc_ttml.c
@@ -0,0 +1,178 @@ 
+/*
+ * MP4, ISMV Muxer TTML helpers
+ * Copyright (c) 2021 24i
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "avio_internal.h"
+#include "isom.h"
+#include "movenc.h"
+#include "movenc_ttml.h"
+#include "libavcodec/packet_internal.h"
+
+static const unsigned char empty_ttml_document[] = // for empty queues
+    "<tt xml:lang=\"\" xmlns=\"http://www.w3.org/ns/ttml\" />";
+
+/*
+ * Create a "ttml" muxer context writing to a dynamic buffer, with a single
+ * stream copying track->st's codec parameters and time base. Returns 0 with
+ * the context in *out_ctx, or a negative error after freeing the context.
+ */
+static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
+{
+    AVStream *src_st = track->st, *dst_st;
+    AVFormatContext *muxer = NULL;
+    int err = avformat_alloc_output_context2(&muxer, NULL, "ttml", NULL);
+
+    if (err < 0)
+        return err;
+
+    err = avio_open_dyn_buf(&muxer->pb);
+    if (err < 0)
+        goto fail;
+
+    dst_st = avformat_new_stream(muxer, NULL);
+    err    = dst_st ? avcodec_parameters_copy(dst_st->codecpar, src_st->codecpar)
+                    : AVERROR(ENOMEM);
+    if (err < 0)
+        goto fail;
+
+    dst_st->time_base = src_st->time_base;
+    *out_ctx          = muxer;
+    return 0;
+
+fail:
+    ffio_free_dyn_buf(&muxer->pb);
+    avformat_free_context(muxer);
+    return err;
+}
+
+/*
+ * Write header, all packets queued on track and trailer into ttml_ctx, with
+ * pkt as scratch. On success returns 0, *out_start_ts is start_dts +
+ * track_duration of track (0 while start_dts is unset) and *out_duration
+ * spans from there to the latest pts + duration; on error both are untouched.
+ */
+static int mov_write_ttml_document_from_queue(AVFormatContext *s,
+                                              AVFormatContext *ttml_ctx,
+                                              MOVTrack *track, AVPacket *pkt,
+                                              int64_t *out_start_ts,
+                                              int64_t *out_duration)
+{
+    int64_t first_ts = 0, last_ts;
+    int err;
+
+    if (track->start_dts != AV_NOPTS_VALUE)
+        first_ts = track->start_dts + track->track_duration;
+    last_ts = first_ts;
+
+    err = avformat_write_header(ttml_ctx, NULL);
+    if (err < 0)
+        return err;
+
+    while (avpriv_packet_list_get(&track->squashed_packet_queue,
+                                  &track->squashed_packet_queue_end,
+                                  pkt) == 0) {
+        AVStream *dst_st = ttml_ctx->streams[0];
+
+        last_ts = FFMAX(last_ts, pkt->pts + pkt->duration);
+
+        // 'dfxp' documents are timed relative to their containing sample
+        if (track->par->codec_tag == MOV_ISMV_TTML_TAG)
+            pkt->dts = pkt->pts = pkt->pts - first_ts;
+
+        pkt->stream_index = 0;
+        av_packet_rescale_ts(pkt, track->st->time_base, dst_st->time_base);
+
+        err = av_write_frame(ttml_ctx, pkt);
+        if (err < 0)
+            return err;
+        av_packet_unref(pkt);
+    }
+
+    err = av_write_trailer(ttml_ctx);
+    if (err < 0)
+        return err;
+
+    *out_start_ts = first_ts;
+    *out_duration = last_ts - first_ts;
+    return 0;
+}
+
+/*
+ * Build a single TTML document packet from the packets queued on track.
+ * With an empty queue a minimal empty document with zero pts/dts and
+ * duration is generated instead. The result is stored in pkt, which is
+ * flagged AV_PKT_FLAG_KEY and doubles as scratch while the queue is muxed.
+ * Returns 0 on success, or a negative error code after logging it.
+ */
+int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
+                                         MOVTrack *track, AVPacket *pkt)
+{
+    AVFormatContext *ttml_ctx = NULL;
+    uint8_t *doc = NULL;
+    int doc_size;
+    // timing of the generated AVPacket
+    int64_t start_ts = 0, duration = 0;
+    int ret = mov_init_ttml_writer(track, &ttml_ctx);
+
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR, "Failed to initialize the TTML writer: %s\n",
+               av_err2str(ret));
+        return ret;
+    }
+
+    if (track->squashed_packet_queue) {
+        ret = mov_write_ttml_document_from_queue(s, ttml_ctx, track, pkt,
+                                                 &start_ts, &duration);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR,
+                   "Failed to generate a squashed TTML packet from the packet "
+                   "queue: %s\n",
+                   av_err2str(ret));
+            goto cleanup;
+        }
+    } else {
+        // empty queue: minimal empty document, timing stays at zero
+        avio_write(ttml_ctx->pb, empty_ttml_document,
+                   sizeof(empty_ttml_document) - 1);
+    }
+
+    // Turn the data written into the dynamic buffer into the AVPacket.
+    doc_size     = avio_close_dyn_buf(ttml_ctx->pb, &doc);
+    ttml_ctx->pb = NULL;
+
+    ret = av_packet_from_data(pkt, doc, doc_size);
+    if (ret < 0) {
+        av_log(s, AV_LOG_ERROR,
+               "Failed to create a TTML AVPacket from AVIO data: %s\n",
+               av_err2str(ret));
+        av_freep(&doc);
+        goto cleanup;
+    }
+
+    pkt->pts      = pkt->dts = start_ts;
+    pkt->duration = duration;
+    pkt->flags   |= AV_PKT_FLAG_KEY;
+    ret           = 0;
+
+cleanup:
+    ffio_free_dyn_buf(&ttml_ctx->pb);
+    avformat_free_context(ttml_ctx);
+    return ret;
+}
diff --git a/libavformat/movenc_ttml.h b/libavformat/movenc_ttml.h
new file mode 100644
index 0000000000..c71ecd0997
--- /dev/null
+++ b/libavformat/movenc_ttml.h
@@ -0,0 +1,31 @@ 
+/*
+ * MP4, ISMV Muxer TTML helpers
+ * Copyright (c) 2021 24i
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_MOVENC_TTML_H
+#define AVFORMAT_MOVENC_TTML_H
+
+#include "avformat.h"
+#include "movenc.h"
+
+int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
+                                         MOVTrack *track, AVPacket *pkt);
+
+#endif /* AVFORMAT_MOVENC_TTML_H */
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index ee65afe35b..880109f201 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -109,6 +109,10 @@  fate-sub-dvb: CMD = framecrc -i $(TARGET_SAMPLES)/sub/dvbsubtest_filter.ts -map
 FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL SRT_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER) += fate-sub-ttmlenc
 fate-sub-ttmlenc: CMD = fmtstdout ttml -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
 
+FATE_SUBTITLES-$(call ALLYES, FILE_PROTOCOL SRT_DEMUXER MOV_DEMUXER SUBRIP_DECODER TTML_ENCODER TTML_MUXER MOV_MUXER) += fate-sub-ttml-mp4-stpp fate-sub-ttml-mp4-dfxp
+fate-sub-ttml-mp4-stpp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
+fate-sub-ttml-mp4-dfxp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000 -tag:s dfxp -strict unofficial" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
+
 FATE_SUBTITLES-$(call ENCMUX, ASS, ASS) += $(FATE_SUBTITLES_ASS-yes)
 FATE_SUBTITLES += $(FATE_SUBTITLES-yes)
 
diff --git a/tests/ref/fate/sub-ttml-mp4-dfxp b/tests/ref/fate/sub-ttml-mp4-dfxp
new file mode 100644
index 0000000000..0172e5b7e6
--- /dev/null
+++ b/tests/ref/fate/sub-ttml-mp4-dfxp
@@ -0,0 +1,44 @@ 
+2e7e01c821c111466e7a2844826b7f6d *tests/data/fate/sub-ttml-mp4-dfxp.mp4
+8519 tests/data/fate/sub-ttml-mp4-dfxp.mp4
+#tb 0: 1/1000
+#media_type 0: data
+#codec_id 0: none
+0,          0,          0,    68500,     7866, 0x456c36b7
+{
+    "packets": [
+        {
+            "codec_type": "data",
+            "stream_index": 0,
+            "pts": 0,
+            "pts_time": "0.000000",
+            "dts": 0,
+            "dts_time": "0.000000",
+            "duration": 68500,
+            "duration_time": "68.500000",
+            "size": "7866",
+            "pos": "44",
+            "flags": "K_"
+        }
+    ],
+    "programs": [
+
+    ],
+    "streams": [
+        {
+            "index": 0,
+            "codec_type": "data",
+            "codec_tag_string": "dfxp",
+            "codec_tag": "0x70786664",
+            "time_base": "1/1000",
+            "start_time": "0.000000",
+            "duration_ts": 68500,
+            "duration": "68.500000",
+            "nb_frames": "1",
+            "nb_read_packets": "1",
+            "tags": {
+                "language": "und",
+                "handler_name": "SubtitleHandler"
+            }
+        }
+    ]
+}
diff --git a/tests/ref/fate/sub-ttml-mp4-stpp b/tests/ref/fate/sub-ttml-mp4-stpp
new file mode 100644
index 0000000000..a5165b568d
--- /dev/null
+++ b/tests/ref/fate/sub-ttml-mp4-stpp
@@ -0,0 +1,44 @@ 
+cbd2c7ff864a663b0d893deac5a0caec *tests/data/fate/sub-ttml-mp4-stpp.mp4
+8547 tests/data/fate/sub-ttml-mp4-stpp.mp4
+#tb 0: 1/1000
+#media_type 0: data
+#codec_id 0: none
+0,          0,          0,    68500,     7866, 0x456c36b7
+{
+    "packets": [
+        {
+            "codec_type": "data",
+            "stream_index": 0,
+            "pts": 0,
+            "pts_time": "0.000000",
+            "dts": 0,
+            "dts_time": "0.000000",
+            "duration": 68500,
+            "duration_time": "68.500000",
+            "size": "7866",
+            "pos": "44",
+            "flags": "K_"
+        }
+    ],
+    "programs": [
+
+    ],
+    "streams": [
+        {
+            "index": 0,
+            "codec_type": "data",
+            "codec_tag_string": "stpp",
+            "codec_tag": "0x70707473",
+            "time_base": "1/1000",
+            "start_time": "0.000000",
+            "duration_ts": 68500,
+            "duration": "68.500000",
+            "nb_frames": "1",
+            "nb_read_packets": "1",
+            "tags": {
+                "language": "und",
+                "handler_name": "SubtitleHandler"
+            }
+        }
+    ]
+}