diff mbox series

[FFmpeg-devel,2/2] avformat/movenc: add support for fragmented TTML muxing

Message ID 20221223125157.83340-3-jeebjp@gmail.com
State New
Headers show
Series Initial support for fragmented TTML muxing | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Jan Ekström Dec. 23, 2022, 12:51 p.m. UTC
From: Jan Ekström <jan.ekstrom@24i.com>

Attempts to base the fragmentation timing on other streams
as most receivers expect media fragments to be more or less
aligned.

Currently does not support fragmentation on subtitle track
only, as the subtitle packet queue timings would have to be
checked in addition to the current fragmentation timing logic.

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
---
 libavformat/movenc.c      |   9 ---
 libavformat/movenc_ttml.c | 163 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 159 insertions(+), 13 deletions(-)

Comments

Andreas Rheinhardt Dec. 23, 2022, 2:41 p.m. UTC | #1
Jan Ekström:
> From: Jan Ekström <jan.ekstrom@24i.com>
> 
> Attempts to base the fragmentation timing on other streams
> as most receivers expect media fragments to be more or less
> aligned.
> 
> Currently does not support fragmentation on subtitle track
> only, as the subtitle packet queue timings would have to be
> checked in addition to the current fragmentation timing logic.
> 
> Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
> ---
>  libavformat/movenc.c      |   9 ---
>  libavformat/movenc_ttml.c | 163 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 159 insertions(+), 13 deletions(-)
> 
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 7d49892283..e9a7984f8a 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -7188,15 +7188,6 @@ static int mov_init(AVFormatContext *s)
>                  track->squash_fragment_samples_to_one =
>                      ff_is_ttml_stream_paragraph_based(track->par);
>  
> -                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
> -                    track->squash_fragment_samples_to_one) {
> -                    av_log(s, AV_LOG_ERROR,
> -                           "Fragmentation is not currently supported for "
> -                           "TTML in MP4/ISMV (track synchronization between "
> -                           "subtitles and other media is not yet implemented)!\n");
> -                    return AVERROR_PATCHWELCOME;
> -                }
> -
>                  if (track->mode != MODE_ISM &&
>                      track->par->codec_tag == MOV_ISMV_TTML_TAG &&
>                      s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
> diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
> index 6deae49657..27ec7d9487 100644
> --- a/libavformat/movenc_ttml.c
> +++ b/libavformat/movenc_ttml.c
> @@ -54,6 +54,50 @@ static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
>      return 0;
>  }
>  
> +static void mov_calculate_start_and_end_of_other_tracks(
> +    AVFormatContext *s, MOVTrack *track, int64_t *start_pts, int64_t *end_pts)
> +{
> +    MOVMuxContext *mov = s->priv_data;
> +
> +    // Initialize at the end of the previous document/fragment, which is NOPTS
> +    // until the first fragment is created.
> +    int64_t max_track_end_dts = *start_pts = track->end_pts;
> +
> +    for (unsigned int i = 0; i < s->nb_streams; i++) {
> +        MOVTrack *other_track = &mov->tracks[i];
> +
> +        // Skip our own track, any other track that needs squashing,
> +        // or any track which still has its start_dts at NOPTS or
> +        // any track that did not yet get any packets.
> +        if (track == other_track ||
> +            other_track->squash_fragment_samples_to_one ||
> +            other_track->start_dts == AV_NOPTS_VALUE ||
> +            !other_track->entry) {
> +            continue;
> +        }
> +
> +        {
> +            int64_t picked_start = av_rescale_q_rnd(other_track->cluster[0].dts + other_track->cluster[0].cts,
> +                                                    other_track->st->time_base,
> +                                                    track->st->time_base,
> +                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
> +            int64_t picked_end   = av_rescale_q_rnd(other_track->end_pts,
> +                                                    other_track->st->time_base,
> +                                                    track->st->time_base,
> +                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
> +
> +            if (*start_pts == AV_NOPTS_VALUE)
> +                *start_pts = picked_start;
> +            else if (picked_start >= track->end_pts)
> +                *start_pts = FFMIN(*start_pts, picked_start);
> +
> +            max_track_end_dts = FFMAX(max_track_end_dts, picked_end);
> +        }
> +    }
> +
> +    *end_pts = max_track_end_dts;
> +}
> +
>  static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>                                                AVFormatContext *ttml_ctx,
>                                                MOVTrack *track,
> @@ -65,13 +109,87 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>      int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
>                         0 : (track->start_dts + track->track_duration);
>      int64_t end_ts   = start_ts;
> +    unsigned int time_limited = 0;
> +    PacketList back_to_queue_list = { 0 };
> +
> +    if (*out_start_ts != AV_NOPTS_VALUE) {
> +        // we have non-nopts values here, thus we have been given a time range
> +        time_limited = 1;
> +        start_ts = *out_start_ts;
> +        end_ts   = *out_start_ts + *out_duration;
> +    }
>  
>      if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
>          return ret;
>      }
>  
>      while (!avpriv_packet_list_get(&track->squashed_packet_queue, pkt)) {
> -        end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
> +        unsigned int stop_at_current_packet = 0;
> +        int64_t pts_before      = pkt->pts;
> +        int64_t duration_before = pkt->duration;
> +
> +        if (time_limited) {
> +            // special cases first:
> +            if (pkt->pts + pkt->duration < start_ts) {
> +                // too late for our fragment, unfortunately
> +                // unref and proceed to next packet in queue.
> +                av_log(s, AV_LOG_WARNING,
> +                       "Very late TTML packet in queue, dropping packet with "
> +                       "pts: %"PRId64", duration: %"PRId64"\n",
> +                       pkt->pts, pkt->duration);
> +                av_packet_unref(pkt);
> +                goto next_iteration;
> +            } else if (pkt->pts >= end_ts) {
> +                // starts after this fragment, put back to original queue
> +                ret = avpriv_packet_list_put(&track->squashed_packet_queue,
> +                                             pkt, av_packet_ref,
> +                                             FF_PACKETLIST_FLAG_PREPEND);
> +                if (ret < 0)
> +                    goto cleanup;
> +
> +                stop_at_current_packet = 1;
> +                goto next_iteration;

You can just break here and remove stop_at_current_packet.

> +            }
> +
> +            // limit packet pts to start_ts
> +            if (pkt->pts < start_ts) {
> +                pkt->duration -= start_ts - pkt->pts;
> +                pkt->pts = start_ts;
> +            }
> +
> +            if (pkt->pts + pkt->duration > end_ts) {
> +                // goes over our current fragment, create duplicate and
> +                // put it back to list after iteration has finished in
> +                // order to handle multiple subtitles at the same time.
> +                int64_t offset = end_ts - pkt->pts;
> +
> +                ret = avpriv_packet_list_put(&back_to_queue_list,
> +                                             pkt, av_packet_ref,
> +                                             FF_PACKETLIST_FLAG_PREPEND);
> +                if (ret < 0)
> +                    goto cleanup;
> +
> +                back_to_queue_list.head->pkt.pts =
> +                back_to_queue_list.head->pkt.dts =
> +                back_to_queue_list.head->pkt.pts + offset;
> +                back_to_queue_list.head->pkt.duration -= offset;
> +
> +                // and for our normal packet we just set duration to offset
> +                pkt->duration = offset;
> +            }
> +        } else {
> +            end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
> +        }
> +
> +        av_log(s, AV_LOG_TRACE,
> +               "TTML packet writeout: pts: %"PRId64" (%"PRId64"), "
> +               "duration: %"PRId64"\n",
> +               pkt->pts, pkt->pts - start_ts, pkt->duration);
> +        if (pkt->pts != pts_before || pkt->duration != duration_before) {
> +            av_log(s, AV_LOG_TRACE,
> +                   "Adjustments: pts: %"PRId64", duration: %"PRId64"\n",
> +                   pkt->pts - pts_before, pkt->duration - duration_before);
> +        }
>  
>          // in case of the 'dfxp' muxing mode, each written document is offset
>          // to its containing sample's beginning.
> @@ -89,6 +207,10 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>          }
>  
>          av_packet_unref(pkt);
> +
> +next_iteration:
> +        if (stop_at_current_packet)
> +            break;
>      }
>  
>      if ((ret = av_write_trailer(ttml_ctx)) < 0)
> @@ -100,15 +222,30 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
>      ret = 0;
>  
>  cleanup:
> +    while (!avpriv_packet_list_get(&back_to_queue_list, pkt)) {

You are removing some packets from the list and then re-adding some of
them in a way that keeps the order of the list. Makes me wonder whether
it would not be better to iterate through the list and process the
packets without moving them from the list (and remove the packets from
the list that are not supposed to be kept there, even if said packets are
in the middle of the list). The only mildly inelegant thing is that one
would need to duplicate and restore the time-related fields in the loop.

> +        ret = avpriv_packet_list_put(&track->squashed_packet_queue,
> +                                     pkt, av_packet_ref,
> +                                     FF_PACKETLIST_FLAG_PREPEND);
> +
> +        // unrelated to whether we succeed or not, we unref the packet
> +        // received from the temporary list.
> +        av_packet_unref(pkt);

In this case, you should not use av_packet_ref() above at all, but move
the packet to the list; then you only need to unref the packet in the
error case.

> +
> +        if (ret < 0) {
> +            avpriv_packet_list_free(&back_to_queue_list);
> +            break;

If you returned ret directly here, the effective scope of ret would be
the loop body.

> +        }
> +    }
>      return ret;
>  }
>  
>  int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
>                                           MOVTrack *track, AVPacket *pkt)
>  {
> +    MOVMuxContext *mov = s->priv_data;
>      AVFormatContext *ttml_ctx = NULL;
>      // values for the generated AVPacket
> -    int64_t start_ts = 0;
> +    int64_t start_ts = AV_NOPTS_VALUE;
>      int64_t duration = 0;
>  
>      int ret = AVERROR_BUG;
> @@ -119,12 +256,30 @@ int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
>          goto cleanup;
>      }
>  
> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +        int64_t calculated_start = AV_NOPTS_VALUE;
> +        int64_t calculated_end = AV_NOPTS_VALUE;
> +
> +        mov_calculate_start_and_end_of_other_tracks(s, track, &calculated_start, &calculated_end);
> +
> +        if (calculated_start != AV_NOPTS_VALUE) {
> +            start_ts = calculated_start;
> +            duration = calculated_end - calculated_start;
> +            av_log(s, AV_LOG_VERBOSE,
> +                   "Calculated subtitle fragment start: %"PRId64", "
> +                   "duration: %"PRId64"\n",
> +                   start_ts, duration);
> +        }
> +    }
> +
>      if (!track->squashed_packet_queue.head) {
>          // empty queue, write minimal empty document with zero duration
>          avio_write(ttml_ctx->pb, empty_ttml_document,
>                     sizeof(empty_ttml_document) - 1);
> -        start_ts = 0;
> -        duration = 0;
> +        if (start_ts == AV_NOPTS_VALUE) {
> +            start_ts = 0;
> +            duration = 0;
> +        }
>          goto generate_packet;
>      }
>
diff mbox series

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 7d49892283..e9a7984f8a 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -7188,15 +7188,6 @@  static int mov_init(AVFormatContext *s)
                 track->squash_fragment_samples_to_one =
                     ff_is_ttml_stream_paragraph_based(track->par);
 
-                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
-                    track->squash_fragment_samples_to_one) {
-                    av_log(s, AV_LOG_ERROR,
-                           "Fragmentation is not currently supported for "
-                           "TTML in MP4/ISMV (track synchronization between "
-                           "subtitles and other media is not yet implemented)!\n");
-                    return AVERROR_PATCHWELCOME;
-                }
-
                 if (track->mode != MODE_ISM &&
                     track->par->codec_tag == MOV_ISMV_TTML_TAG &&
                     s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
index 6deae49657..27ec7d9487 100644
--- a/libavformat/movenc_ttml.c
+++ b/libavformat/movenc_ttml.c
@@ -54,6 +54,50 @@  static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
     return 0;
 }
 
+static void mov_calculate_start_and_end_of_other_tracks(
+    AVFormatContext *s, MOVTrack *track, int64_t *start_pts, int64_t *end_pts)
+{
+    MOVMuxContext *mov = s->priv_data;
+
+    // Initialize at the end of the previous document/fragment, which is NOPTS
+    // until the first fragment is created.
+    int64_t max_track_end_dts = *start_pts = track->end_pts;
+
+    for (unsigned int i = 0; i < s->nb_streams; i++) {
+        MOVTrack *other_track = &mov->tracks[i];
+
+        // Skip our own track, any other track that needs squashing,
+        // or any track which still has its start_dts at NOPTS or
+        // any track that did not yet get any packets.
+        if (track == other_track ||
+            other_track->squash_fragment_samples_to_one ||
+            other_track->start_dts == AV_NOPTS_VALUE ||
+            !other_track->entry) {
+            continue;
+        }
+
+        {
+            int64_t picked_start = av_rescale_q_rnd(other_track->cluster[0].dts + other_track->cluster[0].cts,
+                                                    other_track->st->time_base,
+                                                    track->st->time_base,
+                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
+            int64_t picked_end   = av_rescale_q_rnd(other_track->end_pts,
+                                                    other_track->st->time_base,
+                                                    track->st->time_base,
+                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
+
+            if (*start_pts == AV_NOPTS_VALUE)
+                *start_pts = picked_start;
+            else if (picked_start >= track->end_pts)
+                *start_pts = FFMIN(*start_pts, picked_start);
+
+            max_track_end_dts = FFMAX(max_track_end_dts, picked_end);
+        }
+    }
+
+    *end_pts = max_track_end_dts;
+}
+
 static int mov_write_ttml_document_from_queue(AVFormatContext *s,
                                               AVFormatContext *ttml_ctx,
                                               MOVTrack *track,
@@ -65,13 +109,87 @@  static int mov_write_ttml_document_from_queue(AVFormatContext *s,
     int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
                        0 : (track->start_dts + track->track_duration);
     int64_t end_ts   = start_ts;
+    unsigned int time_limited = 0;
+    PacketList back_to_queue_list = { 0 };
+
+    if (*out_start_ts != AV_NOPTS_VALUE) {
+        // we have non-nopts values here, thus we have been given a time range
+        time_limited = 1;
+        start_ts = *out_start_ts;
+        end_ts   = *out_start_ts + *out_duration;
+    }
 
     if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
         return ret;
     }
 
     while (!avpriv_packet_list_get(&track->squashed_packet_queue, pkt)) {
-        end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
+        unsigned int stop_at_current_packet = 0;
+        int64_t pts_before      = pkt->pts;
+        int64_t duration_before = pkt->duration;
+
+        if (time_limited) {
+            // special cases first:
+            if (pkt->pts + pkt->duration < start_ts) {
+                // too late for our fragment, unfortunately
+                // unref and proceed to next packet in queue.
+                av_log(s, AV_LOG_WARNING,
+                       "Very late TTML packet in queue, dropping packet with "
+                       "pts: %"PRId64", duration: %"PRId64"\n",
+                       pkt->pts, pkt->duration);
+                av_packet_unref(pkt);
+                goto next_iteration;
+            } else if (pkt->pts >= end_ts) {
+                // starts after this fragment, put back to original queue
+                ret = avpriv_packet_list_put(&track->squashed_packet_queue,
+                                             pkt, av_packet_ref,
+                                             FF_PACKETLIST_FLAG_PREPEND);
+                if (ret < 0)
+                    goto cleanup;
+
+                stop_at_current_packet = 1;
+                goto next_iteration;
+            }
+
+            // limit packet pts to start_ts
+            if (pkt->pts < start_ts) {
+                pkt->duration -= start_ts - pkt->pts;
+                pkt->pts = start_ts;
+            }
+
+            if (pkt->pts + pkt->duration > end_ts) {
+                // goes over our current fragment, create duplicate and
+                // put it back to list after iteration has finished in
+                // order to handle multiple subtitles at the same time.
+                int64_t offset = end_ts - pkt->pts;
+
+                ret = avpriv_packet_list_put(&back_to_queue_list,
+                                             pkt, av_packet_ref,
+                                             FF_PACKETLIST_FLAG_PREPEND);
+                if (ret < 0)
+                    goto cleanup;
+
+                back_to_queue_list.head->pkt.pts =
+                back_to_queue_list.head->pkt.dts =
+                back_to_queue_list.head->pkt.pts + offset;
+                back_to_queue_list.head->pkt.duration -= offset;
+
+                // and for our normal packet we just set duration to offset
+                pkt->duration = offset;
+            }
+        } else {
+            end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
+        }
+
+        av_log(s, AV_LOG_TRACE,
+               "TTML packet writeout: pts: %"PRId64" (%"PRId64"), "
+               "duration: %"PRId64"\n",
+               pkt->pts, pkt->pts - start_ts, pkt->duration);
+        if (pkt->pts != pts_before || pkt->duration != duration_before) {
+            av_log(s, AV_LOG_TRACE,
+                   "Adjustments: pts: %"PRId64", duration: %"PRId64"\n",
+                   pkt->pts - pts_before, pkt->duration - duration_before);
+        }
 
         // in case of the 'dfxp' muxing mode, each written document is offset
         // to its containing sample's beginning.
@@ -89,6 +207,10 @@  static int mov_write_ttml_document_from_queue(AVFormatContext *s,
         }
 
         av_packet_unref(pkt);
+
+next_iteration:
+        if (stop_at_current_packet)
+            break;
     }
 
     if ((ret = av_write_trailer(ttml_ctx)) < 0)
@@ -100,15 +222,30 @@  static int mov_write_ttml_document_from_queue(AVFormatContext *s,
     ret = 0;
 
 cleanup:
+    while (!avpriv_packet_list_get(&back_to_queue_list, pkt)) {
+        ret = avpriv_packet_list_put(&track->squashed_packet_queue,
+                                     pkt, av_packet_ref,
+                                     FF_PACKETLIST_FLAG_PREPEND);
+
+        // unrelated to whether we succeed or not, we unref the packet
+        // received from the temporary list.
+        av_packet_unref(pkt);
+
+        if (ret < 0) {
+            avpriv_packet_list_free(&back_to_queue_list);
+            break;
+        }
+    }
     return ret;
 }
 
 int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
                                          MOVTrack *track, AVPacket *pkt)
 {
+    MOVMuxContext *mov = s->priv_data;
     AVFormatContext *ttml_ctx = NULL;
     // values for the generated AVPacket
-    int64_t start_ts = 0;
+    int64_t start_ts = AV_NOPTS_VALUE;
     int64_t duration = 0;
 
     int ret = AVERROR_BUG;
@@ -119,12 +256,30 @@  int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
         goto cleanup;
     }
 
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
+        int64_t calculated_start = AV_NOPTS_VALUE;
+        int64_t calculated_end = AV_NOPTS_VALUE;
+
+        mov_calculate_start_and_end_of_other_tracks(s, track, &calculated_start, &calculated_end);
+
+        if (calculated_start != AV_NOPTS_VALUE) {
+            start_ts = calculated_start;
+            duration = calculated_end - calculated_start;
+            av_log(s, AV_LOG_VERBOSE,
+                   "Calculated subtitle fragment start: %"PRId64", "
+                   "duration: %"PRId64"\n",
+                   start_ts, duration);
+        }
+    }
+
     if (!track->squashed_packet_queue.head) {
         // empty queue, write minimal empty document with zero duration
         avio_write(ttml_ctx->pb, empty_ttml_document,
                    sizeof(empty_ttml_document) - 1);
-        start_ts = 0;
-        duration = 0;
+        if (start_ts == AV_NOPTS_VALUE) {
+            start_ts = 0;
+            duration = 0;
+        }
         goto generate_packet;
     }