diff mbox series

[FFmpeg-devel] lavf/mux: bypass interleave delay early when waiting on subtitle streams

Message ID 20200311213635.51736-1-rcombs@rcombs.me
State New
Headers show
Series [FFmpeg-devel] lavf/mux: bypass interleave delay early when waiting on subtitle streams | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

rcombs March 11, 2020, 9:36 p.m. UTC
This avoids long delays when converting live streams that have sparse subtitles
---
 libavformat/avformat.h      |  9 +++++++++
 libavformat/mux.c           | 25 +++++++++++++++++++++----
 libavformat/options_table.h |  1 +
 libavformat/version.h       |  2 +-
 4 files changed, 32 insertions(+), 5 deletions(-)

Comments

Andreas Rheinhardt March 12, 2020, 7:51 a.m. UTC | #1
rcombs:
> This avoids long delays when converting live streams that have sparse subtitles
> ---
>  libavformat/avformat.h      |  9 +++++++++
>  libavformat/mux.c           | 25 +++++++++++++++++++++----
>  libavformat/options_table.h |  1 +
>  libavformat/version.h       |  2 +-
>  4 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/libavformat/avformat.h b/libavformat/avformat.h
> index 9b9b634ec3..da7e5755e8 100644
> --- a/libavformat/avformat.h
> +++ b/libavformat/avformat.h
> @@ -1957,6 +1957,15 @@ typedef struct AVFormatContext {
>       * - decoding: set by user
>       */
>      int max_probe_packets;
> +
> +    /**
> +     * Maximum buffering duration for interleaving sparse streams.
> +     *
> +     * @see max_interleave_delta
> +     *
> +     * Applies only to subtitle and data streams.
> +     */
> +    int64_t max_sparse_interleave_delta;
>  } AVFormatContext;
>  
>  #if FF_API_FORMAT_GET_SET
> diff --git a/libavformat/mux.c b/libavformat/mux.c
> index d88746e8c5..f2f272cf65 100644
> --- a/libavformat/mux.c
> +++ b/libavformat/mux.c
> @@ -1033,6 +1033,7 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>      AVPacketList *pktl;
>      int stream_count = 0;
>      int noninterleaved_count = 0;
> +    int sparse_count = 0;
>      int i, ret;
>      int eof = flush;
>  
> @@ -1044,6 +1045,9 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>      for (i = 0; i < s->nb_streams; i++) {
>          if (s->streams[i]->last_in_packet_buffer) {
>              ++stream_count;
> +        } else if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ||
> +                   s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
> +            ++sparse_count;
>          } else if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_ATTACHMENT &&
>                     s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP8 &&
>                     s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP9) {
> @@ -1054,10 +1058,12 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>      if (s->internal->nb_interleaved_streams == stream_count)
>          flush = 1;
>  
> -    if (s->max_interleave_delta > 0 &&
> -        s->internal->packet_buffer &&
> +    if (s->internal->packet_buffer &&
>          !flush &&
> -        s->internal->nb_interleaved_streams == stream_count+noninterleaved_count
> +        ((s->max_interleave_delta > 0 &&
> +          s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) ||
> +         (s->max_sparse_interleave_delta > 0 &&
> +          s->internal->nb_interleaved_streams == stream_count+sparse_count))

If max_sparse_interleave_delta has its default value and
max_interleave_delta has been explicitly set to zero (thereby
indicating that one should wait until one has one packet for each
stream), the check used here might output a packet even if one does
not have packets for some of the sparse streams. This is in
contradiction to the documentation of max_interleave_delta.

(Nit: Use stream_count+sparse_count+noninterleaved_count.)

>      ) {
>          AVPacket *top_pkt = &s->internal->packet_buffer->pkt;
>          int64_t delta_dts = INT64_MIN;
> @@ -1078,12 +1084,23 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>              delta_dts = FFMAX(delta_dts, last_dts - top_dts);
>          }
>  
> -        if (delta_dts > s->max_interleave_delta) {
> +        if (s->max_interleave_delta > 0 &&
> +            delta_dts > s->max_interleave_delta &&
> +            s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) {
>              av_log(s, AV_LOG_DEBUG,
>                     "Delay between the first packet and last packet in the "
>                     "muxing queue is %"PRId64" > %"PRId64": forcing output\n",
>                     delta_dts, s->max_interleave_delta);
>              flush = 1;
> +        } else if (s->max_sparse_interleave_delta > 0 &&
> +                   delta_dts > s->max_sparse_interleave_delta &&
> +                   s->internal->nb_interleaved_streams == stream_count+sparse_count) {
> +            av_log(s, AV_LOG_DEBUG,
> +                   "Delay between the first packet and last packet in the "
> +                   "muxing queue is %"PRId64" > %"PRId64" and all delayed "
> +                   "streams are sparse: forcing output\n",
> +                   delta_dts, s->max_sparse_interleave_delta);
> +            flush = 1;
>          }
>      }
>  
Do all of these additional checks have a measurable performance impact?

Why didn't you include attached picture-streams (whether or not they
are timed thumbnails) among the sparse streams? (They are counted
among the nb_interleaved_streams and cause lots of delay*.)

The muxing code has even more gotchas: a06fac35 added these exceptions
for VP8 and VP9 (which exempted them from max_interleave_delta which
is actually against said field's documented behaviour) because of
delay caused by libvpx (does this issue still exist?), yet it is
applied indiscriminately even on streamcopy. This and the fact that
the check for how to treat streams without packets will be pretty long
when the checks for sparse streams (incl. attached pictures) have been
added leads me to the following proposal: We add a new field
interleavement_type to AVStream that the caller may set before
avformat_write_header() (or avformat_init_output() if that is called)
and its possible values are
AV_INTERLEAVEMENT_TYPE_NORMAL (this is a normal stream, i.e.
max_interleave_delta applies to it)
AV_INTERLEAVEMENT_TYPE_SPARSE (this is a sparse stream;
max_sparse_interleave_delta applies to it)
AV_INTERLEAVEMENT_TYPE_ESSENTIAL (this is an essential stream and one
should not write any output if one doesn't have a packet for it
(unless explicit flushing/EOF); this is how VP8 and VP9 are treated
right now)
AV_INTERLEAVEMENT_TYPE_IGNORE (not having a packet of a stream of this
type does not block outputting packets; attachment streams must not be
set to anything else)
and a default value that if set will be replaced during initilization
by a sane default value.
This gives a caller more fine-grained control of interleavement and
e.g. allows ffmpeg to treat VP8 and VP9 special when it comes out of
libvpx and normal in other situations.

Finally, this patch is only about reducing delay when waiting for
subtitles and if I am not mistaken, it does not help with situations
where subtitles are available too early like in ticket #7064; and it
only minimally affects e.g. ticket #6037 (with this patch, more
packets are output before writing the trailer (the length of the
muxing queue when writing the trailer is about
max_sparse_interleave_delta instead of max_interleave_delta)), doesn't it?

- Andreas

*: And for some reason if I just use ffmpeg -i <file with attached
pic> -map 0 -c copy -t 1 -f null - it reads the whole file; but that
has to be a bug somewhere else (not the muxing code).
rcombs March 20, 2020, 1:22 a.m. UTC | #2
> On Mar 12, 2020, at 02:51, Andreas Rheinhardt <andreas.rheinhardt@gmail.com> wrote:
> 
> rcombs:
>> This avoids long delays when converting live streams that have sparse subtitles
>> ---
>> libavformat/avformat.h      |  9 +++++++++
>> libavformat/mux.c           | 25 +++++++++++++++++++++----
>> libavformat/options_table.h |  1 +
>> libavformat/version.h       |  2 +-
>> 4 files changed, 32 insertions(+), 5 deletions(-)
>> 
>> diff --git a/libavformat/avformat.h b/libavformat/avformat.h
>> index 9b9b634ec3..da7e5755e8 100644
>> --- a/libavformat/avformat.h
>> +++ b/libavformat/avformat.h
>> @@ -1957,6 +1957,15 @@ typedef struct AVFormatContext {
>>      * - decoding: set by user
>>      */
>>     int max_probe_packets;
>> +
>> +    /**
>> +     * Maximum buffering duration for interleaving sparse streams.
>> +     *
>> +     * @see max_interleave_delta
>> +     *
>> +     * Applies only to subtitle and data streams.
>> +     */
>> +    int64_t max_sparse_interleave_delta;
>> } AVFormatContext;
>> 
>> #if FF_API_FORMAT_GET_SET
>> diff --git a/libavformat/mux.c b/libavformat/mux.c
>> index d88746e8c5..f2f272cf65 100644
>> --- a/libavformat/mux.c
>> +++ b/libavformat/mux.c
>> @@ -1033,6 +1033,7 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>>     AVPacketList *pktl;
>>     int stream_count = 0;
>>     int noninterleaved_count = 0;
>> +    int sparse_count = 0;
>>     int i, ret;
>>     int eof = flush;
>> 
>> @@ -1044,6 +1045,9 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>>     for (i = 0; i < s->nb_streams; i++) {
>>         if (s->streams[i]->last_in_packet_buffer) {
>>             ++stream_count;
>> +        } else if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ||
>> +                   s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
>> +            ++sparse_count;
>>         } else if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_ATTACHMENT &&
>>                    s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP8 &&
>>                    s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP9) {
>> @@ -1054,10 +1058,12 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>>     if (s->internal->nb_interleaved_streams == stream_count)
>>         flush = 1;
>> 
>> -    if (s->max_interleave_delta > 0 &&
>> -        s->internal->packet_buffer &&
>> +    if (s->internal->packet_buffer &&
>>         !flush &&
>> -        s->internal->nb_interleaved_streams == stream_count+noninterleaved_count
>> +        ((s->max_interleave_delta > 0 &&
>> +          s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) ||
>> +         (s->max_sparse_interleave_delta > 0 &&
>> +          s->internal->nb_interleaved_streams == stream_count+sparse_count))
> 
> If max_sparse_interleave_delta has its default value and
> max_interleave_delta has been explicitly set to zero (thereby
> indicating that one should wait until one has one packet for each
> stream), the check used here might output a packet even if one does
> not have packets for some of the sparse streams. This is in
> contradiction to the documentation of max_interleave_delta.

Hmmmmm, is that any different from how this changes the default behavior? Maybe I should just clarify in the docs?

> 
> (Nit: Use stream_count+sparse_count+noninterleaved_count.)

Sure.

> 
>>     ) {
>>         AVPacket *top_pkt = &s->internal->packet_buffer->pkt;
>>         int64_t delta_dts = INT64_MIN;
>> @@ -1078,12 +1084,23 @@ int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
>>             delta_dts = FFMAX(delta_dts, last_dts - top_dts);
>>         }
>> 
>> -        if (delta_dts > s->max_interleave_delta) {
>> +        if (s->max_interleave_delta > 0 &&
>> +            delta_dts > s->max_interleave_delta &&
>> +            s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) {
>>             av_log(s, AV_LOG_DEBUG,
>>                    "Delay between the first packet and last packet in the "
>>                    "muxing queue is %"PRId64" > %"PRId64": forcing output\n",
>>                    delta_dts, s->max_interleave_delta);
>>             flush = 1;
>> +        } else if (s->max_sparse_interleave_delta > 0 &&
>> +                   delta_dts > s->max_sparse_interleave_delta &&
>> +                   s->internal->nb_interleaved_streams == stream_count+sparse_count) {
>> +            av_log(s, AV_LOG_DEBUG,
>> +                   "Delay between the first packet and last packet in the "
>> +                   "muxing queue is %"PRId64" > %"PRId64" and all delayed "
>> +                   "streams are sparse: forcing output\n",
>> +                   delta_dts, s->max_sparse_interleave_delta);
>> +            flush = 1;
>>         }
>>     }
>> 
> Do all of these additional checks have a measurable performance impact?

I'd be shocked if they did; whenever I've perf-tested (de)muxing code it's always been I/O-bound, even on extremely fast drives.

> 
> Why didn't you include attached picture-streams (whether or not they
> are timed thumbnails) among the sparse streams? (They are counted
> among the nb_interleaved_streams and cause lots of delay*.)

Good idea, added.

> 
> The muxing code has even more gotchas: a06fac35 added these exceptions
> for VP8 and VP9 (which exempted them from max_interleave_delta which
> is actually against said field's documented behaviour) because of
> delay caused by libvpx (does this issue still exist?), yet it is
> applied indiscriminately even on streamcopy. This and the fact that
> the check for how to treat streams without packets will be pretty long
> when the checks for sparse streams (incl. attached pictures) have been
> added leads me to the following proposal: We add a new field
> interleavement_type to AVStream that the caller may set before
> avformat_write_header() (or avformat_init_output() if that is called)
> and its possible values are
> AV_INTERLEAVEMENT_TYPE_NORMAL (this is a normal stream, i.e.
> max_interleave_delta applies to it)
> AV_INTERLEAVEMENT_TYPE_SPARSE (this is a sparse stream;
> max_sparse_interleave_delta applies to it)
> AV_INTERLEAVEMENT_TYPE_ESSENTIAL (this is an essential stream and one
> should not write any output if one doesn't have a packet for it
> (unless explicit flushing/EOF); this is how VP8 and VP9 are treated
> right now)
> AV_INTERLEAVEMENT_TYPE_IGNORE (not having a packet of a stream of this
> type does not block outputting packets; attachment streams must not be
> set to anything else)
> and a default value that if set will be replaced during initilization
> by a sane default value.
> This gives a caller more fine-grained control of interleavement and
> e.g. allows ffmpeg to treat VP8 and VP9 special when it comes out of
> libvpx and normal in other situations.

This seems like a pretty good idea; I'd imagine we'd want to have a TYPE_AUTO that avformat_init_output converts into the value we currently use?
In any case, does this need to block applying the patch? This seems like a useful improvement but one that could be built on top of this change (replacing some of the checks), and wouldn't result in any changes to the added API.

> 
> Finally, this patch is only about reducing delay when waiting for
> subtitles and if I am not mistaken, it does not help with situations
> where subtitles are available too early like in ticket #7064; and it
> only minimally affects e.g. ticket #6037 (with this patch, more
> packets are output before writing the trailer (the length of the
> muxing queue when writing the trailer is about
> max_sparse_interleave_delta instead of max_interleave_delta)), doesn't it?

Right. I think #7064 would be pretty easy to fix with a code change in this area, though; just skip over sparse streams in the delta_dts loop.

> 
> - Andreas
> 
> *: And for some reason if I just use ffmpeg -i <file with attached
> pic> -map 0 -c copy -t 1 -f null - it reads the whole file; but that
> has to be a bug somewhere else (not the muxing code).

Yeah, that's because the stream never gets past 1 second, so check_recording_time never calls close_output_stream, so need_output sticks at 1.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Jan Ekström April 21, 2020, 6:34 p.m. UTC | #3
On Wed, Mar 11, 2020 at 11:36 PM rcombs <rcombs@rcombs.me> wrote:
>
> This avoids long delays when converting live streams that have sparse subtitles
> ---
>  libavformat/avformat.h      |  9 +++++++++
>  libavformat/mux.c           | 25 +++++++++++++++++++++----
>  libavformat/options_table.h |  1 +
>  libavformat/version.h       |  2 +-
>  4 files changed, 32 insertions(+), 5 deletions(-)

For the record, was debugging a case of rather bursty muxing of a live
stream from a live TV source to fragmented mp4 with keyframe-based
fragmentation and subtitles in the mux.

For easier debugging I utilized fix_sub_duration with ffmpeg.c, static
2 second GOPs, and added  av_gettime_relative logging calls at the end
of mov_flush_fragment. It showed things like 2-3 2 second fragments
happening in very quick succession, and then a pause for a while.
Making the process rather jittery.

Haven't yet really been able to look at this patch code wise, but
clearly with this patch the output fragmentation becomes much, much
more stable (nice ~2 second timings all way through while I was
running my tests).

Best regards,
Jan
diff mbox series

Patch

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 9b9b634ec3..da7e5755e8 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1957,6 +1957,15 @@  typedef struct AVFormatContext {
      * - decoding: set by user
      */
     int max_probe_packets;
+
+    /**
+     * Maximum buffering duration for interleaving sparse streams.
+     *
+     * @see max_interleave_delta
+     *
+     * Applies only to subtitle and data streams.
+     */
+    int64_t max_sparse_interleave_delta;
 } AVFormatContext;
 
 #if FF_API_FORMAT_GET_SET
diff --git a/libavformat/mux.c b/libavformat/mux.c
index d88746e8c5..f2f272cf65 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -1033,6 +1033,7 @@  int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
     AVPacketList *pktl;
     int stream_count = 0;
     int noninterleaved_count = 0;
+    int sparse_count = 0;
     int i, ret;
     int eof = flush;
 
@@ -1044,6 +1045,9 @@  int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
     for (i = 0; i < s->nb_streams; i++) {
         if (s->streams[i]->last_in_packet_buffer) {
             ++stream_count;
+        } else if (s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE ||
+                   s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_DATA) {
+            ++sparse_count;
         } else if (s->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_ATTACHMENT &&
                    s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP8 &&
                    s->streams[i]->codecpar->codec_id != AV_CODEC_ID_VP9) {
@@ -1054,10 +1058,12 @@  int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
     if (s->internal->nb_interleaved_streams == stream_count)
         flush = 1;
 
-    if (s->max_interleave_delta > 0 &&
-        s->internal->packet_buffer &&
+    if (s->internal->packet_buffer &&
         !flush &&
-        s->internal->nb_interleaved_streams == stream_count+noninterleaved_count
+        ((s->max_interleave_delta > 0 &&
+          s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) ||
+         (s->max_sparse_interleave_delta > 0 &&
+          s->internal->nb_interleaved_streams == stream_count+sparse_count))
     ) {
         AVPacket *top_pkt = &s->internal->packet_buffer->pkt;
         int64_t delta_dts = INT64_MIN;
@@ -1078,12 +1084,23 @@  int ff_interleave_packet_per_dts(AVFormatContext *s, AVPacket *out,
             delta_dts = FFMAX(delta_dts, last_dts - top_dts);
         }
 
-        if (delta_dts > s->max_interleave_delta) {
+        if (s->max_interleave_delta > 0 &&
+            delta_dts > s->max_interleave_delta &&
+            s->internal->nb_interleaved_streams == stream_count+noninterleaved_count+sparse_count) {
             av_log(s, AV_LOG_DEBUG,
                    "Delay between the first packet and last packet in the "
                    "muxing queue is %"PRId64" > %"PRId64": forcing output\n",
                    delta_dts, s->max_interleave_delta);
             flush = 1;
+        } else if (s->max_sparse_interleave_delta > 0 &&
+                   delta_dts > s->max_sparse_interleave_delta &&
+                   s->internal->nb_interleaved_streams == stream_count+sparse_count) {
+            av_log(s, AV_LOG_DEBUG,
+                   "Delay between the first packet and last packet in the "
+                   "muxing queue is %"PRId64" > %"PRId64" and all delayed "
+                   "streams are sparse: forcing output\n",
+                   delta_dts, s->max_sparse_interleave_delta);
+            flush = 1;
         }
     }
 
diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index e26b512440..db04f970e2 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h
@@ -91,6 +91,7 @@  static const AVOption avformat_options[] = {
 {"metadata_header_padding", "set number of bytes to be written as padding in a metadata header", OFFSET(metadata_header_padding), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, E},
 {"output_ts_offset", "set output timestamp offset", OFFSET(output_ts_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, -INT64_MAX, INT64_MAX, E},
 {"max_interleave_delta", "maximum buffering duration for interleaving", OFFSET(max_interleave_delta), AV_OPT_TYPE_INT64, { .i64 = 10000000 }, 0, INT64_MAX, E },
+{"max_sparse_interleave_delta", "maximum buffering duration for interleaving sparse streams", OFFSET(max_sparse_interleave_delta), AV_OPT_TYPE_INT64, { .i64 = 1000000 }, 0, INT64_MAX, E },
 {"f_strict", "how strictly to follow the standards (deprecated; use strict, save via avconv)", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "strict"},
 {"strict", "how strictly to follow the standards", OFFSET(strict_std_compliance), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "strict"},
 {"very", "strictly conform to a older more strict version of the spec or reference software", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_VERY_STRICT }, INT_MIN, INT_MAX, D|E, "strict"},
diff --git a/libavformat/version.h b/libavformat/version.h
index e815c1f3c4..18c2f5fec2 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -32,7 +32,7 @@ 
 // Major bumping may affect Ticket5467, 5421, 5451(compatibility with Chromium)
 // Also please add any ticket numbers that you believe might be affected here
 #define LIBAVFORMAT_VERSION_MAJOR  58
-#define LIBAVFORMAT_VERSION_MINOR  41
+#define LIBAVFORMAT_VERSION_MINOR  42
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \