diff mbox

[FFmpeg-devel] avformat/hls: Added subtitle support

Message ID 20161116103110.GA1778@debianvm1
State Superseded
Headers show

Commit Message

Franklin Phillips Nov. 16, 2016, 10:31 a.m. UTC
This patch is a fix for ticket #2833.

Each subtitle segment is its own WebVTT file and has to demuxed individually. The timing of the subtitles are not perfect but it is the best I could do and it also does not take into account the X-TIMESTAMP-MAP header in the WebVTT files which is needed to conform to the specification (https://tools.ietf.org/html/draft-pantos-http-live-streaming-20#section-3.5).

Signed-off-by: Franklin Phillips <franklinphillips@gmx.com>
---
 libavformat/hls.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 159 insertions(+), 19 deletions(-)

Comments

wm4 Nov. 16, 2016, 10:41 a.m. UTC | #1
On Wed, 16 Nov 2016 10:31:10 +0000
Franklin Phillips <franklinphillips@gmx.com> wrote:

> This patch is a fix for ticket #2833.
> 
> Each subtitle segment is its own WebVTT file and has to demuxed individually.

> The timing of the subtitles are not perfect but it is the best I could

What does this mean?

> do and it also does not take into account the X-TIMESTAMP-MAP header
> in the WebVTT files which is needed to conform to the specification
> (https://tools.ietf.org/html/draft-pantos-http-live-streaming-20#section-3.5).

I'm not entirely familiar how HLS works. Shouldn't this be fine as long
as the webvtt events have the correct "final" timestamp?

> 
> Signed-off-by: Franklin Phillips <franklinphillips@gmx.com>
> ---
>  libavformat/hls.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 159 insertions(+), 19 deletions(-)
> 
> diff --git a/libavformat/hls.c b/libavformat/hls.c
> index 3ae3c7c..bf13be4 100644
> --- a/libavformat/hls.c
> +++ b/libavformat/hls.c
> @@ -153,6 +153,8 @@ struct playlist {
>       * playlist, if any. */
>      int n_init_sections;
>      struct segment **init_sections;
> +
> +    int is_subtitle; /* Indicates if the playlist is for subtitles */
>  };
>  
>  /*
> @@ -312,6 +314,8 @@ static struct playlist *new_playlist(HLSContext *c, const char *url,
>      pls->is_id3_timestamped = -1;
>      pls->id3_mpegts_timestamp = AV_NOPTS_VALUE;
>  
> +    pls->is_subtitle = 0;
> +
>      dynarray_add(&c->playlists, &c->n_playlists, pls);
>      return pls;
>  }
> @@ -482,11 +486,6 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
>      if (type == AVMEDIA_TYPE_SUBTITLE && !info->uri[0])
>          return NULL;
>  
> -    /* TODO: handle subtitles (each segment has to parsed separately) */
> -    if (c->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
> -        if (type == AVMEDIA_TYPE_SUBTITLE)
> -            return NULL;
> -
>      rend = av_mallocz(sizeof(struct rendition));
>      if (!rend)
>          return NULL;
> @@ -501,9 +500,14 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
>      /* add the playlist if this is an external rendition */
>      if (info->uri[0]) {
>          rend->playlist = new_playlist(c, info->uri, url_base);
> -        if (rend->playlist)
> +        if (rend->playlist) {
>              dynarray_add(&rend->playlist->renditions,
>                           &rend->playlist->n_renditions, rend);
> +            if (type == AVMEDIA_TYPE_SUBTITLE) {
> +                rend->playlist->is_subtitle = 1;
> +                rend->playlist->is_id3_timestamped = 0;
> +            }
> +        }
>      }
>  
>      if (info->assoc_language[0]) {
> @@ -1349,6 +1353,136 @@ reload:
>      goto restart;
>  }
>  
> +static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> +                          int flags, AVDictionary **opts)
> +{
> +    av_log(s, AV_LOG_ERROR,
> +           "A HLS playlist item '%s' referred to an external file '%s'. "
> +           "Opening this file was forbidden for security reasons\n",
> +           s->filename, url);
> +    return AVERROR(EPERM);
> +}
> +
> +static int read_data_simple(void *opaque, uint8_t *buf, int buf_size)
> +{
> +    struct playlist *v = opaque;
> +    HLSContext *c = v->parent->priv_data;
> +    struct segment *seg;
> +
> +    if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
> +        return AVERROR_EOF;
> +    } else {
> +        seg = current_segment(v);
> +    }
> +
> +    if (!v->input) {
> +        open_input(c, v, seg);
> +    }
> +
> +    return read_from_url(v, seg, buf, buf_size, READ_NORMAL);
> +}
> +
> +static int read_packet_subtitle(struct playlist *v, AVPacket *pkt)
> +{
> +    HLSContext *c = v->parent->priv_data;
> +    int ret, i;
> +
> +restart:
> +    if (!v->needed)
> +        return AVERROR_EOF;
> +
> +    if (!v->input) {
> +        int64_t reload_interval;
> +
> +        /* Check that the playlist is still needed before opening a new
> +         * segment. */
> +        if (v->ctx && v->ctx->nb_streams) {
> +            v->needed = 0;
> +            for (i = 0; i < v->n_main_streams; i++) {
> +                if (v->main_streams[i]->discard < AVDISCARD_ALL) {
> +                    v->needed = 1;
> +                    break;
> +                }
> +            }
> +        }
> +        if (!v->needed) {
> +            av_log(v->parent, AV_LOG_INFO, "No longer receiving playlist %d\n",
> +                v->index);
> +            return AVERROR_EOF;
> +        }
> +
> +        /* If this is a live stream and the reload interval has elapsed since
> +         * the last playlist reload, reload the playlists now. */
> +        reload_interval = default_reload_interval(v);
> +
> +        if (!v->finished &&
> +            av_gettime_relative() - v->last_load_time >= reload_interval) {
> +            if ((ret = parse_playlist(c, v->url, v, NULL)) < 0) {
> +                av_log(v->parent, AV_LOG_WARNING, "Failed to reload playlist %d\n",
> +                       v->index);
> +                return ret;
> +            }
> +            /* If we need to reload the playlist again below (if
> +             * there's still no more segments), switch to a reload
> +             * interval of half the target duration. */
> +            reload_interval = v->target_duration / 2;
> +        }
> +        if (v->cur_seq_no < v->start_seq_no) {
> +            av_log(NULL, AV_LOG_WARNING,
> +                   "skipping %d segments ahead, expired from subtitle playlists\n",
> +                   v->start_seq_no - v->cur_seq_no);
> +            v->cur_seq_no = v->start_seq_no;
> +        }
> +        if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
> +            return AVERROR_EOF;
> +        }
> +    }
> +
> +    if (v->ctx == NULL) {
> +        AVInputFormat *in_fmt;
> +
> +        if (!(v->ctx = avformat_alloc_context())) {
> +            return AVERROR(ENOMEM);
> +        }
> +
> +        v->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
> +        if (!v->read_buffer){
> +            avformat_free_context(v->ctx);
> +            v->ctx = NULL;
> +            return AVERROR(ENOMEM);
> +        }
> +
> +        ffio_init_context(&v->pb, v->read_buffer, INITIAL_BUFFER_SIZE, 0, v,
> +                          read_data_simple, NULL, NULL);
> +        v->pb.seekable = 0;
> +        v->ctx->pb = &v->pb;
> +        v->ctx->io_open = nested_io_open;
> +
> +        ret = ff_copy_whiteblacklists(v->ctx, v->parent);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        in_fmt = av_find_input_format("webvtt");
> +        ret = avformat_open_input(&v->ctx, current_segment(v)->url, in_fmt, NULL);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    ret = av_read_frame(v->ctx, pkt);
> +    if (ret < 0) {
> +        ff_format_io_close(v->parent, &v->input);
> +        avformat_close_input(&v->ctx);
> +        if (ret == AVERROR_EOF) {
> +            v->cur_seq_no++;
> +            goto restart;
> +        }
> +    }
> +
> +    return ret;
> +}
> +

That seems to duplicate a lot of code from the normal read path...

>  static void add_renditions_to_variant(HLSContext *c, struct variant *var,
>                                        enum AVMediaType type, const char *group_id)
>  {
> @@ -1492,16 +1626,6 @@ static int save_avio_options(AVFormatContext *s)
>      return ret;
>  }
>  
> -static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> -                          int flags, AVDictionary **opts)
> -{
> -    av_log(s, AV_LOG_ERROR,
> -           "A HLS playlist item '%s' referred to an external file '%s'. "
> -           "Opening this file was forbidden for security reasons\n",
> -           s->filename, url);
> -    return AVERROR(EPERM);
> -}
> -
>  static void add_stream_to_programs(AVFormatContext *s, struct playlist *pls, AVStream *stream)
>  {
>      HLSContext *c = s->priv_data;
> @@ -1744,8 +1868,14 @@ static int hls_read_header(AVFormatContext *s)
>              pls->ctx = NULL;
>              goto fail;
>          }
> -        ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> -                          read_data, NULL, NULL);
> +
> +        if (pls->is_subtitle) {
> +            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> +                              read_data_simple, NULL, NULL);
> +        } else {
> +            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> +                              read_data, NULL, NULL);
> +        }
>          pls->pb.seekable = 0;
>          ret = av_probe_input_buffer(&pls->pb, &in_fmt, pls->segments[0]->url,
>                                      NULL, 0, 0);
> @@ -1843,6 +1973,8 @@ static int recheck_discard_flags(AVFormatContext *s, int first)
>          } else if (first && !pls->cur_needed && pls->needed) {
>              if (pls->input)
>                  ff_format_io_close(pls->parent, &pls->input);
> +            if (pls->is_subtitle)
> +                avformat_close_input(&pls->ctx);
>              pls->needed = 0;
>              changed = 1;
>              av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
> @@ -1909,7 +2041,12 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
>              while (1) {
>                  int64_t ts_diff;
>                  AVRational tb;
> -                ret = av_read_frame(pls->ctx, &pls->pkt);
> +                if (pls->is_subtitle) {
> +                    ret = read_packet_subtitle(pls, &pls->pkt);
> +                } else {
> +                    ret = av_read_frame(pls->ctx, &pls->pkt);
> +                }
> +
>                  if (ret < 0) {
>                      if (!avio_feof(&pls->pb) && ret != AVERROR_EOF)
>                          return ret;
> @@ -2087,6 +2224,9 @@ static int hls_read_seek(AVFormatContext *s, int stream_index,
>          /* Flush the packet queue of the subdemuxer. */
>          ff_read_frame_flush(pls->ctx);
>  
> +        if (pls->is_subtitle)
> +            avformat_close_input(&pls->ctx);
> +
>          pls->seek_timestamp = seek_timestamp;
>          pls->seek_flags = flags;
>
Franklin Phillips Nov. 16, 2016, 11:21 a.m. UTC | #2
On Wed, Nov 16, 2016 at 11:41:41AM +0100, wm4 wrote:
> On Wed, 16 Nov 2016 10:31:10 +0000
> Franklin Phillips <franklinphillips@gmx.com> wrote:
> 
> > This patch is a fix for ticket #2833.
> > 
> > Each subtitle segment is its own WebVTT file and has to demuxed individually.
> 
> > The timing of the subtitles are not perfect but it is the best I could
> 
> What does this mean?
> 
When I convert a mp4 file with subtitles into hls and then back into mp4, the subtitles are off by a couple 100 milliseconds usually. It seems to be caused by the first audio/video packet not having a 0 timestamp causing ffmpeg to subtract the timestamp of that first packet from all the subsequent packets.

> > do and it also does not take into account the X-TIMESTAMP-MAP header
> > in the WebVTT files which is needed to conform to the specification
> > (https://tools.ietf.org/html/draft-pantos-http-live-streaming-20#section-3.5).
> 
> I'm not entirely familiar how HLS works. Shouldn't this be fine as long
> as the webvtt events have the correct "final" timestamp?
> 
Yes, I believe so, the fall back in the specification is to assume the value of that header is 0 meaning no offset and all the examples I've seen have that value set to 0 anyway. I don't think it's a crucial feature that needs to be done ASAP.

> > 
> > Signed-off-by: Franklin Phillips <franklinphillips@gmx.com>
> > ---
> >  libavformat/hls.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 159 insertions(+), 19 deletions(-)
> > 
> > diff --git a/libavformat/hls.c b/libavformat/hls.c
> > index 3ae3c7c..bf13be4 100644
> > --- a/libavformat/hls.c
> > +++ b/libavformat/hls.c
> > @@ -153,6 +153,8 @@ struct playlist {
> >       * playlist, if any. */
> >      int n_init_sections;
> >      struct segment **init_sections;
> > +
> > +    int is_subtitle; /* Indicates if the playlist is for subtitles */
> >  };
> >  
> >  /*
> > @@ -312,6 +314,8 @@ static struct playlist *new_playlist(HLSContext *c, const char *url,
> >      pls->is_id3_timestamped = -1;
> >      pls->id3_mpegts_timestamp = AV_NOPTS_VALUE;
> >  
> > +    pls->is_subtitle = 0;
> > +
> >      dynarray_add(&c->playlists, &c->n_playlists, pls);
> >      return pls;
> >  }
> > @@ -482,11 +486,6 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
> >      if (type == AVMEDIA_TYPE_SUBTITLE && !info->uri[0])
> >          return NULL;
> >  
> > -    /* TODO: handle subtitles (each segment has to parsed separately) */
> > -    if (c->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
> > -        if (type == AVMEDIA_TYPE_SUBTITLE)
> > -            return NULL;
> > -
> >      rend = av_mallocz(sizeof(struct rendition));
> >      if (!rend)
> >          return NULL;
> > @@ -501,9 +500,14 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
> >      /* add the playlist if this is an external rendition */
> >      if (info->uri[0]) {
> >          rend->playlist = new_playlist(c, info->uri, url_base);
> > -        if (rend->playlist)
> > +        if (rend->playlist) {
> >              dynarray_add(&rend->playlist->renditions,
> >                           &rend->playlist->n_renditions, rend);
> > +            if (type == AVMEDIA_TYPE_SUBTITLE) {
> > +                rend->playlist->is_subtitle = 1;
> > +                rend->playlist->is_id3_timestamped = 0;
> > +            }
> > +        }
> >      }
> >  
> >      if (info->assoc_language[0]) {
> > @@ -1349,6 +1353,136 @@ reload:
> >      goto restart;
> >  }
> >  
> > +static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> > +                          int flags, AVDictionary **opts)
> > +{
> > +    av_log(s, AV_LOG_ERROR,
> > +           "A HLS playlist item '%s' referred to an external file '%s'. "
> > +           "Opening this file was forbidden for security reasons\n",
> > +           s->filename, url);
> > +    return AVERROR(EPERM);
> > +}
> > +
> > +static int read_data_simple(void *opaque, uint8_t *buf, int buf_size)
> > +{
> > +    struct playlist *v = opaque;
> > +    HLSContext *c = v->parent->priv_data;
> > +    struct segment *seg;
> > +
> > +    if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
> > +        return AVERROR_EOF;
> > +    } else {
> > +        seg = current_segment(v);
> > +    }
> > +
> > +    if (!v->input) {
> > +        open_input(c, v, seg);
> > +    }
> > +
> > +    return read_from_url(v, seg, buf, buf_size, READ_NORMAL);
> > +}
> > +
> > +static int read_packet_subtitle(struct playlist *v, AVPacket *pkt)
> > +{
> > +    HLSContext *c = v->parent->priv_data;
> > +    int ret, i;
> > +
> > +restart:
> > +    if (!v->needed)
> > +        return AVERROR_EOF;
> > +
> > +    if (!v->input) {
> > +        int64_t reload_interval;
> > +
> > +        /* Check that the playlist is still needed before opening a new
> > +         * segment. */
> > +        if (v->ctx && v->ctx->nb_streams) {
> > +            v->needed = 0;
> > +            for (i = 0; i < v->n_main_streams; i++) {
> > +                if (v->main_streams[i]->discard < AVDISCARD_ALL) {
> > +                    v->needed = 1;
> > +                    break;
> > +                }
> > +            }
> > +        }
> > +        if (!v->needed) {
> > +            av_log(v->parent, AV_LOG_INFO, "No longer receiving playlist %d\n",
> > +                v->index);
> > +            return AVERROR_EOF;
> > +        }
> > +
> > +        /* If this is a live stream and the reload interval has elapsed since
> > +         * the last playlist reload, reload the playlists now. */
> > +        reload_interval = default_reload_interval(v);
> > +
> > +        if (!v->finished &&
> > +            av_gettime_relative() - v->last_load_time >= reload_interval) {
> > +            if ((ret = parse_playlist(c, v->url, v, NULL)) < 0) {
> > +                av_log(v->parent, AV_LOG_WARNING, "Failed to reload playlist %d\n",
> > +                       v->index);
> > +                return ret;
> > +            }
> > +            /* If we need to reload the playlist again below (if
> > +             * there's still no more segments), switch to a reload
> > +             * interval of half the target duration. */
> > +            reload_interval = v->target_duration / 2;
> > +        }
> > +        if (v->cur_seq_no < v->start_seq_no) {
> > +            av_log(NULL, AV_LOG_WARNING,
> > +                   "skipping %d segments ahead, expired from subtitle playlists\n",
> > +                   v->start_seq_no - v->cur_seq_no);
> > +            v->cur_seq_no = v->start_seq_no;
> > +        }
> > +        if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
> > +            return AVERROR_EOF;
> > +        }
> > +    }
> > +
> > +    if (v->ctx == NULL) {
> > +        AVInputFormat *in_fmt;
> > +
> > +        if (!(v->ctx = avformat_alloc_context())) {
> > +            return AVERROR(ENOMEM);
> > +        }
> > +
> > +        v->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
> > +        if (!v->read_buffer){
> > +            avformat_free_context(v->ctx);
> > +            v->ctx = NULL;
> > +            return AVERROR(ENOMEM);
> > +        }
> > +
> > +        ffio_init_context(&v->pb, v->read_buffer, INITIAL_BUFFER_SIZE, 0, v,
> > +                          read_data_simple, NULL, NULL);
> > +        v->pb.seekable = 0;
> > +        v->ctx->pb = &v->pb;
> > +        v->ctx->io_open = nested_io_open;
> > +
> > +        ret = ff_copy_whiteblacklists(v->ctx, v->parent);
> > +        if (ret < 0) {
> > +            return ret;
> > +        }
> > +
> > +        in_fmt = av_find_input_format("webvtt");
> > +        ret = avformat_open_input(&v->ctx, current_segment(v)->url, in_fmt, NULL);
> > +        if (ret < 0) {
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    ret = av_read_frame(v->ctx, pkt);
> > +    if (ret < 0) {
> > +        ff_format_io_close(v->parent, &v->input);
> > +        avformat_close_input(&v->ctx);
> > +        if (ret == AVERROR_EOF) {
> > +            v->cur_seq_no++;
> > +            goto restart;
> > +        }
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> 
> That seems to duplicate a lot of code from the normal read path...
> 
I don't think there is a lot of duplication, and the duplication that there is, I don't think can be factored out cleanly due to the different ways the two methods are called and the difference in behaviour of the two methods.

> >  static void add_renditions_to_variant(HLSContext *c, struct variant *var,
> >                                        enum AVMediaType type, const char *group_id)
> >  {
> > @@ -1492,16 +1626,6 @@ static int save_avio_options(AVFormatContext *s)
> >      return ret;
> >  }
> >  
> > -static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
> > -                          int flags, AVDictionary **opts)
> > -{
> > -    av_log(s, AV_LOG_ERROR,
> > -           "A HLS playlist item '%s' referred to an external file '%s'. "
> > -           "Opening this file was forbidden for security reasons\n",
> > -           s->filename, url);
> > -    return AVERROR(EPERM);
> > -}
> > -
> >  static void add_stream_to_programs(AVFormatContext *s, struct playlist *pls, AVStream *stream)
> >  {
> >      HLSContext *c = s->priv_data;
> > @@ -1744,8 +1868,14 @@ static int hls_read_header(AVFormatContext *s)
> >              pls->ctx = NULL;
> >              goto fail;
> >          }
> > -        ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> > -                          read_data, NULL, NULL);
> > +
> > +        if (pls->is_subtitle) {
> > +            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> > +                              read_data_simple, NULL, NULL);
> > +        } else {
> > +            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
> > +                              read_data, NULL, NULL);
> > +        }
> >          pls->pb.seekable = 0;
> >          ret = av_probe_input_buffer(&pls->pb, &in_fmt, pls->segments[0]->url,
> >                                      NULL, 0, 0);
> > @@ -1843,6 +1973,8 @@ static int recheck_discard_flags(AVFormatContext *s, int first)
> >          } else if (first && !pls->cur_needed && pls->needed) {
> >              if (pls->input)
> >                  ff_format_io_close(pls->parent, &pls->input);
> > +            if (pls->is_subtitle)
> > +                avformat_close_input(&pls->ctx);
> >              pls->needed = 0;
> >              changed = 1;
> >              av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
> > @@ -1909,7 +2041,12 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
> >              while (1) {
> >                  int64_t ts_diff;
> >                  AVRational tb;
> > -                ret = av_read_frame(pls->ctx, &pls->pkt);
> > +                if (pls->is_subtitle) {
> > +                    ret = read_packet_subtitle(pls, &pls->pkt);
> > +                } else {
> > +                    ret = av_read_frame(pls->ctx, &pls->pkt);
> > +                }
> > +
> >                  if (ret < 0) {
> >                      if (!avio_feof(&pls->pb) && ret != AVERROR_EOF)
> >                          return ret;
> > @@ -2087,6 +2224,9 @@ static int hls_read_seek(AVFormatContext *s, int stream_index,
> >          /* Flush the packet queue of the subdemuxer. */
> >          ff_read_frame_flush(pls->ctx);
> >  
> > +        if (pls->is_subtitle)
> > +            avformat_close_input(&pls->ctx);
> > +
> >          pls->seek_timestamp = seek_timestamp;
> >          pls->seek_flags = flags;
> >  
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
wm4 Nov. 16, 2016, 11:33 a.m. UTC | #3
On Wed, 16 Nov 2016 11:21:59 +0000
Franklin Phillips <franklinphillips@gmx.com> wrote:

> On Wed, Nov 16, 2016 at 11:41:41AM +0100, wm4 wrote:
> > On Wed, 16 Nov 2016 10:31:10 +0000
> > Franklin Phillips <franklinphillips@gmx.com> wrote:
> >   
> > > This patch is a fix for ticket #2833.
> > > 
> > > Each subtitle segment is its own WebVTT file and has to demuxed individually.  
> >   
> > > The timing of the subtitles are not perfect but it is the best I could  
> > 
> > What does this mean?
> >   
> When I convert a mp4 file with subtitles into hls and then back into mp4, the subtitles are off by a couple 100 milliseconds usually. It seems to be caused by the first audio/video packet not having a 0 timestamp causing ffmpeg to subtract the timestamp of that first packet from all the subsequent packets.

FFmpeg determines a global start_time, which is probably used in
transcoding to offset all streams or so. But that shouldn't lead to
timestamp mismatches because it's the same offset.

Can you check the timestamps with ffprobe? Also make sure you return
subtitle packets ideally before audio/video packets with the same or a
higher timestamp. Otherwise it could happen that a subtitle packet is
too late to be rendered on video. (Though I don't know what happens
when remuxing subtitles in this scenario.)

> > > do and it also does not take into account the X-TIMESTAMP-MAP header
> > > in the WebVTT files which is needed to conform to the specification
> > > (https://tools.ietf.org/html/draft-pantos-http-live-streaming-20#section-3.5).  
> > 
> > I'm not entirely familiar how HLS works. Shouldn't this be fine as long
> > as the webvtt events have the correct "final" timestamp?
> >   
> Yes, I believe so, the fall back in the specification is to assume the value of that header is 0 meaning no offset and all the examples I've seen have that value set to 0 anyway. I don't think it's a crucial feature that needs to be done ASAP.
> 
> > > 

> > > [...]
> > > +  
> > 
> > That seems to duplicate a lot of code from the normal read path...
> >   
> I don't think there is a lot of duplication, and the duplication that there is, I don't think can be factored out cleanly due to the different ways the two methods are called and the difference in behaviour of the two methods.
> 

Well, if you say so. Still hurts a bit.
Franklin Phillips Nov. 16, 2016, 1:25 p.m. UTC | #4
On Wed, Nov 16, 2016 at 12:33:32PM +0100, wm4 wrote:
> On Wed, 16 Nov 2016 11:21:59 +0000
> Franklin Phillips <franklinphillips@gmx.com> wrote:
> 
> > On Wed, Nov 16, 2016 at 11:41:41AM +0100, wm4 wrote:
> > > On Wed, 16 Nov 2016 10:31:10 +0000
> > > Franklin Phillips <franklinphillips@gmx.com> wrote:
> > >   
> > > > This patch is a fix for ticket #2833.
> > > > 
> > > > Each subtitle segment is its own WebVTT file and has to demuxed individually.  
> > >   
> > > > The timing of the subtitles are not perfect but it is the best I could  
> > > 
> > > What does this mean?
> > >   
> > When I convert a mp4 file with subtitles into hls and then back into mp4, the subtitles are off by a couple 100 milliseconds usually. It seems to be caused by the first audio/video packet not having a 0 timestamp causing ffmpeg to subtract the timestamp of that first packet from all the subsequent packets.
> 
> FFmpeg determines a global start_time, which is probably used in
> transcoding to offset all streams or so. But that shouldn't lead to
> timestamp mismatches because it's the same offset.
> 
> Can you check the timestamps with ffprobe? Also make sure you return
> subtitle packets ideally before audio/video packets with the same or a
> higher timestamp. Otherwise it could happen that a subtitle packet is
> too late to be rendered on video. (Though I don't know what happens
> when remuxing subtitles in this scenario.)
> 
I think you are implying the subtitles are delayed, but that is not the case, in most cases I've found them to be early.

I just double checked what was happening and it is as follows:

demuxer -> ist_index:0 type:subtitle next_dts:NOPTS next_dts_time:NOPTS next_pts:NOPTS next_pts_time:NOPTS pkt_pts:127 pkt_pts_time:0.127 pkt_dts:127 pkt_dts_time:0.127 off:-1401000 off_time:-1.401
demuxer+ffmpeg -> ist_index:0 type:subtitle pkt_pts:-1274 pkt_pts_time:-1.274 pkt_dts:-1274 pkt_dts_time:-1.274 off:-1401000 off_time:-1.401
muxer <- type:subtitle pkt_pts:-1274 pkt_pts_time:-1.274 pkt_dts:-1274 pkt_dts_time:-1.274 size:45
demuxer -> ist_index:2 type:audio next_dts:NOPTS next_dts_time:NOPTS next_pts:NOPTS next_pts_time:NOPTS pkt_pts:126090 pkt_pts_time:1.401 pkt_dts:126090 pkt_dts_time:1.401 off:-1401000 off_time:-1.401
demuxer+ffmpeg -> ist_index:2 type:audio pkt_pts:0 pkt_pts_time:0 pkt_dts:0 pkt_dts_time:0 off:-1401000 off_time:-1.401
muxer <- type:audio pkt_pts:0 pkt_pts_time:0 pkt_dts:0 pkt_dts_time:0 size:407

This is showing the first two packets of a hls playlist, first one is the subtitle packet which has a timestamp of 0.127 (this is the timestamp that appears in the .vtt segment file so the demuxer appears to be working correctly), it is being offset by -1.401 which is the timestamp of the first audio packet. The problem is that the subtitle should appear 0.127 seconds from the start of the video and the offset is not needed. Now that I think about it, the issue may be with the hls muxer which was used to create these hls segments because the timestamps of the subtitles do not have the same point of reference as the audio/video packets.

> > > > do and it also does not take into account the X-TIMESTAMP-MAP header
> > > > in the WebVTT files which is needed to conform to the specification
> > > > (https://tools.ietf.org/html/draft-pantos-http-live-streaming-20#section-3.5).  
> > > 
> > > I'm not entirely familiar how HLS works. Shouldn't this be fine as long
> > > as the webvtt events have the correct "final" timestamp?
> > >   
> > Yes, I believe so, the fall back in the specification is to assume the value of that header is 0 meaning no offset and all the examples I've seen have that value set to 0 anyway. I don't think it's a crucial feature that needs to be done ASAP.
> > 
> > > > 
> 
> > > > [...]
> > > > +  
> > > 
> > > That seems to duplicate a lot of code from the normal read path...
> > >   
> > I don't think there is a lot of duplication, and the duplication that there is, I don't think can be factored out cleanly due to the different ways the two methods are called and the difference in behaviour of the two methods.
> > 
> 
> Well, if you say so. Still hurts a bit.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Franklin Phillips Nov. 29, 2016, 5:44 p.m. UTC | #5
Hi,

If there are no more comments can we look at merging this please?

Thanks
wm4 Nov. 29, 2016, 6:10 p.m. UTC | #6
On Tue, 29 Nov 2016 17:44:15 +0000
Franklin Phillips <franklinphillips@gmx.com> wrote:

> Hi,
> 
> If there are no more comments can we look at merging this please?

I think your previous post implied that subtitles are actually timed
correctly, right?

If so, I don't mind it being applied as-is.
Franklin Phillips Nov. 29, 2016, 7:05 p.m. UTC | #7
On Tue, Nov 29, 2016 at 07:10:52PM +0100, wm4 wrote:
> On Tue, 29 Nov 2016 17:44:15 +0000
> Franklin Phillips <franklinphillips@gmx.com> wrote:
> 
> > Hi,
> > 
> > If there are no more comments can we look at merging this please?
> 
> I think your previous post implied that subtitles are actually timed
> correctly, right?
The timing is correct as long as the X-TIMESTAMP-MAP header in the subtitle files is 0 because I haven't implemented that. If I encounter situations where that header contains a value other than 0 outside of the sample files from Apple I will consider implementing it.

> 
> If so, I don't mind it being applied as-is.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavformat/hls.c b/libavformat/hls.c
index 3ae3c7c..bf13be4 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -153,6 +153,8 @@  struct playlist {
      * playlist, if any. */
     int n_init_sections;
     struct segment **init_sections;
+
+    int is_subtitle; /* Indicates if the playlist is for subtitles */
 };
 
 /*
@@ -312,6 +314,8 @@  static struct playlist *new_playlist(HLSContext *c, const char *url,
     pls->is_id3_timestamped = -1;
     pls->id3_mpegts_timestamp = AV_NOPTS_VALUE;
 
+    pls->is_subtitle = 0;
+
     dynarray_add(&c->playlists, &c->n_playlists, pls);
     return pls;
 }
@@ -482,11 +486,6 @@  static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
     if (type == AVMEDIA_TYPE_SUBTITLE && !info->uri[0])
         return NULL;
 
-    /* TODO: handle subtitles (each segment has to parsed separately) */
-    if (c->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
-        if (type == AVMEDIA_TYPE_SUBTITLE)
-            return NULL;
-
     rend = av_mallocz(sizeof(struct rendition));
     if (!rend)
         return NULL;
@@ -501,9 +500,14 @@  static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
     /* add the playlist if this is an external rendition */
     if (info->uri[0]) {
         rend->playlist = new_playlist(c, info->uri, url_base);
-        if (rend->playlist)
+        if (rend->playlist) {
             dynarray_add(&rend->playlist->renditions,
                          &rend->playlist->n_renditions, rend);
+            if (type == AVMEDIA_TYPE_SUBTITLE) {
+                rend->playlist->is_subtitle = 1;
+                rend->playlist->is_id3_timestamped = 0;
+            }
+        }
     }
 
     if (info->assoc_language[0]) {
@@ -1349,6 +1353,136 @@  reload:
     goto restart;
 }
 
+static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
+                          int flags, AVDictionary **opts)
+{
+    av_log(s, AV_LOG_ERROR,
+           "A HLS playlist item '%s' referred to an external file '%s'. "
+           "Opening this file was forbidden for security reasons\n",
+           s->filename, url);
+    return AVERROR(EPERM);
+}
+
+static int read_data_simple(void *opaque, uint8_t *buf, int buf_size)
+{
+    struct playlist *v = opaque;
+    HLSContext *c = v->parent->priv_data;
+    struct segment *seg;
+
+    if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
+        return AVERROR_EOF;
+    } else {
+        seg = current_segment(v);
+    }
+
+    if (!v->input) {
+        open_input(c, v, seg);
+    }
+
+    return read_from_url(v, seg, buf, buf_size, READ_NORMAL);
+}
+
+static int read_packet_subtitle(struct playlist *v, AVPacket *pkt)
+{
+    HLSContext *c = v->parent->priv_data;
+    int ret, i;
+
+restart:
+    if (!v->needed)
+        return AVERROR_EOF;
+
+    if (!v->input) {
+        int64_t reload_interval;
+
+        /* Check that the playlist is still needed before opening a new
+         * segment. */
+        if (v->ctx && v->ctx->nb_streams) {
+            v->needed = 0;
+            for (i = 0; i < v->n_main_streams; i++) {
+                if (v->main_streams[i]->discard < AVDISCARD_ALL) {
+                    v->needed = 1;
+                    break;
+                }
+            }
+        }
+        if (!v->needed) {
+            av_log(v->parent, AV_LOG_INFO, "No longer receiving playlist %d\n",
+                v->index);
+            return AVERROR_EOF;
+        }
+
+        /* If this is a live stream and the reload interval has elapsed since
+         * the last playlist reload, reload the playlists now. */
+        reload_interval = default_reload_interval(v);
+
+        if (!v->finished &&
+            av_gettime_relative() - v->last_load_time >= reload_interval) {
+            if ((ret = parse_playlist(c, v->url, v, NULL)) < 0) {
+                av_log(v->parent, AV_LOG_WARNING, "Failed to reload playlist %d\n",
+                       v->index);
+                return ret;
+            }
+            /* If we need to reload the playlist again below (if
+             * there's still no more segments), switch to a reload
+             * interval of half the target duration. */
+            reload_interval = v->target_duration / 2;
+        }
+        if (v->cur_seq_no < v->start_seq_no) {
+            av_log(NULL, AV_LOG_WARNING,
+                   "skipping %d segments ahead, expired from subtitle playlists\n",
+                   v->start_seq_no - v->cur_seq_no);
+            v->cur_seq_no = v->start_seq_no;
+        }
+        if (v->cur_seq_no >= v->start_seq_no + v->n_segments) {
+            return AVERROR_EOF;
+        }
+    }
+
+    if (v->ctx == NULL) {
+        AVInputFormat *in_fmt;
+
+        if (!(v->ctx = avformat_alloc_context())) {
+            return AVERROR(ENOMEM);
+        }
+
+        v->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
+        if (!v->read_buffer){
+            avformat_free_context(v->ctx);
+            v->ctx = NULL;
+            return AVERROR(ENOMEM);
+        }
+
+        ffio_init_context(&v->pb, v->read_buffer, INITIAL_BUFFER_SIZE, 0, v,
+                          read_data_simple, NULL, NULL);
+        v->pb.seekable = 0;
+        v->ctx->pb = &v->pb;
+        v->ctx->io_open = nested_io_open;
+
+        ret = ff_copy_whiteblacklists(v->ctx, v->parent);
+        if (ret < 0) {
+            return ret;
+        }
+
+        in_fmt = av_find_input_format("webvtt");
+        ret = avformat_open_input(&v->ctx, current_segment(v)->url, in_fmt, NULL);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = av_read_frame(v->ctx, pkt);
+    if (ret < 0) {
+        ff_format_io_close(v->parent, &v->input);
+        avformat_close_input(&v->ctx);
+        if (ret == AVERROR_EOF) {
+            v->cur_seq_no++;
+            goto restart;
+        }
+    }
+
+    return ret;
+}
+
 static void add_renditions_to_variant(HLSContext *c, struct variant *var,
                                       enum AVMediaType type, const char *group_id)
 {
@@ -1492,16 +1626,6 @@  static int save_avio_options(AVFormatContext *s)
     return ret;
 }
 
-static int nested_io_open(AVFormatContext *s, AVIOContext **pb, const char *url,
-                          int flags, AVDictionary **opts)
-{
-    av_log(s, AV_LOG_ERROR,
-           "A HLS playlist item '%s' referred to an external file '%s'. "
-           "Opening this file was forbidden for security reasons\n",
-           s->filename, url);
-    return AVERROR(EPERM);
-}
-
 static void add_stream_to_programs(AVFormatContext *s, struct playlist *pls, AVStream *stream)
 {
     HLSContext *c = s->priv_data;
@@ -1744,8 +1868,14 @@  static int hls_read_header(AVFormatContext *s)
             pls->ctx = NULL;
             goto fail;
         }
-        ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
-                          read_data, NULL, NULL);
+
+        if (pls->is_subtitle) {
+            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
+                              read_data_simple, NULL, NULL);
+        } else {
+            ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, pls,
+                              read_data, NULL, NULL);
+        }
         pls->pb.seekable = 0;
         ret = av_probe_input_buffer(&pls->pb, &in_fmt, pls->segments[0]->url,
                                     NULL, 0, 0);
@@ -1843,6 +1973,8 @@  static int recheck_discard_flags(AVFormatContext *s, int first)
         } else if (first && !pls->cur_needed && pls->needed) {
             if (pls->input)
                 ff_format_io_close(pls->parent, &pls->input);
+            if (pls->is_subtitle)
+                avformat_close_input(&pls->ctx);
             pls->needed = 0;
             changed = 1;
             av_log(s, AV_LOG_INFO, "No longer receiving playlist %d\n", i);
@@ -1909,7 +2041,12 @@  static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
             while (1) {
                 int64_t ts_diff;
                 AVRational tb;
-                ret = av_read_frame(pls->ctx, &pls->pkt);
+                if (pls->is_subtitle) {
+                    ret = read_packet_subtitle(pls, &pls->pkt);
+                } else {
+                    ret = av_read_frame(pls->ctx, &pls->pkt);
+                }
+
                 if (ret < 0) {
                     if (!avio_feof(&pls->pb) && ret != AVERROR_EOF)
                         return ret;
@@ -2087,6 +2224,9 @@  static int hls_read_seek(AVFormatContext *s, int stream_index,
         /* Flush the packet queue of the subdemuxer. */
         ff_read_frame_flush(pls->ctx);
 
+        if (pls->is_subtitle)
+            avformat_close_input(&pls->ctx);
+
         pls->seek_timestamp = seek_timestamp;
         pls->seek_flags = flags;