diff mbox

[FFmpeg-devel] avcodec: add HDMV Text Subtitle decoder

Message ID 20170131142217.23731-1-onemda@gmail.com
State Superseded
Headers show

Commit Message

Paul B Mahol Jan. 31, 2017, 2:22 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/Makefile        |   2 +
 libavcodec/allcodecs.c     |   2 +
 libavcodec/textst_parser.c |  49 ++++++++++++++++++++
 libavcodec/textstdec.c     | 108 +++++++++++++++++++++++++++++++++++++++++++++
 libavformat/utils.c        |   1 +
 5 files changed, 162 insertions(+)
 create mode 100644 libavcodec/textst_parser.c
 create mode 100644 libavcodec/textstdec.c

Comments

Carl Eugen Hoyos Jan. 31, 2017, 3:29 p.m. UTC | #1
2017-01-31 15:22 GMT+01:00 Paul B Mahol <onemda@gmail.com>:

> +AVCodec ff_textst_decoder = {
> +    .name           = "textst",
> +    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),

Is this patch related to ticket #4481?

Thank you, Carl Eugen
Paul B Mahol Jan. 31, 2017, 3:32 p.m. UTC | #2
On 1/31/17, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
> 2017-01-31 15:22 GMT+01:00 Paul B Mahol <onemda@gmail.com>:
>
>> +AVCodec ff_textst_decoder = {
>> +    .name           = "textst",
>> +    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),
>
> Is this patch related to ticket #4481?

Yes!
wm4 Jan. 31, 2017, 3:42 p.m. UTC | #3
On Tue, 31 Jan 2017 15:22:17 +0100
Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/Makefile        |   2 +
>  libavcodec/allcodecs.c     |   2 +
>  libavcodec/textst_parser.c |  49 ++++++++++++++++++++
>  libavcodec/textstdec.c     | 108 +++++++++++++++++++++++++++++++++++++++++++++
>  libavformat/utils.c        |   1 +
>  5 files changed, 162 insertions(+)
>  create mode 100644 libavcodec/textst_parser.c
>  create mode 100644 libavcodec/textstdec.c
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 43a6add..edadb0f 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
>  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o h264data.o
>  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
>  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
> +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
>  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
>  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
>  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
> @@ -945,6 +946,7 @@ OBJS-$(CONFIG_RV30_PARSER)             += rv34_parser.o
>  OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
>  OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
>  OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
> +OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
>  OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o  \
>                                            simple_idct.o wmv2data.o
>  OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index f92b2b7..9a90533 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -581,6 +581,7 @@ void avcodec_register_all(void)
>      REGISTER_DECODER(SUBVIEWER,         subviewer);
>      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
>      REGISTER_ENCDEC (TEXT,              text);
> +    REGISTER_DECODER(TEXTST,            textst);
>      REGISTER_DECODER(VPLAYER,           vplayer);
>      REGISTER_ENCDEC (WEBVTT,            webvtt);
>      REGISTER_ENCDEC (XSUB,              xsub);
> @@ -704,6 +705,7 @@ void avcodec_register_all(void)
>      REGISTER_PARSER(RV40,               rv40);
>      REGISTER_PARSER(SIPR,               sipr);
>      REGISTER_PARSER(TAK,                tak);
> +    REGISTER_PARSER(TEXTST,             textst);
>      REGISTER_PARSER(VC1,                vc1);
>      REGISTER_PARSER(VORBIS,             vorbis);
>      REGISTER_PARSER(VP3,                vp3);
> diff --git a/libavcodec/textst_parser.c b/libavcodec/textst_parser.c
> new file mode 100644
> index 0000000..5079a96
> --- /dev/null
> +++ b/libavcodec/textst_parser.c
> @@ -0,0 +1,49 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * HDMV TextST subtitle parser
> + */
> +
> +#include "libavutil/intreadwrite.h"
> +#include "parser.h"
> +
> +static int textst_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
> +                        const uint8_t **poutbuf, int *poutbuf_size,
> +                        const uint8_t *buf, int buf_size)
> +{
> +    if (buf_size > 13) {
> +        int64_t end;
> +
> +        s1->pts = ((int64_t)(buf[3] & 1) << 32) | AV_RB32(&buf[4]);
> +        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
> +        s1->duration = (end - s1->pts);
> +    }
> +
> +    /* always return the full packet. this parser isn't doing any splitting or
> +       combining, only packet analysis */
> +    *poutbuf      = buf;
> +    *poutbuf_size = buf_size;
> +    return buf_size;
> +}
> +
> +AVCodecParser ff_textst_parser = {
> +    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
> +    .parser_parse   = textst_parse,
> +};

Why does it need to be in a parser, instead of the demuxer? It seems
like this codec exists only in .ts anyway, and the way PTS/duration is
extracted seems very closely tied to the format.

> diff --git a/libavcodec/textstdec.c b/libavcodec/textstdec.c
> new file mode 100644
> index 0000000..a259d2d
> --- /dev/null
> +++ b/libavcodec/textstdec.c
> @@ -0,0 +1,108 @@
> +/*
> + * HDMV TextST decoder
> + * Copyright (c) 2017 Paul B Mahol
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/bprint.h"
> +#include "avcodec.h"
> +#include "ass.h"
> +#include "bytestream.h"
> +
> +static int textst_event_to_ass(AVBPrint *buf, const char *p, int size)
> +{
> +    GetByteContext gb;
> +    int i, count;
> +
> +    bytestream2_init(&gb, p, size);
> +    count = bytestream2_get_byte(&gb);
> +
> +    if (count > 127) {
> +        int size = bytestream2_get_be16(&gb);
> +        bytestream2_skip(&gb, size);
> +    }
> +    if (bytestream2_get_bytes_left(&gb) > 2) {
> +        count = bytestream2_get_byte(&gb);
> +
> +        for (i = 0; i < count; i++) {
> +            int dlength;
> +
> +            bytestream2_skip(&gb, 2);
> +            dlength = bytestream2_get_be16(&gb);
> +
> +            while (dlength > 3) {
> +                int type, length;
> +                int code = bytestream2_get_byte(&gb);
> +
> +                dlength--;
> +                if (code != 0x1b)
> +                    continue;
> +
> +                type   = bytestream2_get_byte(&gb);
> +                length = bytestream2_get_byte(&gb);
> +                dlength -= (2 + length);
> +
> +                switch (type) {
> +                case 1:
> +                    av_bprint_append_data(buf, gb.buffer, FFMIN(length, bytestream2_get_bytes_left(&gb)));
> +                    bytestream2_skip(&gb, length);
> +                    break;
> +                case 0x0a:
> +                    av_bprintf(buf, "\n");
> +                default:
> +                    bytestream2_skip(&gb, length);
> +                    break;
> +                }
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int textst_decode_frame(AVCodecContext *avctx,
> +                               void *data, int *got_sub_ptr, AVPacket *avpkt)
> +{
> +    FFASSDecoderContext *s = avctx->priv_data;
> +    const char *ptr = avpkt->data;
> +    AVSubtitle *sub = data;
> +    AVBPrint buf;
> +    int ret = 0;
> +
> +    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
> +    if (ptr && avpkt->size > 13 && !textst_event_to_ass(&buf, ptr + 13, avpkt->size - 13))
> +        ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0, NULL, NULL);
> +    av_bprint_finalize(&buf, NULL);
> +    if (ret < 0)
> +        return ret;
> +    *got_sub_ptr = sub->num_rects > 0;
> +    return avpkt->size;
> +}
> +
> +AVCodec ff_textst_decoder = {
> +    .name           = "textst",
> +    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),
> +    .type           = AVMEDIA_TYPE_SUBTITLE,
> +    .id             = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
> +    .decode         = textst_decode_frame,
> +    .init           = ff_ass_subtitle_header_default,
> +    .flush          = ff_ass_decoder_flush,
> +    .priv_data_size = sizeof(FFASSDecoderContext),
> +};
> diff --git a/libavformat/utils.c b/libavformat/utils.c
> index 0711310..74f808f 100644
> --- a/libavformat/utils.c
> +++ b/libavformat/utils.c
> @@ -1445,6 +1445,7 @@ static int parse_packet(AVFormatContext *s, AVPacket *pkt, int stream_index)
>  
>          out_pkt.stream_index = st->index;
>          out_pkt.pts          = st->parser->pts;
> +        out_pkt.duration     = st->parser->duration;
>          out_pkt.dts          = st->parser->dts;
>          out_pkt.pos          = st->parser->pos;
>  

This overwrites values written to out_pkt.duration above this code.
Does it even pass FATE?
Petri Hintukainen Feb. 1, 2017, 9:05 a.m. UTC | #4
ti, 2017-01-31 kello 16:42 +0100, wm4 kirjoitti:
> On Tue, 31 Jan 2017 15:22:17 +0100
> Paul B Mahol <onemda@gmail.com> wrote:
> 
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavcodec/Makefile        |   2 +
> >  libavcodec/allcodecs.c     |   2 +
> >  libavcodec/textst_parser.c |  49 ++++++++++++++++++++
> >  libavcodec/textstdec.c     | 108
> > +++++++++++++++++++++++++++++++++++++++++++++
> >  libavformat/utils.c        |   1 +
> >  5 files changed, 162 insertions(+)
> >  create mode 100644 libavcodec/textst_parser.c
> >  create mode 100644 libavcodec/textstdec.c
> > 
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 43a6add..edadb0f 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            +=
> > svq1enc.o svq1.o  h263data.o  \
> >  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o
> > mpegutils.o h264data.o
> >  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
> >  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
> > +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
> >  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
> >  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
> >  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
> > @@ -945,6 +946,7 @@ OBJS-$(CONFIG_RV30_PARSER)             +=
> > rv34_parser.o
> >  OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
> >  OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
> >  OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
> > +OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
> >  OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o
> > vc1data.o  \
> >                                            simple_idct.o wmv2data.o
> >  OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
> > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> > index f92b2b7..9a90533 100644
> > --- a/libavcodec/allcodecs.c
> > +++ b/libavcodec/allcodecs.c
> > @@ -581,6 +581,7 @@ void avcodec_register_all(void)
> >      REGISTER_DECODER(SUBVIEWER,         subviewer);
> >      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
> >      REGISTER_ENCDEC (TEXT,              text);
> > +    REGISTER_DECODER(TEXTST,            textst);
> >      REGISTER_DECODER(VPLAYER,           vplayer);
> >      REGISTER_ENCDEC (WEBVTT,            webvtt);
> >      REGISTER_ENCDEC (XSUB,              xsub);
> > @@ -704,6 +705,7 @@ void avcodec_register_all(void)
> >      REGISTER_PARSER(RV40,               rv40);
> >      REGISTER_PARSER(SIPR,               sipr);
> >      REGISTER_PARSER(TAK,                tak);
> > +    REGISTER_PARSER(TEXTST,             textst);
> >      REGISTER_PARSER(VC1,                vc1);
> >      REGISTER_PARSER(VORBIS,             vorbis);
> >      REGISTER_PARSER(VP3,                vp3);
> > diff --git a/libavcodec/textst_parser.c
> > b/libavcodec/textst_parser.c
> > new file mode 100644
> > index 0000000..5079a96
> > --- /dev/null
> > +++ b/libavcodec/textst_parser.c
> > @@ -0,0 +1,49 @@
> > +/*
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General
> > Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * HDMV TextST subtitle parser
> > + */
> > +
> > +#include "libavutil/intreadwrite.h"
> > +#include "parser.h"
> > +
> > +static int textst_parse(AVCodecParserContext *s1, AVCodecContext
> > *avctx,
> > +                        const uint8_t **poutbuf, int
> > *poutbuf_size,
> > +                        const uint8_t *buf, int buf_size)
> > +{
> > +    if (buf_size > 13) {
> > +        int64_t end;
> > +
> > +        s1->pts = ((int64_t)(buf[3] & 1) << 32) |
> > AV_RB32(&buf[4]);
> > +        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
> > +        s1->duration = (end - s1->pts);
> > +    }
> > +
> > +    /* always return the full packet. this parser isn't doing any
> > splitting or
> > +       combining, only packet analysis */
> > +    *poutbuf      = buf;
> > +    *poutbuf_size = buf_size;
> > +    return buf_size;
> > +}
> > +
> > +AVCodecParser ff_textst_parser = {
> > +    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
> > +    .parser_parse   = textst_parse,
> > +};
> 
> Why does it need to be in a parser, instead of the demuxer? It seems
> like this codec exists only in .ts anyway, and the way PTS/duration
> is
> extracted seems very closely tied to the format.

I think a parser could be good idea, it could be useful when remuxing ?

In the original BluRay mpeg-ts files this codec uses PES private stream
2, so there are no timestamps in packets coming from mpeg-ts demuxer.

Parser could also store the first segment (style segment) in extradata.
I think matroska stores style segment in extradata (not 100% sure, I
don't have such files at hand). This kind of makes sense, it allows
starting playback from middle of the mkv file.

Maybe parser should not overwrite the timestamps if those are already
present ? I don't know if timestamps in subtitle segments are modified
when muxing to matroska.

> > diff --git a/libavcodec/textstdec.c b/libavcodec/textstdec.c
> > new file mode 100644
> > index 0000000..a259d2d
> > --- /dev/null
> > +++ b/libavcodec/textstdec.c
> > @@ -0,0 +1,108 @@
> > +/*
> > + * HDMV TextST decoder
> > + * Copyright (c) 2017 Paul B Mahol
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General
> > Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include <string.h>
> > +
> > +#include "libavutil/bprint.h"
> > +#include "avcodec.h"
> > +#include "ass.h"
> > +#include "bytestream.h"
> > +
> > +static int textst_event_to_ass(AVBPrint *buf, const char *p, int
> > size)
> > +{
> > +    GetByteContext gb;
> > +    int i, count;
> > +
> > +    bytestream2_init(&gb, p, size);
> > +    count = bytestream2_get_byte(&gb);
> > +
> > +    if (count > 127) {
> > +        int size = bytestream2_get_be16(&gb);
> > +        bytestream2_skip(&gb, size);
> > +    }
> > +    if (bytestream2_get_bytes_left(&gb) > 2) {
> > +        count = bytestream2_get_byte(&gb);
> > +
> > +        for (i = 0; i < count; i++) {
> > +            int dlength;
> > +
> > +            bytestream2_skip(&gb, 2);
> > +            dlength = bytestream2_get_be16(&gb);
> > +
> > +            while (dlength > 3) {
> > +                int type, length;
> > +                int code = bytestream2_get_byte(&gb);
> > +
> > +                dlength--;
> > +                if (code != 0x1b)
> > +                    continue;
> > +
> > +                type   = bytestream2_get_byte(&gb);
> > +                length = bytestream2_get_byte(&gb);
> > +                dlength -= (2 + length);
> > +
> > +                switch (type) {
> > +                case 1:
> > +                    av_bprint_append_data(buf, gb.buffer,
> > FFMIN(length, bytestream2_get_bytes_left(&gb)));
> > +                    bytestream2_skip(&gb, length);
> > +                    break;
> > +                case 0x0a:
> > +                    av_bprintf(buf, "\n");
> > +                default:
> > +                    bytestream2_skip(&gb, length);
> > +                    break;
> > +                }
> > +            }
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int textst_decode_frame(AVCodecContext *avctx,
> > +                               void *data, int *got_sub_ptr,
> > AVPacket *avpkt)
> > +{
> > +    FFASSDecoderContext *s = avctx->priv_data;
> > +    const char *ptr = avpkt->data;
> > +    AVSubtitle *sub = data;
> > +    AVBPrint buf;
> > +    int ret = 0;
> > +
> > +    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
> > +    if (ptr && avpkt->size > 13 && !textst_event_to_ass(&buf, ptr
> > + 13, avpkt->size - 13))
> > +        ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0,
> > NULL, NULL);
> > +    av_bprint_finalize(&buf, NULL);
> > +    if (ret < 0)
> > +        return ret;
> > +    *got_sub_ptr = sub->num_rects > 0;
> > +    return avpkt->size;
> > +}
> > +
> > +AVCodec ff_textst_decoder = {
> > +    .name           = "textst",
> > +    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST
> > subtitle"),
> > +    .type           = AVMEDIA_TYPE_SUBTITLE,
> > +    .id             = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
> > +    .decode         = textst_decode_frame,
> > +    .init           = ff_ass_subtitle_header_default,
> > +    .flush          = ff_ass_decoder_flush,
> > +    .priv_data_size = sizeof(FFASSDecoderContext),
> > +};
> > diff --git a/libavformat/utils.c b/libavformat/utils.c
> > index 0711310..74f808f 100644
> > --- a/libavformat/utils.c
> > +++ b/libavformat/utils.c
> > @@ -1445,6 +1445,7 @@ static int parse_packet(AVFormatContext *s,
> > AVPacket *pkt, int stream_index)
> >  
> >          out_pkt.stream_index = st->index;
> >          out_pkt.pts          = st->parser->pts;
> > +        out_pkt.duration     = st->parser->duration;
> >          out_pkt.dts          = st->parser->dts;
> >          out_pkt.pos          = st->parser->pos;
> >  
> 
> This overwrites values written to out_pkt.duration above this code.
> Does it even pass FATE?
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
wm4 Feb. 1, 2017, 9:19 a.m. UTC | #5
On Wed, 01 Feb 2017 11:05:48 +0200
Petri Hintukainen <phintuka@gmail.com> wrote:

> ti, 2017-01-31 kello 16:42 +0100, wm4 kirjoitti:
> > On Tue, 31 Jan 2017 15:22:17 +0100
> > Paul B Mahol <onemda@gmail.com> wrote:
> >   
> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > > ---
> > >  libavcodec/Makefile        |   2 +
> > >  libavcodec/allcodecs.c     |   2 +
> > >  libavcodec/textst_parser.c |  49 ++++++++++++++++++++
> > >  libavcodec/textstdec.c     | 108
> > > +++++++++++++++++++++++++++++++++++++++++++++
> > >  libavformat/utils.c        |   1 +
> > >  5 files changed, 162 insertions(+)
> > >  create mode 100644 libavcodec/textst_parser.c
> > >  create mode 100644 libavcodec/textstdec.c
> > > 
> > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > > index 43a6add..edadb0f 100644
> > > --- a/libavcodec/Makefile
> > > +++ b/libavcodec/Makefile
> > > @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            +=
> > > svq1enc.o svq1.o  h263data.o  \
> > >  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o
> > > mpegutils.o h264data.o
> > >  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
> > >  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
> > > +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
> > >  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
> > >  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
> > >  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
> > > @@ -945,6 +946,7 @@ OBJS-$(CONFIG_RV30_PARSER)             +=
> > > rv34_parser.o
> > >  OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
> > >  OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
> > >  OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
> > > +OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
> > >  OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o
> > > vc1data.o  \
> > >                                            simple_idct.o wmv2data.o
> > >  OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
> > > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> > > index f92b2b7..9a90533 100644
> > > --- a/libavcodec/allcodecs.c
> > > +++ b/libavcodec/allcodecs.c
> > > @@ -581,6 +581,7 @@ void avcodec_register_all(void)
> > >      REGISTER_DECODER(SUBVIEWER,         subviewer);
> > >      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
> > >      REGISTER_ENCDEC (TEXT,              text);
> > > +    REGISTER_DECODER(TEXTST,            textst);
> > >      REGISTER_DECODER(VPLAYER,           vplayer);
> > >      REGISTER_ENCDEC (WEBVTT,            webvtt);
> > >      REGISTER_ENCDEC (XSUB,              xsub);
> > > @@ -704,6 +705,7 @@ void avcodec_register_all(void)
> > >      REGISTER_PARSER(RV40,               rv40);
> > >      REGISTER_PARSER(SIPR,               sipr);
> > >      REGISTER_PARSER(TAK,                tak);
> > > +    REGISTER_PARSER(TEXTST,             textst);
> > >      REGISTER_PARSER(VC1,                vc1);
> > >      REGISTER_PARSER(VORBIS,             vorbis);
> > >      REGISTER_PARSER(VP3,                vp3);
> > > diff --git a/libavcodec/textst_parser.c
> > > b/libavcodec/textst_parser.c
> > > new file mode 100644
> > > index 0000000..5079a96
> > > --- /dev/null
> > > +++ b/libavcodec/textst_parser.c
> > > @@ -0,0 +1,49 @@
> > > +/*
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or
> > > + * modify it under the terms of the GNU Lesser General Public
> > > + * License as published by the Free Software Foundation; either
> > > + * version 2.1 of the License, or (at your option) any later
> > > version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > > GNU
> > > + * Lesser General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU Lesser General
> > > Public
> > > + * License along with FFmpeg; if not, write to the Free Software
> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > > 02110-1301 USA
> > > + */
> > > +
> > > +/**
> > > + * @file
> > > + * HDMV TextST subtitle parser
> > > + */
> > > +
> > > +#include "libavutil/intreadwrite.h"
> > > +#include "parser.h"
> > > +
> > > +static int textst_parse(AVCodecParserContext *s1, AVCodecContext
> > > *avctx,
> > > +                        const uint8_t **poutbuf, int
> > > *poutbuf_size,
> > > +                        const uint8_t *buf, int buf_size)
> > > +{
> > > +    if (buf_size > 13) {
> > > +        int64_t end;
> > > +
> > > +        s1->pts = ((int64_t)(buf[3] & 1) << 32) |
> > > AV_RB32(&buf[4]);
> > > +        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
> > > +        s1->duration = (end - s1->pts);
> > > +    }
> > > +
> > > +    /* always return the full packet. this parser isn't doing any
> > > splitting or
> > > +       combining, only packet analysis */
> > > +    *poutbuf      = buf;
> > > +    *poutbuf_size = buf_size;
> > > +    return buf_size;
> > > +}
> > > +
> > > +AVCodecParser ff_textst_parser = {
> > > +    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
> > > +    .parser_parse   = textst_parse,
> > > +};  
> > 
> > Why does it need to be in a parser, instead of the demuxer? It seems
> > like this codec exists only in .ts anyway, and the way PTS/duration
> > is
> > extracted seems very closely tied to the format.  
> 
> I think a parser could be good idea, it could be useful when remuxing ?
> 
> In the original BluRay mpeg-ts files this codec uses PES private stream
> 2, so there are no timestamps in packets coming from mpeg-ts demuxer.
> 
> Parser could also store the first segment (style segment) in extradata.
> I think matroska stores style segment in extradata (not 100% sure, I
> don't have such files at hand). This kind of makes sense, it allows
> starting playback from middle of the mkv file.
> 
> Maybe parser should not overwrite the timestamps if those are already
> present ? I don't know if timestamps in subtitle segments are modified
> when muxing to matroska.

This codec is supported in mpeg-ts only. There are no hints that it's
supported in Matroska. If it were, the parser would break _very_ badly
on it when demuxing from mkv too.
Paul B Mahol Feb. 1, 2017, 9:23 a.m. UTC | #6
On 2/1/17, wm4 <nfxjfg@googlemail.com> wrote:
> On Wed, 01 Feb 2017 11:05:48 +0200
> Petri Hintukainen <phintuka@gmail.com> wrote:
>
>> ti, 2017-01-31 kello 16:42 +0100, wm4 kirjoitti:
>> > On Tue, 31 Jan 2017 15:22:17 +0100
>> > Paul B Mahol <onemda@gmail.com> wrote:
>> >
>> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> > > ---
>> > >  libavcodec/Makefile        |   2 +
>> > >  libavcodec/allcodecs.c     |   2 +
>> > >  libavcodec/textst_parser.c |  49 ++++++++++++++++++++
>> > >  libavcodec/textstdec.c     | 108
>> > > +++++++++++++++++++++++++++++++++++++++++++++
>> > >  libavformat/utils.c        |   1 +
>> > >  5 files changed, 162 insertions(+)
>> > >  create mode 100644 libavcodec/textst_parser.c
>> > >  create mode 100644 libavcodec/textstdec.c
>> > >
>> > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> > > index 43a6add..edadb0f 100644
>> > > --- a/libavcodec/Makefile
>> > > +++ b/libavcodec/Makefile
>> > > @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            +=
>> > > svq1enc.o svq1.o  h263data.o  \
>> > >  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o
>> > > mpegutils.o h264data.o
>> > >  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
>> > >  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
>> > > +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
>> > >  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
>> > >  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
>> > >  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
>> > > @@ -945,6 +946,7 @@ OBJS-$(CONFIG_RV30_PARSER)             +=
>> > > rv34_parser.o
>> > >  OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
>> > >  OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
>> > >  OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
>> > > +OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
>> > >  OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o
>> > > vc1data.o  \
>> > >                                            simple_idct.o wmv2data.o
>> > >  OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
>> > > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
>> > > index f92b2b7..9a90533 100644
>> > > --- a/libavcodec/allcodecs.c
>> > > +++ b/libavcodec/allcodecs.c
>> > > @@ -581,6 +581,7 @@ void avcodec_register_all(void)
>> > >      REGISTER_DECODER(SUBVIEWER,         subviewer);
>> > >      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
>> > >      REGISTER_ENCDEC (TEXT,              text);
>> > > +    REGISTER_DECODER(TEXTST,            textst);
>> > >      REGISTER_DECODER(VPLAYER,           vplayer);
>> > >      REGISTER_ENCDEC (WEBVTT,            webvtt);
>> > >      REGISTER_ENCDEC (XSUB,              xsub);
>> > > @@ -704,6 +705,7 @@ void avcodec_register_all(void)
>> > >      REGISTER_PARSER(RV40,               rv40);
>> > >      REGISTER_PARSER(SIPR,               sipr);
>> > >      REGISTER_PARSER(TAK,                tak);
>> > > +    REGISTER_PARSER(TEXTST,             textst);
>> > >      REGISTER_PARSER(VC1,                vc1);
>> > >      REGISTER_PARSER(VORBIS,             vorbis);
>> > >      REGISTER_PARSER(VP3,                vp3);
>> > > diff --git a/libavcodec/textst_parser.c
>> > > b/libavcodec/textst_parser.c
>> > > new file mode 100644
>> > > index 0000000..5079a96
>> > > --- /dev/null
>> > > +++ b/libavcodec/textst_parser.c
>> > > @@ -0,0 +1,49 @@
>> > > +/*
>> > > + * This file is part of FFmpeg.
>> > > + *
>> > > + * FFmpeg is free software; you can redistribute it and/or
>> > > + * modify it under the terms of the GNU Lesser General Public
>> > > + * License as published by the Free Software Foundation; either
>> > > + * version 2.1 of the License, or (at your option) any later
>> > > version.
>> > > + *
>> > > + * FFmpeg is distributed in the hope that it will be useful,
>> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> > > GNU
>> > > + * Lesser General Public License for more details.
>> > > + *
>> > > + * You should have received a copy of the GNU Lesser General
>> > > Public
>> > > + * License along with FFmpeg; if not, write to the Free Software
>> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
>> > > 02110-1301 USA
>> > > + */
>> > > +
>> > > +/**
>> > > + * @file
>> > > + * HDMV TextST subtitle parser
>> > > + */
>> > > +
>> > > +#include "libavutil/intreadwrite.h"
>> > > +#include "parser.h"
>> > > +
>> > > +static int textst_parse(AVCodecParserContext *s1, AVCodecContext
>> > > *avctx,
>> > > +                        const uint8_t **poutbuf, int
>> > > *poutbuf_size,
>> > > +                        const uint8_t *buf, int buf_size)
>> > > +{
>> > > +    if (buf_size > 13) {
>> > > +        int64_t end;
>> > > +
>> > > +        s1->pts = ((int64_t)(buf[3] & 1) << 32) |
>> > > AV_RB32(&buf[4]);
>> > > +        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
>> > > +        s1->duration = (end - s1->pts);
>> > > +    }
>> > > +
>> > > +    /* always return the full packet. this parser isn't doing any
>> > > splitting or
>> > > +       combining, only packet analysis */
>> > > +    *poutbuf      = buf;
>> > > +    *poutbuf_size = buf_size;
>> > > +    return buf_size;
>> > > +}
>> > > +
>> > > +AVCodecParser ff_textst_parser = {
>> > > +    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
>> > > +    .parser_parse   = textst_parse,
>> > > +};
>> >
>> > Why does it need to be in a parser, instead of the demuxer? It seems
>> > like this codec exists only in .ts anyway, and the way PTS/duration
>> > is
>> > extracted seems very closely tied to the format.
>>
>> I think a parser could be good idea, it could be useful when remuxing ?
>>
>> In the original BluRay mpeg-ts files this codec uses PES private stream
>> 2, so there are no timestamps in packets coming from mpeg-ts demuxer.
>>
>> Parser could also store the first segment (style segment) in extradata.
>> I think matroska stores style segment in extradata (not 100% sure, I
>> don't have such files at hand). This kind of makes sense, it allows
>> starting playback from middle of the mkv file.
>>
>> Maybe parser should not overwrite the timestamps if those are already
>> present ? I don't know if timestamps in subtitle segments are modified
>> when muxing to matroska.
>
> This codec is supported in mpeg-ts only. There are no hints that it's
> supported in Matroska. If it were, the parser would break _very_ badly
> on it when demuxing from mkv too.

It _IS_ supported in matroska as S_HDMV/TEXTST
wm4 Feb. 1, 2017, 9:35 a.m. UTC | #7
On Wed, 1 Feb 2017 10:23:02 +0100
Paul B Mahol <onemda@gmail.com> wrote:

> On 2/1/17, wm4 <nfxjfg@googlemail.com> wrote:
> > On Wed, 01 Feb 2017 11:05:48 +0200
> > Petri Hintukainen <phintuka@gmail.com> wrote:
> >  
> >> ti, 2017-01-31 kello 16:42 +0100, wm4 kirjoitti:  
> >> > On Tue, 31 Jan 2017 15:22:17 +0100
> >> > Paul B Mahol <onemda@gmail.com> wrote:
> >> >  
> >> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> >> > > ---
> >> > >  libavcodec/Makefile        |   2 +
> >> > >  libavcodec/allcodecs.c     |   2 +
> >> > >  libavcodec/textst_parser.c |  49 ++++++++++++++++++++
> >> > >  libavcodec/textstdec.c     | 108
> >> > > +++++++++++++++++++++++++++++++++++++++++++++
> >> > >  libavformat/utils.c        |   1 +
> >> > >  5 files changed, 162 insertions(+)
> >> > >  create mode 100644 libavcodec/textst_parser.c
> >> > >  create mode 100644 libavcodec/textstdec.c
> >> > >
> >> > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> >> > > index 43a6add..edadb0f 100644
> >> > > --- a/libavcodec/Makefile
> >> > > +++ b/libavcodec/Makefile
> >> > > @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            +=
> >> > > svq1enc.o svq1.o  h263data.o  \
> >> > >  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o
> >> > > mpegutils.o h264data.o
> >> > >  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
> >> > >  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
> >> > > +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
> >> > >  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
> >> > >  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
> >> > >  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
> >> > > @@ -945,6 +946,7 @@ OBJS-$(CONFIG_RV30_PARSER)             +=
> >> > > rv34_parser.o
> >> > >  OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
> >> > >  OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
> >> > >  OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
> >> > > +OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
> >> > >  OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o
> >> > > vc1data.o  \
> >> > >                                            simple_idct.o wmv2data.o
> >> > >  OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
> >> > > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> >> > > index f92b2b7..9a90533 100644
> >> > > --- a/libavcodec/allcodecs.c
> >> > > +++ b/libavcodec/allcodecs.c
> >> > > @@ -581,6 +581,7 @@ void avcodec_register_all(void)
> >> > >      REGISTER_DECODER(SUBVIEWER,         subviewer);
> >> > >      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
> >> > >      REGISTER_ENCDEC (TEXT,              text);
> >> > > +    REGISTER_DECODER(TEXTST,            textst);
> >> > >      REGISTER_DECODER(VPLAYER,           vplayer);
> >> > >      REGISTER_ENCDEC (WEBVTT,            webvtt);
> >> > >      REGISTER_ENCDEC (XSUB,              xsub);
> >> > > @@ -704,6 +705,7 @@ void avcodec_register_all(void)
> >> > >      REGISTER_PARSER(RV40,               rv40);
> >> > >      REGISTER_PARSER(SIPR,               sipr);
> >> > >      REGISTER_PARSER(TAK,                tak);
> >> > > +    REGISTER_PARSER(TEXTST,             textst);
> >> > >      REGISTER_PARSER(VC1,                vc1);
> >> > >      REGISTER_PARSER(VORBIS,             vorbis);
> >> > >      REGISTER_PARSER(VP3,                vp3);
> >> > > diff --git a/libavcodec/textst_parser.c
> >> > > b/libavcodec/textst_parser.c
> >> > > new file mode 100644
> >> > > index 0000000..5079a96
> >> > > --- /dev/null
> >> > > +++ b/libavcodec/textst_parser.c
> >> > > @@ -0,0 +1,49 @@
> >> > > +/*
> >> > > + * This file is part of FFmpeg.
> >> > > + *
> >> > > + * FFmpeg is free software; you can redistribute it and/or
> >> > > + * modify it under the terms of the GNU Lesser General Public
> >> > > + * License as published by the Free Software Foundation; either
> >> > > + * version 2.1 of the License, or (at your option) any later
> >> > > version.
> >> > > + *
> >> > > + * FFmpeg is distributed in the hope that it will be useful,
> >> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >> > > GNU
> >> > > + * Lesser General Public License for more details.
> >> > > + *
> >> > > + * You should have received a copy of the GNU Lesser General
> >> > > Public
> >> > > + * License along with FFmpeg; if not, write to the Free Software
> >> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> >> > > 02110-1301 USA
> >> > > + */
> >> > > +
> >> > > +/**
> >> > > + * @file
> >> > > + * HDMV TextST subtitle parser
> >> > > + */
> >> > > +
> >> > > +#include "libavutil/intreadwrite.h"
> >> > > +#include "parser.h"
> >> > > +
> >> > > +static int textst_parse(AVCodecParserContext *s1, AVCodecContext
> >> > > *avctx,
> >> > > +                        const uint8_t **poutbuf, int
> >> > > *poutbuf_size,
> >> > > +                        const uint8_t *buf, int buf_size)
> >> > > +{
> >> > > +    if (buf_size > 13) {
> >> > > +        int64_t end;
> >> > > +
> >> > > +        s1->pts = ((int64_t)(buf[3] & 1) << 32) |
> >> > > AV_RB32(&buf[4]);
> >> > > +        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
> >> > > +        s1->duration = (end - s1->pts);
> >> > > +    }
> >> > > +
> >> > > +    /* always return the full packet. this parser isn't doing any
> >> > > splitting or
> >> > > +       combining, only packet analysis */
> >> > > +    *poutbuf      = buf;
> >> > > +    *poutbuf_size = buf_size;
> >> > > +    return buf_size;
> >> > > +}
> >> > > +
> >> > > +AVCodecParser ff_textst_parser = {
> >> > > +    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
> >> > > +    .parser_parse   = textst_parse,
> >> > > +};  
> >> >
> >> > Why does it need to be in a parser, instead of the demuxer? It seems
> >> > like this codec exists only in .ts anyway, and the way PTS/duration
> >> > is
> >> > extracted seems very closely tied to the format.  
> >>
> >> I think a parser could be good idea, it could be useful when remuxing ?
> >>
> >> In the original BluRay mpeg-ts files this codec uses PES private stream
> >> 2, so there are no timestamps in packets coming from mpeg-ts demuxer.
> >>
> >> Parser could also store the first segment (style segment) in extradata.
> >> I think matroska stores style segment in extradata (not 100% sure, I
> >> don't have such files at hand). This kind of makes sense, it allows
> >> starting playback from middle of the mkv file.
> >>
> >> Maybe parser should not overwrite the timestamps if those are already
> >> present ? I don't know if timestamps in subtitle segments are modified
> >> when muxing to matroska.  
> >
> > This codec is supported in mpeg-ts only. There are no hints that it's
> > supported in Matroska. If it were, the parser would break _very_ badly
> > on it when demuxing from mkv too.  
> 
> It _IS_ supported in matroska as S_HDMV/TEXTST

In that case it's probably a good idea to make the ffmpeg-internal
packet format the same as Matroska's, as they must have found a way to
handle the embedded timestamps in a good way.

But I can't imagine it'd work if your proposed parser is ran on those
Matroska packets.
diff mbox

Patch

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 43a6add..edadb0f 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -539,6 +539,7 @@  OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
 OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o h264data.o
 OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
 OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
+OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
 OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
 OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
 OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
@@ -945,6 +946,7 @@  OBJS-$(CONFIG_RV30_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_SIPR_PARSER)             += sipr_parser.o
 OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
+OBJS-$(CONFIG_TEXTST_PARSER)           += textst_parser.o
 OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o  \
                                           simple_idct.o wmv2data.o
 OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index f92b2b7..9a90533 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -581,6 +581,7 @@  void avcodec_register_all(void)
     REGISTER_DECODER(SUBVIEWER,         subviewer);
     REGISTER_DECODER(SUBVIEWER1,        subviewer1);
     REGISTER_ENCDEC (TEXT,              text);
+    REGISTER_DECODER(TEXTST,            textst);
     REGISTER_DECODER(VPLAYER,           vplayer);
     REGISTER_ENCDEC (WEBVTT,            webvtt);
     REGISTER_ENCDEC (XSUB,              xsub);
@@ -704,6 +705,7 @@  void avcodec_register_all(void)
     REGISTER_PARSER(RV40,               rv40);
     REGISTER_PARSER(SIPR,               sipr);
     REGISTER_PARSER(TAK,                tak);
+    REGISTER_PARSER(TEXTST,             textst);
     REGISTER_PARSER(VC1,                vc1);
     REGISTER_PARSER(VORBIS,             vorbis);
     REGISTER_PARSER(VP3,                vp3);
diff --git a/libavcodec/textst_parser.c b/libavcodec/textst_parser.c
new file mode 100644
index 0000000..5079a96
--- /dev/null
+++ b/libavcodec/textst_parser.c
@@ -0,0 +1,49 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * HDMV TextST subtitle parser
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+static int textst_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    if (buf_size > 13) {
+        int64_t end;
+
+        s1->pts = ((int64_t)(buf[3] & 1) << 32) | AV_RB32(&buf[4]);
+        end = ((int64_t)(buf[8] & 1) << 32) | AV_RB32(&buf[9]);
+        s1->duration = (end - s1->pts);
+    }
+
+    /* always return the full packet. this parser isn't doing any splitting or
+       combining, only packet analysis */
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return buf_size;
+}
+
+AVCodecParser ff_textst_parser = {
+    .codec_ids      = { AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
+    .parser_parse   = textst_parse,
+};
diff --git a/libavcodec/textstdec.c b/libavcodec/textstdec.c
new file mode 100644
index 0000000..a259d2d
--- /dev/null
+++ b/libavcodec/textstdec.c
@@ -0,0 +1,108 @@ 
+/*
+ * HDMV TextST decoder
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/bprint.h"
+#include "avcodec.h"
+#include "ass.h"
+#include "bytestream.h"
+
+static int textst_event_to_ass(AVBPrint *buf, const char *p, int size)
+{
+    GetByteContext gb;
+    int i, count;
+
+    bytestream2_init(&gb, p, size);
+    count = bytestream2_get_byte(&gb);
+
+    if (count > 127) {
+        int size = bytestream2_get_be16(&gb);
+        bytestream2_skip(&gb, size);
+    }
+    if (bytestream2_get_bytes_left(&gb) > 2) {
+        count = bytestream2_get_byte(&gb);
+
+        for (i = 0; i < count; i++) {
+            int dlength;
+
+            bytestream2_skip(&gb, 2);
+            dlength = bytestream2_get_be16(&gb);
+
+            while (dlength > 3) {
+                int type, length;
+                int code = bytestream2_get_byte(&gb);
+
+                dlength--;
+                if (code != 0x1b)
+                    continue;
+
+                type   = bytestream2_get_byte(&gb);
+                length = bytestream2_get_byte(&gb);
+                dlength -= (2 + length);
+
+                switch (type) {
+                case 1:
+                    av_bprint_append_data(buf, gb.buffer, FFMIN(length, bytestream2_get_bytes_left(&gb)));
+                    bytestream2_skip(&gb, length);
+                    break;
+                case 0x0a:
+                    av_bprintf(buf, "\n");
+                default:
+                    bytestream2_skip(&gb, length);
+                    break;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int textst_decode_frame(AVCodecContext *avctx,
+                               void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    FFASSDecoderContext *s = avctx->priv_data;
+    const char *ptr = avpkt->data;
+    AVSubtitle *sub = data;
+    AVBPrint buf;
+    int ret = 0;
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if (ptr && avpkt->size > 13 && !textst_event_to_ass(&buf, ptr + 13, avpkt->size - 13))
+        ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0, NULL, NULL);
+    av_bprint_finalize(&buf, NULL);
+    if (ret < 0)
+        return ret;
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+AVCodec ff_textst_decoder = {
+    .name           = "textst",
+    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+    .decode         = textst_decode_frame,
+    .init           = ff_ass_subtitle_header_default,
+    .flush          = ff_ass_decoder_flush,
+    .priv_data_size = sizeof(FFASSDecoderContext),
+};
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 0711310..74f808f 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1445,6 +1445,7 @@  static int parse_packet(AVFormatContext *s, AVPacket *pkt, int stream_index)
 
         out_pkt.stream_index = st->index;
         out_pkt.pts          = st->parser->pts;
+        out_pkt.duration     = st->parser->duration;
         out_pkt.dts          = st->parser->dts;
         out_pkt.pos          = st->parser->pos;