[FFmpeg-devel] avcodec: add HDMV Text Subtitle decoder

Message ID	20170201111251.8962-1-onemda@gmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Paul B Mahol <onemda@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Wed, 1 Feb 2017 12:12:51 +0100 Message-Id: <20170201111251.8962-1-onemda@gmail.com> Subject: [FFmpeg-devel] [PATCH] avcodec: add HDMV Text Subtitle decoder Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Message ID

20170201111251.8962-1-onemda@gmail.com

State

New

Headers

Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
	designates 79.124.17.100 as permitted sender)
	client-ip=79.124.17.100; 
From: Paul B Mahol <onemda@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed,  1 Feb 2017 12:12:51 +0100
Message-Id: <20170201111251.8962-1-onemda@gmail.com>
Subject: [FFmpeg-devel] [PATCH] avcodec: add HDMV Text Subtitle decoder
Precedence: list
Reply-To: FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Commit Message

Paul B Mahol Feb. 1, 2017, 11:12 a.m. UTC

Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/Makefile    |   1 +
 libavcodec/allcodecs.c |   1 +
 libavcodec/textstdec.c | 409 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 411 insertions(+)
 create mode 100644 libavcodec/textstdec.c

Comments

wm4 Feb. 1, 2017, 12:10 p.m. UTC | #1

On Wed,  1 Feb 2017 12:12:51 +0100
Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/textstdec.c | 409 +++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 411 insertions(+)
>  create mode 100644 libavcodec/textstdec.c
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 43a6add..c042984 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -539,6 +539,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
>  OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o h264data.o
>  OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
>  OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
> +OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
>  OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
>  OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
>  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index f92b2b7..30a6309 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -581,6 +581,7 @@ void avcodec_register_all(void)
>      REGISTER_DECODER(SUBVIEWER,         subviewer);
>      REGISTER_DECODER(SUBVIEWER1,        subviewer1);
>      REGISTER_ENCDEC (TEXT,              text);
> +    REGISTER_DECODER(TEXTST,            textst);
>      REGISTER_DECODER(VPLAYER,           vplayer);
>      REGISTER_ENCDEC (WEBVTT,            webvtt);
>      REGISTER_ENCDEC (XSUB,              xsub);
> diff --git a/libavcodec/textstdec.c b/libavcodec/textstdec.c
> new file mode 100644
> index 0000000..5110cb8
> --- /dev/null
> +++ b/libavcodec/textstdec.c
> @@ -0,0 +1,409 @@
> +/*
> + * HDMV TextST decoder
> + *
> + * Copyright (c) 2014 Petri Hintukainen <phintuka@users.sourceforge.net>
> + * Copyright (c) 2017 Paul B Mahol
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <string.h>
> +
> +#include "libavutil/bprint.h"
> +#include "libavutil/colorspace.h"
> +
> +#include "avcodec.h"
> +#include "ass.h"
> +#include "bytestream.h"
> +#include "mathops.h"
> +
> +/* TODO: actually make use of various styles */
> +
> +enum {
> +    DIALOG_STYLE_SEGMENT        = 0x81,
> +    DIALOG_PRESENTATION_SEGMENT = 0x82,
> +};
> +
> +enum {
> +    BD_TEXTST_DATA_ESCAPE      = 0x1b,
> +    BD_TEXTST_DATA_STRING      = 1,
> +    BD_TEXTST_DATA_FONT_ID     = 2,
> +    BD_TEXTST_DATA_FONT_STYLE  = 3,
> +    BD_TEXTST_DATA_FONT_SIZE   = 4,
> +    BD_TEXTST_DATA_FONT_COLOR  = 5,
> +    BD_TEXTST_DATA_NEWLINE     = 0x0a,
> +    BD_TEXTST_DATA_RESET_STYLE = 0x0b,
> +};
> +
> +typedef struct TextSTRect {
> +    uint16_t xpos;
> +    uint16_t ypos;
> +    uint16_t width;
> +    uint16_t height;
> +} TextSTRect;
> +
> +typedef struct TextSTRegionInfo {
> +    TextSTRect region;
> +    uint8_t    background_color; /* palette entry id ref */
> +} TextSTRegionInfo;
> +
> +typedef struct TextSTFontStyle {
> +    uint8_t bold;
> +    uint8_t italic;
> +    uint8_t outline_border;
> +} TextSTFontStyle;
> +
> +typedef struct TextSTRegionStyle {
> +    uint8_t          region_style_id;
> +    TextSTRegionInfo region_info;
> +    TextSTRect       text_box;          /* relative to region */
> +    uint8_t          text_flow;
> +    uint8_t          text_halign;
> +    uint8_t          text_valign;
> +    uint8_t          line_space;
> +    uint8_t          font_id_ref;
> +    TextSTFontStyle  font_style;
> +    uint8_t          font_size;
> +    uint8_t          font_color;        /* palette entry id ref */
> +    uint8_t          outline_color;     /* palette entry id ref */
> +    uint8_t          outline_thickness;
> +} TextSTRegionStyle;
> +
> +typedef struct TextSTUserStyle {
> +    uint8_t user_style_id;
> +    int16_t region_hpos_delta;
> +    int16_t region_vpos_delta;
> +    int16_t text_box_hpos_delta;
> +    int16_t text_box_vpos_delta;
> +    int16_t text_box_width_delta;
> +    int16_t text_box_height_delta;
> +    int8_t  font_size_delta;
> +    int8_t  line_space_delta;
> +} TextSTUserStyle;
> +
> +
> +typedef struct TextSTContext {
> +    FFASSDecoderContext ass;
> +
> +    uint32_t palette[256];
> +    int region_style_count;
> +    int user_style_count;
> +    TextSTRegionStyle *region_styles;
> +    TextSTUserStyle   *user_styles;
> +} TextSTContext;
> +
> +static void decode_region_data(AVCodecContext *avctx, GetByteContext *gb, AVBPrint *sub)
> +{
> +    while (bytestream2_get_bytes_left(gb) > 2) {
> +        unsigned int code, type, length;
> +
> +        /* parse header */
> +        code = bytestream2_get_byte(gb);
> +        if (code != BD_TEXTST_DATA_ESCAPE) {
> +            continue;
> +        }
> +        type   = bytestream2_get_byte(gb);
> +        length = bytestream2_get_byte(gb);
> +
> +        /* parse content */
> +        if (length > bytestream2_get_bytes_left(gb)) {
> +            av_log(avctx, AV_LOG_WARNING, "decode_dialog_region(): unexpected end of data\n");
> +            return;
> +        }
> +
> +        switch (type) {
> +        case BD_TEXTST_DATA_STRING:
> +            av_bprint_append_data(sub, gb->buffer, length);
> +            break;
> +        case BD_TEXTST_DATA_NEWLINE:
> +            av_bprint_append_data(sub, "\\N", 2);
> +            break;
> +        default:
> +            break;
> +        }
> +
> +        bytestream2_skip(gb, length);
> +    }
> +}
> +
> +static int decode_region(AVCodecContext *avctx, GetByteContext *gb, AVBPrint *sub,
> +                         int *forced_on_flag, int *region_style_id_ref)
> +{
> +    GetByteContext gb_region;
> +    int flags, data_length;
> +
> +    flags = bytestream2_get_byte(gb);
> +    /*continous_present_flag = !!(flags & 0x80);*/
> +    *forced_on_flag      = !!(flags & 0x40);
> +    *region_style_id_ref = bytestream2_get_byte(gb);
> +    data_length          = bytestream2_get_be16(gb);
> +
> +    if (data_length > bytestream2_get_bytes_left(gb)) {
> +        av_log(avctx, AV_LOG_WARNING, "decode_dialog_region(): unexpected end of data\n");
> +        return -1;
> +    }
> +
> +    bytestream2_init(&gb_region, gb->buffer, data_length);
> +    decode_region_data(avctx, &gb_region, sub);
> +    bytestream2_skip(gb, data_length);
> +    av_bprintf(sub, "\r\n");
> +
> +    return 1;
> +}
> +
> +static int64_t decode_pts(GetByteContext *gb)
> +{
> +    return (((uint64_t)bytestream2_get_byte(gb) & 1) << 32) | bytestream2_get_be32(gb);
> +}
> +
> +static int decode_palette(AVCodecContext *avctx, GetByteContext *gb)
> +{
> +    TextSTContext *s = avctx->priv_data;
> +    unsigned length;
> +
> +    length = bytestream2_get_be16(gb);
> +
> +    if (length > bytestream2_get_bytes_left(gb)) {
> +        av_log(avctx, AV_LOG_WARNING, "decode_palette(): unexpected end of data\n");
> +        return -1;
> +    }
> +
> +    while (length > 4) {
> +        const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
> +        int index = bytestream2_get_byte(gb);
> +        int y = bytestream2_get_byte(gb);
> +        int cb = bytestream2_get_byte(gb);
> +        int cr = bytestream2_get_byte(gb);
> +        int t = bytestream2_get_byte(gb);
> +        int r_add, g_add, b_add;
> +        int r, g, b;
> +
> +        YUV_TO_RGB1_CCIR(cb, cr);
> +        YUV_TO_RGB2_CCIR(r, g, b, y);
> +
> +        s->palette[index] = ((0xFF - t) << 24) | (r << 16) | (g << 8) | b;
> +        length -= 5;
> +    }
> +
> +    return 1;
> +}
> +
> +static void decode_rect(GetByteContext *gb, TextSTRect *rect)
> +{
> +    rect->xpos   = bytestream2_get_be16(gb);
> +    rect->ypos   = bytestream2_get_be16(gb);
> +    rect->width  = bytestream2_get_be16(gb);
> +    rect->height = bytestream2_get_be16(gb);
> +}
> +
> +static void decode_region_info(AVCodecContext *avctx, GetByteContext *gb, TextSTRegionInfo *region_info)
> +{
> +    decode_rect(gb, &region_info->region);
> +    region_info->background_color = bytestream2_get_byte(gb);
> +    bytestream2_skip(gb, 1);
> +}
> +
> +static void decode_font_style(GetByteContext *gb, TextSTFontStyle *font_style)
> +{
> +    int flag = bytestream2_get_byte(gb);
> +
> +    font_style->bold           = !!(flag & 1);
> +    font_style->italic         = !!(flag & 2);
> +    font_style->outline_border = !!(flag & 4);
> +}
> +
> +static void decode_region_style(AVCodecContext *avctx, GetByteContext *gb, TextSTRegionStyle *style)
> +{
> +    style->region_style_id = bytestream2_get_byte(gb);
> +    decode_region_info(avctx, gb, &style->region_info);
> +    decode_rect(gb, &style->text_box);
> +    style->text_flow   = bytestream2_get_byte(gb);
> +    style->text_halign = bytestream2_get_byte(gb);
> +    style->text_valign = bytestream2_get_byte(gb);
> +    style->line_space  = bytestream2_get_byte(gb);
> +    style->font_id_ref = bytestream2_get_byte(gb);
> +    decode_font_style(gb, &style->font_style);
> +    style->font_size   = bytestream2_get_byte(gb);
> +    style->font_color  = bytestream2_get_byte(gb);
> +    style->outline_color = bytestream2_get_byte(gb);
> +    style->outline_thickness = bytestream2_get_byte(gb);
> +}
> +
> +static void decode_user_style(AVCodecContext *avctx, GetByteContext *gb, TextSTUserStyle *style)
> +{
> +    style->user_style_id         = bytestream2_get_byte(gb);
> +    style->region_hpos_delta     = bytestream2_get_be16(gb);
> +    style->region_vpos_delta     = bytestream2_get_be16(gb);
> +    style->text_box_hpos_delta   = bytestream2_get_be16(gb);
> +    style->text_box_vpos_delta   = bytestream2_get_be16(gb);
> +    style->text_box_width_delta  = bytestream2_get_be16(gb);
> +    style->text_box_height_delta = bytestream2_get_be16(gb);
> +    style->font_size_delta       = bytestream2_get_byte(gb);
> +    style->line_space_delta      = bytestream2_get_byte(gb);
> +}
> +
> +static void decode_style_segment(AVCodecContext *avctx, GetByteContext *gb, AVSubtitle *sub)
> +{
> +    TextSTContext *s = avctx->priv_data;
> +    int i;
> +
> +    bytestream2_skip(gb, 2);
> +    s->region_style_count = bytestream2_get_byte(gb);
> +    s->user_style_count   = bytestream2_get_byte(gb);
> +
> +    if (s->region_style_count) {
> +        av_freep(&s->region_styles);
> +        s->region_styles = av_calloc(s->region_style_count, sizeof(TextSTRegionStyle));
> +        if (!s->region_styles) {
> +            s->region_style_count = 0;
> +            return;
> +        }
> +
> +        for (i = 0; i < s->region_style_count; i++) {
> +            decode_region_style(avctx, gb, &s->region_styles[i]);
> +        }
> +    }
> +
> +    if (s->user_style_count) {
> +        av_freep(&s->user_styles);
> +        s->user_styles = av_calloc(s->user_style_count, sizeof(TextSTUserStyle));
> +        if (!s->user_styles) {
> +            s->user_style_count = 0;
> +            return;
> +        }
> +
> +        for (i = 0; i < s->user_style_count; i++) {
> +            decode_user_style(avctx, gb, &s->user_styles[i]);
> +        }
> +    }
> +
> +    decode_palette(avctx, gb);
> +}
> +
> +static void decode_presentation_segment(AVCodecContext *avctx, GetByteContext *gb, AVSubtitle *sub)
> +{
> +    TextSTContext *s = avctx->priv_data;
> +    unsigned ii, palette_update_flag, region_count;
> +    int64_t start_pts, end_pts;
> +
> +    start_pts = decode_pts(gb);
> +    end_pts   = decode_pts(gb);
> +
> +    sub->pts = start_pts * 100 / 9;

I don't think this will work. You should use packet timestamps, not get
them from the packet contents. Otherwise we'd be back to the situation
with other text subtitles, which used to embed timestamps in the packet
data.

> +    sub->start_display_time = 0;
> +    sub->end_display_time = (end_pts - start_pts) / 100;
> +
> +    palette_update_flag = bytestream2_get_byte(gb) >> 7;
> +    if (palette_update_flag) {
> +        if (decode_palette(avctx, gb) < 0) {
> +            return;
> +        }
> +    }
> +
> +    region_count = bytestream2_get_byte(gb);
> +    if (region_count > 2) {
> +        av_log(avctx, AV_LOG_WARNING, "too many regions (%d)\n", region_count);
> +        return;
> +    }
> +
> +    for (ii = 0; ii < region_count; ii++) {
> +        AVBPrint buffer;
> +        char *dec_sub;
> +        int forced_on_flag, region_style_id_ref;
> +
> +        av_bprint_init(&buffer, 1024, 1024);
> +        if (decode_region(avctx, gb, &buffer, &forced_on_flag, &region_style_id_ref) < 0) {
> +            av_bprint_finalize(&buffer, NULL);
> +            return;
> +        }
> +        av_bprint_finalize(&buffer, &dec_sub);
> +
> +        ff_ass_add_rect(sub, dec_sub, s->ass.readorder++, 0, NULL, NULL);
> +        av_free(dec_sub);
> +
> +        if (forced_on_flag && sub->num_rects > 0) {
> +            sub->rects[sub->num_rects - 1]->flags |= AV_SUBTITLE_FLAG_FORCED;
> +        }
> +    }
> +
> +    if (bytestream2_get_bytes_left(gb)) {
> +        av_log(avctx, AV_LOG_WARNING, "unknown data after dialog segment (%d bytes)\n", bytestream2_get_bytes_left(gb));
> +    }
> +}
> +
> +static int textst_decode_frame(AVCodecContext *avctx,
> +                               void *data, int *got_sub_ptr, AVPacket *avpkt)
> +{
> +    AVSubtitle *sub = data;
> +    int segment_type, segment_size;
> +    GetByteContext gb;
> +
> +    if (avpkt->size < 3) {
> +        return avpkt->size;
> +    }
> +
> +    bytestream2_init(&gb, avpkt->data, avpkt->size);
> +
> +    segment_type = bytestream2_get_byte(&gb);
> +    segment_size = bytestream2_get_be16(&gb);
> +
> +    if (avpkt->size < segment_size + 3) {
> +        av_log(avctx, AV_LOG_WARNING, "segment 0x%02x size mismatch: segment %d bytes, packet %d bytes\n",
> +               segment_type, segment_size, avpkt->size);
> +        return avpkt->size;
> +    }
> +
> +    switch (segment_type) {
> +    case DIALOG_STYLE_SEGMENT:
> +        decode_style_segment(avctx, &gb, sub);
> +        break;
> +    case DIALOG_PRESENTATION_SEGMENT:
> +        decode_presentation_segment(avctx, &gb, sub);
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_WARNING, "unknown segment type 0x%02x\n", segment_type);
> +        break;
> +    }
> +
> +    *got_sub_ptr = sub->num_rects > 0;
> +
> +    return avpkt->size;
> +}
> +
> +static av_cold int textst_init(AVCodecContext *avctx)
> +{
> +    TextSTContext *s = avctx->priv_data;
> +    int ret, i;
> +
> +    for (i = 0; i < 256; i++)
> +        s->palette[i] = 0xFFFFFFFF;
> +
> +    ret = ff_ass_subtitle_header_default(avctx);
> +
> +    return ret;
> +}
> +
> +AVCodec ff_textst_decoder = {
> +    .name           = "textst",
> +    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),
> +    .type           = AVMEDIA_TYPE_SUBTITLE,
> +    .id             = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
> +    .decode         = textst_decode_frame,
> +    .init           = textst_init,
> +    .flush          = ff_ass_decoder_flush,
> +    .priv_data_size = sizeof(TextSTContext),
> +};

Paul B Mahol Feb. 1, 2017, 12:20 p.m. UTC | #2

On 2/1/17, wm4 <nfxjfg@googlemail.com> wrote:
> On Wed,  1 Feb 2017 12:12:51 +0100
> Paul B Mahol <onemda@gmail.com> wrote:
>
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavcodec/Makefile    |   1 +
>>  libavcodec/allcodecs.c |   1 +
>>  libavcodec/textstdec.c | 409
>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 411 insertions(+)
>>  create mode 100644 libavcodec/textstdec.c
>>

[...]

>> +
>> +static void decode_presentation_segment(AVCodecContext *avctx,
>> GetByteContext *gb, AVSubtitle *sub)
>> +{
>> +    TextSTContext *s = avctx->priv_data;
>> +    unsigned ii, palette_update_flag, region_count;
>> +    int64_t start_pts, end_pts;
>> +
>> +    start_pts = decode_pts(gb);
>> +    end_pts   = decode_pts(gb);
>> +
>> +    sub->pts = start_pts * 100 / 9;
>
> I don't think this will work. You should use packet timestamps, not get
> them from the packet contents. Otherwise we'd be back to the situation
> with other text subtitles, which used to embed timestamps in the packet
> data.
>

The pts values are stored in the packet data; they are not available from the packet timestamp.

And this successfully converts TextST to ASS format.

Hendrik Leppkes Feb. 1, 2017, 12:25 p.m. UTC | #3

On Wed, Feb 1, 2017 at 1:20 PM, Paul B Mahol <onemda@gmail.com> wrote:
> On 2/1/17, wm4 <nfxjfg@googlemail.com> wrote:
>> On Wed,  1 Feb 2017 12:12:51 +0100
>> Paul B Mahol <onemda@gmail.com> wrote:
>>
>>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>>> ---
>>>  libavcodec/Makefile    |   1 +
>>>  libavcodec/allcodecs.c |   1 +
>>>  libavcodec/textstdec.c | 409
>>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>>  3 files changed, 411 insertions(+)
>>>  create mode 100644 libavcodec/textstdec.c
>>>
>
> [...]
>
>>> +
>>> +static void decode_presentation_segment(AVCodecContext *avctx,
>>> GetByteContext *gb, AVSubtitle *sub)
>>> +{
>>> +    TextSTContext *s = avctx->priv_data;
>>> +    unsigned ii, palette_update_flag, region_count;
>>> +    int64_t start_pts, end_pts;
>>> +
>>> +    start_pts = decode_pts(gb);
>>> +    end_pts   = decode_pts(gb);
>>> +
>>> +    sub->pts = start_pts * 100 / 9;
>>
>> I don't think this will work. You should use packet timestamps, not get
>> them from the packet contents. Otherwise we'd be back to the situation
>> with other text subtitles, which used to embed timestamps in the packet
>> data.
>>
>
> pts are stored in packet, they are not available from packet timestamp.
>
> And this succesfully converts TextST to ass format.

In-band timestamps are fragile and don't work half the time. Many
use-cases rely on reading and potentially modifying packet timestamps,
and that just doesn't work if they then get overridden by the decoder -
i.e. all sorts of code would need special cases to handle this kind of
stream.
Better to have something on our end that handles them and rips out the
in-band timestamps and puts them into the AVPacket straight out of the
demuxer.

- Hendrik

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 43a6add..c042984 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -539,6 +539,7 @@  OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o  h263data.o  \
 OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o h264data.o
 OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
 OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
+OBJS-$(CONFIG_TEXTST_DECODER)          += textstdec.o ass.o
 OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
 OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
 OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index f92b2b7..30a6309 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -581,6 +581,7 @@  void avcodec_register_all(void)
     REGISTER_DECODER(SUBVIEWER,         subviewer);
     REGISTER_DECODER(SUBVIEWER1,        subviewer1);
     REGISTER_ENCDEC (TEXT,              text);
+    REGISTER_DECODER(TEXTST,            textst);
     REGISTER_DECODER(VPLAYER,           vplayer);
     REGISTER_ENCDEC (WEBVTT,            webvtt);
     REGISTER_ENCDEC (XSUB,              xsub);
diff --git a/libavcodec/textstdec.c b/libavcodec/textstdec.c
new file mode 100644
index 0000000..5110cb8
--- /dev/null
+++ b/libavcodec/textstdec.c
@@ -0,0 +1,409 @@ 
+/*
+ * HDMV TextST decoder
+ *
+ * Copyright (c) 2014 Petri Hintukainen <phintuka@users.sourceforge.net>
+ * Copyright (c) 2017 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/bprint.h"
+#include "libavutil/colorspace.h"
+
+#include "avcodec.h"
+#include "ass.h"
+#include "bytestream.h"
+#include "mathops.h"
+
+/* TODO: actually make use of various styles */
+
+enum {
+    DIALOG_STYLE_SEGMENT        = 0x81, /* segment type byte dispatched to decode_style_segment() */
+    DIALOG_PRESENTATION_SEGMENT = 0x82, /* segment type byte dispatched to decode_presentation_segment() */
+};
+
+enum {
+    BD_TEXTST_DATA_ESCAPE      = 0x1b, /* marker byte that must precede every inline code in region data */
+    BD_TEXTST_DATA_STRING      = 1,    /* payload bytes are appended to the subtitle text unchanged */
+    BD_TEXTST_DATA_FONT_ID     = 2,    /* payload is skipped by decode_region_data() */
+    BD_TEXTST_DATA_FONT_STYLE  = 3,    /* payload is skipped by decode_region_data() */
+    BD_TEXTST_DATA_FONT_SIZE   = 4,    /* payload is skipped by decode_region_data() */
+    BD_TEXTST_DATA_FONT_COLOR  = 5,    /* payload is skipped by decode_region_data() */
+    BD_TEXTST_DATA_NEWLINE     = 0x0a, /* written to the subtitle text as the two characters \N */
+    BD_TEXTST_DATA_RESET_STYLE = 0x0b, /* payload is skipped by decode_region_data() */
+};
+
+typedef struct TextSTRect {
+    uint16_t xpos;   /* all four fields are filled from big-endian 16-bit reads in decode_rect() */
+    uint16_t ypos;
+    uint16_t width;
+    uint16_t height;
+} TextSTRect;
+
+typedef struct TextSTRegionInfo {
+    TextSTRect region;
+    uint8_t    background_color; /* palette entry id ref */
+} TextSTRegionInfo;
+
+typedef struct TextSTFontStyle {
+    uint8_t bold;           /* 0 or 1: bit 0 of the font style byte */
+    uint8_t italic;         /* 0 or 1: bit 1 of the font style byte */
+    uint8_t outline_border; /* 0 or 1: bit 2 of the font style byte */
+} TextSTFontStyle;
+
+typedef struct TextSTRegionStyle {
+    uint8_t          region_style_id;
+    TextSTRegionInfo region_info;
+    TextSTRect       text_box;          /* relative to region */
+    uint8_t          text_flow;
+    uint8_t          text_halign;
+    uint8_t          text_valign;
+    uint8_t          line_space;
+    uint8_t          font_id_ref;
+    TextSTFontStyle  font_style;
+    uint8_t          font_size;
+    uint8_t          font_color;        /* palette entry id ref */
+    uint8_t          outline_color;     /* palette entry id ref */
+    uint8_t          outline_thickness;
+} TextSTRegionStyle;
+
+typedef struct TextSTUserStyle {
+    uint8_t user_style_id;
+    int16_t region_hpos_delta;     /* NOTE(review): deltas are stored from plain unsigned reads; confirm the sign encoding */
+    int16_t region_vpos_delta;
+    int16_t text_box_hpos_delta;
+    int16_t text_box_vpos_delta;
+    int16_t text_box_width_delta;
+    int16_t text_box_height_delta;
+    int8_t  font_size_delta;
+    int8_t  line_space_delta;
+} TextSTUserStyle;
+
+
+typedef struct TextSTContext {
+    FFASSDecoderContext ass;       /* its readorder counter is incremented once per emitted rect */
+
+    uint32_t palette[256];         /* packed as (0xFF - t) << 24 | r << 16 | g << 8 | b; all 0xFFFFFFFF after init */
+    int region_style_count;        /* number of entries in region_styles */
+    int user_style_count;          /* number of entries in user_styles */
+    TextSTRegionStyle *region_styles; /* allocated in decode_style_segment() */
+    TextSTUserStyle   *user_styles;   /* allocated in decode_style_segment() */
+} TextSTContext;
+
+/*
+ * Walk the inline codes of one region payload and append the resulting
+ * text to sub. Bytes that are not the escape marker are dropped one at a
+ * time; string payloads are copied as-is, newline codes become "\N", and
+ * every other code is skipped together with its payload.
+ */
+static void decode_region_data(AVCodecContext *avctx, GetByteContext *gb, AVBPrint *sub)
+{
+    /* an inline code needs at least marker + type + length */
+    while (bytestream2_get_bytes_left(gb) > 2) {
+        unsigned int marker, kind, payload_size;
+
+        marker = bytestream2_get_byte(gb);
+        if (marker == BD_TEXTST_DATA_ESCAPE) {
+            kind         = bytestream2_get_byte(gb);
+            payload_size = bytestream2_get_byte(gb);
+
+            /* the payload must fit into what is left of the region */
+            if (payload_size > bytestream2_get_bytes_left(gb)) {
+                av_log(avctx, AV_LOG_WARNING, "decode_dialog_region(): unexpected end of data\n");
+                return;
+            }
+
+            if (kind == BD_TEXTST_DATA_STRING) {
+                av_bprint_append_data(sub, gb->buffer, payload_size);
+            } else if (kind == BD_TEXTST_DATA_NEWLINE) {
+                av_bprint_append_data(sub, "\\N", 2);
+            }
+
+            bytestream2_skip(gb, payload_size);
+        }
+    }
+}
+
+/*
+ * Parse one dialog region: a flag byte, a region style reference, a
+ * 16-bit payload size and the payload itself. The decoded text is
+ * appended to sub followed by "\r\n".
+ *
+ * Returns 1 on success, -1 if the payload size exceeds the remaining data.
+ */
+static int decode_region(AVCodecContext *avctx, GetByteContext *gb, AVBPrint *sub,
+                         int *forced_on_flag, int *region_style_id_ref)
+{
+    GetByteContext payload;
+    int header, payload_size;
+
+    header               = bytestream2_get_byte(gb);
+    /* only bit 0x40 of the flag byte is used; bit 0x80 is ignored */
+    *forced_on_flag      = (header & 0x40) ? 1 : 0;
+    *region_style_id_ref = bytestream2_get_byte(gb);
+    payload_size         = bytestream2_get_be16(gb);
+
+    if (bytestream2_get_bytes_left(gb) < payload_size) {
+        av_log(avctx, AV_LOG_WARNING, "decode_dialog_region(): unexpected end of data\n");
+        return -1;
+    }
+
+    /* parse the payload through its own bounded reader, then step over it */
+    bytestream2_init(&payload, gb->buffer, payload_size);
+    decode_region_data(avctx, &payload, sub);
+    bytestream2_skip(gb, payload_size);
+    av_bprintf(sub, "\r\n");
+
+    return 1;
+}
+
+/*
+ * Read a 5-byte timestamp: the lowest bit of the first byte supplies
+ * bit 32 and the following big-endian 32-bit word supplies bits 31..0.
+ *
+ * The two reads are separate statements on purpose: the operands of a
+ * single '|' expression may be evaluated in either order, which would
+ * allow the 32-bit word to be consumed before the leading byte.
+ */
+static int64_t decode_pts(GetByteContext *gb)
+{
+    uint64_t high = bytestream2_get_byte(gb) & 1;
+    uint64_t low  = bytestream2_get_be32(gb);
+
+    return (high << 32) | low;
+}
+
+/*
+ * Parse a palette block: a 16-bit byte length followed by 5-byte entries
+ * (index, y, cb, cr, t). Each entry is converted to RGB and stored in
+ * s->palette[index] as (0xFF - t) << 24 | r << 16 | g << 8 | b.
+ *
+ * Returns 1 on success, -1 if the declared length exceeds the remaining
+ * data.
+ */
+static int decode_palette(AVCodecContext *avctx, GetByteContext *gb)
+{
+    TextSTContext *s = avctx->priv_data;
+    unsigned length;
+
+    length = bytestream2_get_be16(gb);
+
+    if (length > bytestream2_get_bytes_left(gb)) {
+        av_log(avctx, AV_LOG_WARNING, "decode_palette(): unexpected end of data\n");
+        return -1;
+    }
+
+    while (length > 4) {
+        const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+        int index = bytestream2_get_byte(gb);
+        int y = bytestream2_get_byte(gb);
+        int cb = bytestream2_get_byte(gb);
+        int cr = bytestream2_get_byte(gb);
+        int t = bytestream2_get_byte(gb);
+        int r_add, g_add, b_add;
+        int r, g, b;
+
+        YUV_TO_RGB1_CCIR(cb, cr);
+        YUV_TO_RGB2_CCIR(r, g, b, y);
+
+        /* build the word in unsigned arithmetic: shifting a signed int
+         * value >= 0x80 left by 24 would overflow into the sign bit */
+        s->palette[index] = ((uint32_t)(0xFF - t) << 24) |
+                            ((uint32_t)r << 16) |
+                            ((uint32_t)g << 8) |
+                            (uint32_t)b;
+        length -= 5;
+    }
+
+    return 1;
+}
+
+/* Read four consecutive big-endian 16-bit values: x, y, width, height. */
+static void decode_rect(GetByteContext *gb, TextSTRect *rect)
+{
+    const unsigned x = bytestream2_get_be16(gb);
+    const unsigned y = bytestream2_get_be16(gb);
+    const unsigned w = bytestream2_get_be16(gb);
+    const unsigned h = bytestream2_get_be16(gb);
+
+    rect->xpos   = x;
+    rect->ypos   = y;
+    rect->width  = w;
+    rect->height = h;
+}
+
+/* Read the region rectangle and its background palette index. */
+static void decode_region_info(AVCodecContext *avctx, GetByteContext *gb, TextSTRegionInfo *region_info)
+{
+    TextSTRect *area = &region_info->region;
+
+    decode_rect(gb, area);
+    region_info->background_color = bytestream2_get_byte(gb);
+
+    /* one byte following the color index is not used */
+    bytestream2_skip(gb, 1);
+}
+
+/* Split one flag byte into bold (bit 0), italic (bit 1) and outline (bit 2). */
+static void decode_font_style(GetByteContext *gb, TextSTFontStyle *font_style)
+{
+    const unsigned bits = bytestream2_get_byte(gb);
+
+    font_style->bold           = bits & 1;
+    font_style->italic         = (bits >> 1) & 1;
+    font_style->outline_border = (bits >> 2) & 1;
+}
+
+/* Read one region style record; fields are consumed in bitstream order. */
+static void decode_region_style(AVCodecContext *avctx, GetByteContext *gb, TextSTRegionStyle *style)
+{
+    TextSTRegionStyle *rs = style;
+
+    /* identification and geometry */
+    rs->region_style_id = bytestream2_get_byte(gb);
+    decode_region_info(avctx, gb, &rs->region_info);
+    decode_rect(gb, &rs->text_box);
+
+    /* text layout */
+    rs->text_flow   = bytestream2_get_byte(gb);
+    rs->text_halign = bytestream2_get_byte(gb);
+    rs->text_valign = bytestream2_get_byte(gb);
+    rs->line_space  = bytestream2_get_byte(gb);
+
+    /* font selection and appearance */
+    rs->font_id_ref = bytestream2_get_byte(gb);
+    decode_font_style(gb, &rs->font_style);
+    rs->font_size         = bytestream2_get_byte(gb);
+    rs->font_color        = bytestream2_get_byte(gb);
+    rs->outline_color     = bytestream2_get_byte(gb);
+    rs->outline_thickness = bytestream2_get_byte(gb);
+}
+
+/*
+ * Read one user style record: an id byte, six 16-bit deltas and two
+ * 8-bit deltas, in bitstream order.
+ *
+ * NOTE(review): the deltas are stored straight from unsigned reads into
+ * signed fields; libbluray appears to decode them as sign + magnitude.
+ * Confirm the encoding before these values are put to use.
+ */
+static void decode_user_style(AVCodecContext *avctx, GetByteContext *gb, TextSTUserStyle *style)
+{
+    TextSTUserStyle *us = style;
+
+    us->user_style_id = bytestream2_get_byte(gb);
+
+    /* region position */
+    us->region_hpos_delta = bytestream2_get_be16(gb);
+    us->region_vpos_delta = bytestream2_get_be16(gb);
+
+    /* text box position and size */
+    us->text_box_hpos_delta   = bytestream2_get_be16(gb);
+    us->text_box_vpos_delta   = bytestream2_get_be16(gb);
+    us->text_box_width_delta  = bytestream2_get_be16(gb);
+    us->text_box_height_delta = bytestream2_get_be16(gb);
+
+    /* font metrics */
+    us->font_size_delta  = bytestream2_get_byte(gb);
+    us->line_space_delta = bytestream2_get_byte(gb);
+}
+
+/*
+ * Decode a dialog style segment: two skipped bytes, the region and user
+ * style counts, the style records themselves, and finally a palette.
+ *
+ * The previous style tables are always released first and each stored
+ * count is only raised after its table has been allocated, so a count
+ * can never describe more entries than its table holds, including on
+ * the allocation-failure paths. On allocation failure the rest of the
+ * segment is not parsed.
+ */
+static void decode_style_segment(AVCodecContext *avctx, GetByteContext *gb, AVSubtitle *sub)
+{
+    TextSTContext *s = avctx->priv_data;
+    int region_style_count, user_style_count;
+    int i;
+
+    bytestream2_skip(gb, 2);
+    region_style_count = bytestream2_get_byte(gb);
+    user_style_count   = bytestream2_get_byte(gb);
+
+    /* start from an empty, self-consistent state */
+    av_freep(&s->region_styles);
+    av_freep(&s->user_styles);
+    s->region_style_count = 0;
+    s->user_style_count   = 0;
+
+    if (region_style_count) {
+        s->region_styles = av_calloc(region_style_count, sizeof(*s->region_styles));
+        if (!s->region_styles) {
+            return;
+        }
+        s->region_style_count = region_style_count;
+
+        for (i = 0; i < region_style_count; i++) {
+            decode_region_style(avctx, gb, &s->region_styles[i]);
+        }
+    }
+
+    if (user_style_count) {
+        s->user_styles = av_calloc(user_style_count, sizeof(*s->user_styles));
+        if (!s->user_styles) {
+            return;
+        }
+        s->user_style_count = user_style_count;
+
+        for (i = 0; i < user_style_count; i++) {
+            decode_user_style(avctx, gb, &s->user_styles[i]);
+        }
+    }
+
+    decode_palette(avctx, gb);
+}
+
+/*
+ * Decode a dialog presentation segment into sub: start and end
+ * timestamps, an optional palette update, then up to two regions, each
+ * of which becomes one ASS rect.
+ *
+ * Parsing stops early (keeping any rects already added) on a bad
+ * palette, on more than two regions, on a truncated region, or when a
+ * text buffer or rect cannot be allocated.
+ */
+static void decode_presentation_segment(AVCodecContext *avctx, GetByteContext *gb, AVSubtitle *sub)
+{
+    TextSTContext *s = avctx->priv_data;
+    unsigned ii, palette_update_flag, region_count;
+    int64_t start_pts, end_pts;
+
+    start_pts = decode_pts(gb);
+    end_pts   = decode_pts(gb);
+
+    /* "* 100 / 9" treats the in-band values as 90 kHz ticks and yields
+     * microseconds */
+    sub->pts = start_pts * 100 / 9;
+    sub->start_display_time = 0;
+    /* display times are in milliseconds; on the same 90 kHz scale that
+     * is 90 ticks per millisecond, not 100 */
+    sub->end_display_time = (end_pts - start_pts) / 90;
+
+    palette_update_flag = bytestream2_get_byte(gb) >> 7;
+    if (palette_update_flag) {
+        if (decode_palette(avctx, gb) < 0) {
+            return;
+        }
+    }
+
+    region_count = bytestream2_get_byte(gb);
+    if (region_count > 2) {
+        av_log(avctx, AV_LOG_WARNING, "too many regions (%u)\n", region_count);
+        return;
+    }
+
+    for (ii = 0; ii < region_count; ii++) {
+        AVBPrint buffer;
+        char *dec_sub = NULL;
+        int forced_on_flag, region_style_id_ref;
+        int ret;
+
+        av_bprint_init(&buffer, 1024, 1024);
+        if (decode_region(avctx, gb, &buffer, &forced_on_flag, &region_style_id_ref) < 0) {
+            av_bprint_finalize(&buffer, NULL);
+            return;
+        }
+        /* on failure no string is handed back, so do not pass it on */
+        if (av_bprint_finalize(&buffer, &dec_sub) < 0 || !dec_sub) {
+            return;
+        }
+
+        ret = ff_ass_add_rect(sub, dec_sub, s->ass.readorder++, 0, NULL, NULL);
+        av_free(dec_sub);
+        if (ret < 0) {
+            /* no rect was added, so there is nothing to flag */
+            return;
+        }
+
+        if (forced_on_flag && sub->num_rects > 0) {
+            sub->rects[sub->num_rects - 1]->flags |= AV_SUBTITLE_FLAG_FORCED;
+        }
+    }
+
+    if (bytestream2_get_bytes_left(gb)) {
+        av_log(avctx, AV_LOG_WARNING, "unknown data after dialog segment (%d bytes)\n", bytestream2_get_bytes_left(gb));
+    }
+}
+
+/*
+ * Decoder entry point: a packet carries one segment, made of a type
+ * byte, a 16-bit size and the segment body. The whole packet is always
+ * reported as consumed; *got_sub_ptr is set only when a segment was
+ * dispatched.
+ */
+static int textst_decode_frame(AVCodecContext *avctx,
+                               void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;
+    GetByteContext gb;
+    int type, body_size;
+
+    /* packets too short for the 3-byte segment header are dropped silently */
+    if (avpkt->size >= 3) {
+        bytestream2_init(&gb, avpkt->data, avpkt->size);
+
+        type      = bytestream2_get_byte(&gb);
+        body_size = bytestream2_get_be16(&gb);
+
+        if (avpkt->size < body_size + 3) {
+            av_log(avctx, AV_LOG_WARNING, "segment 0x%02x size mismatch: segment %d bytes, packet %d bytes\n",
+                   type, body_size, avpkt->size);
+            return avpkt->size;
+        }
+
+        if (type == DIALOG_STYLE_SEGMENT) {
+            decode_style_segment(avctx, &gb, sub);
+        } else if (type == DIALOG_PRESENTATION_SEGMENT) {
+            decode_presentation_segment(avctx, &gb, sub);
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "unknown segment type 0x%02x\n", type);
+        }
+
+        /* a subtitle is output only if at least one rect was produced */
+        *got_sub_ptr = sub->num_rects > 0;
+    }
+
+    return avpkt->size;
+}
+
+/*
+ * Set every palette entry to 0xFFFFFFFF and install the default ASS
+ * subtitle header; returns the result of the header setup.
+ */
+static av_cold int textst_init(AVCodecContext *avctx)
+{
+    TextSTContext *s = avctx->priv_data;
+
+    /* filling every byte with 0xFF makes each 32-bit entry 0xFFFFFFFF */
+    memset(s->palette, 0xFF, sizeof(s->palette));
+
+    return ff_ass_subtitle_header_default(avctx);
+}
+
+/*
+ * Release the style tables allocated by decode_style_segment(); without
+ * a close callback they would never be freed.
+ */
+static av_cold int textst_close(AVCodecContext *avctx)
+{
+    TextSTContext *s = avctx->priv_data;
+
+    av_freep(&s->region_styles);
+    av_freep(&s->user_styles);
+    s->region_style_count = 0;
+    s->user_style_count   = 0;
+
+    return 0;
+}
+
+/* Codec registration entry for the HDMV TextST subtitle decoder. */
+AVCodec ff_textst_decoder = {
+    .name           = "textst",
+    .long_name      = NULL_IF_CONFIG_SMALL("HDMV TextST subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+    .decode         = textst_decode_frame,
+    .init           = textst_init,
+    .close          = textst_close,
+    .flush          = ff_ass_decoder_flush,
+    .priv_data_size = sizeof(TextSTContext),
+};

[FFmpeg-devel] avcodec: add HDMV Text Subtitle decoder

Commit Message

Comments

Patch