diff mbox series

[FFmpeg-devel,2/2] avcodec/ttml: Add decoder

Message ID 20230317015527.425-2-steve.rock.pet@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] avformat/ttml: Add demuxer | expand

Checks

Context Check Description
yinshiyou/make_fate_loongarch64 success Make fate finished
yinshiyou/make_loongarch64 warning New warnings during build

Commit Message

Aidan March 17, 2023, 1:55 a.m. UTC
From: Aidan <steve.rock.pet@gmail.com>

Requested: #4859

Signed-off-by: Aidan Vaughan <steve.rock.pet@gmail.com>
---
I've had some challenges implementing some sort of TTML decoder/demux support.
With TTML being an XML format, the easiest way to ensure a valid XML file was to use libxml2.
Using libxml2 requires the demuxer to dump the XML as text so the decoder can read it (which I hate, because it must be parsed again in the decoder!).

Plus, TTML has a vast number of ways of defining styles.
For example, I had to use AV_PKT_DATA_WEBVTT_SETTINGS because styles can be defined in the <p> tag.
Also styles can be defined in the body as defaults.

I used a workaround by providing the default body styles in each packet's AV_PKT_DATA_WEBVTT_SETTINGS.
While writing this, I just realized I could provide body styles in the extradata.
I could send another patch to switch to that instead. I would like my code to be reviewed first.

This definitely isn't perfect & isn't complete support with all the features.
There are some valid ttml files that won't work. For example, ttml files that contain metrics for timestamps.
You can see https://trac.ffmpeg.org/ticket/4859 for some quirky ttml files.

Here is the TTML file I was using for testing: https://gist.github.com/TheDaChicken/30cb6d4c3f603725c5bd8f034ea75650

Copying from ttml demuxer -> ttml encoder isn't perfect either.
It is possible to add to the encoder to preserve more things. I didn't want to mess with any code I didn't write unless it's required.

Any review of my code is appreciated (my code ain't perfect) and especially getting more support in ffmpeg for TTML.
It would be nice to have support for ttml decoding. (As someone who has had some fair share of ttml files)

 configure              |   2 +
 libavcodec/Makefile    |   1 +
 libavcodec/allcodecs.c |   1 +
 libavcodec/ttmldec.c   | 711 +++++++++++++++++++++++++++++++++++++++++
 libavcodec/version.h   |   2 +-
 5 files changed, 716 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/ttmldec.c

Comments

Aidan March 26, 2023, 5:53 p.m. UTC | #1
Should I make my own parser for the XML so it reduces having to dump in
avformat portion?
Also, should I submit another patch to fix the FATE test failure on the
avformat patch caused by the missing configure change for the libxml2 library?
It fails only because that change is in the second patch, not the first.

On Thu, Mar 16, 2023, 6:56 PM TheDaChicken <steve.rock.pet@gmail.com> wrote:

> From: Aidan <steve.rock.pet@gmail.com>
>
> Requested: #4859
>
> Signed-off-by: Aidan Vaughan <steve.rock.pet@gmail.com>
> ---
> I've had some challenges implementing some sort of TTML decoder/demux
> support.
> With TTML being an XML format, the easist way to ensure a valid XML file
> was to use libxml2.
> Using libxml2, it requires dumping xml to be read for the decoder. (Which
> I hate because it must be parsed back in the decoder!!!!)
>
> Plus, TTML has a vast number of ways defining styles.
> For example, I had to use AV_PKT_DATA_WEBVTT_SETTINGS because styles can
> be defined in the <p> tag.
> Also styles can be defined in the body as defaults.
>
> I used a workaround by providing the default body styles in each packets's
> AV_PKT_DATA_WEBVTT_SETTINGS.
> While writing this, I just realized I could provide body styles in the
> extradata.
> I could send another patch to switch to that instead. I'd would like my
> code to be reviewed first.
>
> This definitely isn't perfect & isn't complete support with all the
> features.
> There are some valid ttml files that won't work. For example, ttml files
> that contain metrics for timestamps.
> You can see https://trac.ffmpeg.org/ticket/4859 for some quirky ttml
> files.
>
> Here is the TTML file I was using for testing:
> https://gist.github.com/TheDaChicken/30cb6d4c3f603725c5bd8f034ea75650
>
> Copying from ttml demuxer -> ttml encoder isn't perfect either.
> It is possible to add to the encoder to preserve more things. I didn't
> want to mess with any code I didn't write unless it's required.
>
> Any review of my code is appreciated (my code ain't perfect) and
> especially getting more support in ffmpeg for TTML.
> It would be nice to have support for ttml decoding. (As someone who has
> had some fair share of ttml files)
>
>  configure              |   2 +
>  libavcodec/Makefile    |   1 +
>  libavcodec/allcodecs.c |   1 +
>  libavcodec/ttmldec.c   | 711 +++++++++++++++++++++++++++++++++++++++++
>  libavcodec/version.h   |   2 +-
>  5 files changed, 716 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/ttmldec.c
>
> diff --git a/configure b/configure
> index 03d3c429a5..fbe11f3663 100755
> --- a/configure
> +++ b/configure
> @@ -3311,6 +3311,7 @@ pcm_alaw_at_decoder_deps="audiotoolbox"
>  pcm_mulaw_at_decoder_deps="audiotoolbox"
>  qdmc_at_decoder_deps="audiotoolbox"
>  qdm2_at_decoder_deps="audiotoolbox"
> +ttml_decoder_deps="libxml2"
>  aac_at_encoder_deps="audiotoolbox"
>  aac_at_encoder_select="audio_frame_queue"
>  alac_at_encoder_deps="audiotoolbox"
> @@ -3479,6 +3480,7 @@ rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer"
>  rtpdec_select="asf_demuxer mov_demuxer mpegts_demuxer rm_demuxer
> rtp_protocol srtp"
>  rtsp_demuxer_select="http_protocol rtpdec"
>  rtsp_muxer_select="rtp_muxer http_protocol rtp_protocol rtpenc_chain"
> +ttml_demuxer_deps="libxml2"
>  sap_demuxer_select="sdp_demuxer"
>  sap_muxer_select="rtp_muxer rtp_protocol rtpenc_chain"
>  sdp_demuxer_select="rtpdec"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index abae4909d2..96fc259c93 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -719,6 +719,7 @@ OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o
> msrledec.o
>  OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
>  OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
>  OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttaencdsp.o ttadata.o
> +OBJS-$(CONFIG_TTML_DECODER)            += ttmldec.o
>  OBJS-$(CONFIG_TTML_ENCODER)            += ttmlenc.o ass_split.o
>  OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o
>  OBJS-$(CONFIG_TXD_DECODER)             += txd.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 385ee34803..43ac735dc6 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -730,6 +730,7 @@ extern const FFCodec ff_subviewer_decoder;
>  extern const FFCodec ff_subviewer1_decoder;
>  extern const FFCodec ff_text_encoder;
>  extern const FFCodec ff_text_decoder;
> +extern const FFCodec ff_ttml_decoder;
>  extern const FFCodec ff_ttml_encoder;
>  extern const FFCodec ff_vplayer_decoder;
>  extern const FFCodec ff_webvtt_encoder;
> diff --git a/libavcodec/ttmldec.c b/libavcodec/ttmldec.c
> new file mode 100644
> index 0000000000..7bdcdc1bca
> --- /dev/null
> +++ b/libavcodec/ttmldec.c
> @@ -0,0 +1,711 @@
> +/*
> + * TTML subtitle decoder
> + * Copyright (c) 2023 Aidan Vaughan (TheDaChicken)
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include <libxml/parser.h>
> +#include "avcodec.h"
> +#include "ass.h"
> +#include "ttmlenc.h"
> +#include "codec_internal.h"
> +#include "version.h"
> +#include "libavutil/bprint.h"
> +#include "libavutil/parseutils.h"
> +
> +/**
> + * @file
> + * TTML subtitle decoder
> + * @see https://www.w3.org/TR/ttml1/
> + * @see https://www.w3.org/TR/ttml2/
> + * @see https://www.w3.org/TR/ttml-imsc/rec
> + */
> +
> +/* printf-style template from which parse_header() builds the document it
> + * hands to libxml2: the first %s receives the attribute text placed on the
> + * <tt> root element, the second %s the markup placed inside it. */
> +static const char* TTML_HEADER =
> +        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
> +        "<tt %s>%s</tt>";
> +
> +/* One TTML style, reduced to the properties that create_ass_header() writes
> + * into an ASS "Style:" line. */
> +typedef struct {
> +    const char* name;       /* style identifier; emitted as the ASS style name */
> +    const char* fontfamily; /* font name; emitted as the ASS Fontname */
> +    uint8_t fontsize;       /* font size as returned by parse_fontsize() */
> +    uint8_t bold;           /* parse_styles() stores 1 when fontWeight starts with "bold", else 0 */
> +    uint8_t italic;         /* parse_styles() stores 1 when fontStyle starts with "italic", else 0 */
> +    int color;              /* text colour, packed by parse_styles() as R | G << 8 | B << 16 */
> +    int backgroundColor;    /* background colour, packed the same way */
> +    int alignment;          /* value returned by parse_text_align() */
> +} TTMLStyle;
> +
> +/* A <region> element of the TTML <layout> section, as filled in by
> + * parse_region(). */
> +typedef struct {
> +    const char* style; /* av_strdup() copy of the region's "style" attribute */
> +    const char* name;  /* av_strdup() copy of the region's "id" attribute */
> +} TTMLRegion;
> +
> +/* Private decoder state.
> + *
> + * readorder must stay the FIRST member: the codec registers
> + * ff_ass_decoder_flush() as its flush callback, and that helper treats
> + * priv_data as an FFASSDecoderContext, i.e. a struct that begins with
> + * "int readorder". With the xmlDocPtr in first position a flush would
> + * overwrite part of the document pointer instead of resetting the counter. */
> +typedef struct {
> +    int readorder;         /* running ReadOrder passed to ff_ass_add_rect() */
> +    xmlDocPtr doc;         /* document built by parse_header(); freed in ttml_close() */
> +    xmlNodePtr node;       /* root <tt> element of doc; context for per-packet parsing */
> +    TTMLStyle* styles;     /* array of style_count entries */
> +    TTMLRegion* regions;   /* array of region_count entries */
> +    uint32_t style_count;
> +    uint32_t region_count;
> +} TTMLContext;
> +
> +/* Read-only baseline style built from the ASS_DEFAULT_* macros; parse_header()
> + * passes it to parse_styles() as the root style. */
> +static const TTMLStyle DEFAULT_STYLE = {
> +    .name            = "Default",
> +    .fontfamily      = ASS_DEFAULT_FONT,
> +    .fontsize        = ASS_DEFAULT_FONT_SIZE,
> +    .bold            = ASS_DEFAULT_BOLD,
> +    .italic          = ASS_DEFAULT_ITALIC,
> +    .color           = ASS_DEFAULT_COLOR,
> +    .backgroundColor = ASS_DEFAULT_BACK_COLOR,
> +    .alignment       = ASS_DEFAULT_ALIGNMENT,
> +};
> +
> +/* Fill *style with the decoder defaults taken from the ASS_DEFAULT_* macros.
> + * name and fontfamily are fresh av_strdup() copies.
> + * NOTE(review): the av_strdup() results are not checked here, so either
> + * pointer may be NULL after an allocation failure - callers should verify. */
> +static void init_style(TTMLStyle* style) {
> +    style->name = av_strdup("Default");
> +    style->fontfamily = av_strdup(ASS_DEFAULT_FONT);
> +    style->fontsize = ASS_DEFAULT_FONT_SIZE;
> +    style->bold = ASS_DEFAULT_BOLD;
> +    style->italic = ASS_DEFAULT_ITALIC;
> +    style->color = ASS_DEFAULT_COLOR;
> +    style->backgroundColor = ASS_DEFAULT_BACK_COLOR;
> +    style->alignment = ASS_DEFAULT_ALIGNMENT;
> +}
> +
> +/* https://www.w3.org/TR/2018/REC-ttml2-20181108/#style-attribute-fontSize */
> +/**
> + * Parse the leading unsigned number of a fontSize value such as "24px".
> + *
> + * @param root style the value is relative to (currently unused)
> + * @param key  attribute value; may be NULL
> + * @return the parsed number, or 0 when key is NULL, does not start with a
> + *         number, or the number does not fit in an int
> + */
> +static int parse_fontsize(const TTMLStyle root, const char *key) {
> +    /* %u requires an unsigned destination; scanning into an int was a
> +     * format/argument mismatch. */
> +    unsigned int px;
> +
> +    if (!key || sscanf(key, "%upx", &px) != 1 || px > INT_MAX)
> +        return 0;
> +    return (int)px;
> +}
> +
> +/* https://www.w3.org/TR/2018/REC-ttml2-20181108/#style-attribute-textAlign */
> +/* Map a textAlign value to 1 (left), 2 (center) or 3 (right); any other
> + * value yields 0. Keywords are matched as prefixes, in table order. */
> +static int parse_text_align(const char *text_align) {
> +    static const struct {
> +        const char *keyword;
> +        size_t      len;
> +        int         value;
> +    } table[] = {
> +        { "left",   4, 1 },
> +        { "center", 6, 2 },
> +        { "right",  5, 3 },
> +    };
> +
> +    for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
> +        if (strncmp(text_align, table[i].keyword, table[i].len) == 0)
> +            return table[i].value;
> +    }
> +    return 0;
> +}
> +
> +/* Return the first element child of rootnode whose name equals nodename
> + * (compared case-insensitively), or NULL when rootnode is NULL or no child
> + * matches. */
> +static xmlNodePtr find_child_node_by_name(xmlNodePtr rootnode, const char *nodename)
> +{
> +    xmlNodePtr child;
> +
> +    if (!rootnode)
> +        return NULL;
> +
> +    for (child = xmlFirstElementChild(rootnode); child;
> +         child = xmlNextElementSibling(child)) {
> +        if (av_strcasecmp(child->name, nodename) == 0)
> +            return child;
> +    }
> +    return NULL;
> +}
> +
> +/**
> + * Look up a parsed style by its identifier.
> + *
> + * The comparison is exact. The previous strncmp() over strlen(style_name)
> + * bytes was a prefix test, so asking for "s1" could return a style called
> + * "s10" if it happened to come first.
> + *
> + * @param style_name identifier to find; may be NULL
> + * @return the matching entry of ttml->styles, or NULL when style_name is
> + *         NULL or no style has that name
> + */
> +static TTMLStyle* get_style(AVCodecContext *avctx, const char* style_name) {
> +    TTMLContext *ttml = avctx->priv_data;
> +
> +    if (!style_name)
> +        return NULL;
> +
> +    for (uint32_t i = 0; i < ttml->style_count; i++) {
> +        TTMLStyle *style = &ttml->styles[i];
> +
> +        if (style->name && !strcmp(style->name, style_name))
> +            return style;
> +    }
> +    return NULL;
> +}
> +
> +/**
> + * Look up a parsed region by its identifier.
> + *
> + * The comparison is exact. The previous strncmp() over strlen(region_name)
> + * bytes only tested a prefix, so "r1" could resolve to a region named "r10".
> + *
> + * @param region_name identifier to find; may be NULL
> + * @return the matching entry of ttml->regions, or NULL when region_name is
> + *         NULL or no region has that name
> + */
> +static TTMLRegion* get_region(AVCodecContext *avctx, const char* region_name) {
> +    TTMLContext *ttml = avctx->priv_data;
> +
> +    if (!region_name)
> +        return NULL;
> +
> +    for (uint32_t i = 0; i < ttml->region_count; i++) {
> +        TTMLRegion *region = &ttml->regions[i];
> +
> +        if (region->name && !strcmp(region->name, region_name))
> +            return region;
> +    }
> +    return NULL;
> +}
> +
> +/**
> + * Build the ASS script header (Script Info, one "Style:" line per parsed
> + * TTML style, and the Events format line) and store it in
> + * avctx->subtitle_header / subtitle_header_size.
> + *
> + * @return 0 on success, AVERROR(ENOMEM) when the header could not be built
> + */
> +static int create_ass_header(AVCodecContext *avctx) {
> +    TTMLContext *ttml = avctx->priv_data;
> +    AVBPrint buf;
> +    char *header = NULL;
> +    unsigned header_len;
> +    int ret;
> +
> +    av_bprint_init(&buf, 0, INT_MAX);
> +    av_bprintf(&buf,
> +               "[Script Info]\r\n"
> +               "; Script generated by FFmpeg/Lavc%s\r\n"
> +               "ScriptType: v4.00+\r\n"
> +               "PlayResX: %d\r\n"
> +               "PlayResY: %d\r\n"
> +               "ScaledBorderAndShadow: yes\r\n"
> +               "YCbCr Matrix: None\r\n"
> +               "\r\n"
> +               "[V4+ Styles]\r\n"
> +
> +               /* ASS (v4+) header */
> +               "Format: Name, "
> +               "Fontname, Fontsize, "
> +               "PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
> +               "Bold, Italic, Underline, StrikeOut, "
> +               "ScaleX, ScaleY, "
> +               "Spacing, Angle, "
> +               "BorderStyle, Outline, Shadow, "
> +               "Alignment, MarginL, MarginR, MarginV, "
> +               "Encoding\r\n",
> +               !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
> +               ASS_DEFAULT_PLAYRESX, ASS_DEFAULT_PLAYRESY);
> +
> +    for (uint32_t i = 0; i < ttml->style_count; i++) {
> +        const TTMLStyle *style = &ttml->styles[i];
> +
> +        av_bprintf(&buf, "Style: "
> +                         "%s,"                  /* Name */
> +                         "%s,%d,"               /* Font{name,size} */
> +                         "&H%x,&H%x,&H%x,&H%x," /* {Primary,Secondary,Outline,Back}Colour */
> +                         "%d,%d,%d,0,"          /* Bold, Italic, Underline, StrikeOut */
> +                         "100,100,"             /* Scale{X,Y} */
> +                         "0,0,"                 /* Spacing, Angle */
> +                         "%d,1,0,"              /* BorderStyle, Outline, Shadow */
> +                         "%d,10,10,10,"         /* Alignment, Margin[LRV] */
> +                         "1\r\n"                /* Encoding */,
> +                   /* the strings are NULL if an earlier av_strdup() failed */
> +                   style->name ? style->name : "Default",
> +                   style->fontfamily ? style->fontfamily : ASS_DEFAULT_FONT,
> +                   style->fontsize,
> +                   style->color, style->color,
> +                   style->backgroundColor, style->backgroundColor,
> +                   style->bold, style->italic, 0,
> +                   ASS_DEFAULT_BORDERSTYLE, style->alignment);
> +    }
> +
> +    av_bprintf(&buf,
> +               "\r\n"
> +               "[Events]\r\n"
> +               "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n");
> +
> +    if (!av_bprint_is_complete(&buf)) {
> +        /* release the partial buffer instead of leaking it */
> +        av_bprint_finalize(&buf, NULL);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    /* buf.len is the text length; buf.size is only the allocated capacity
> +     * and must not be used as the amount of valid data. */
> +    header_len = buf.len;
> +
> +    /* Hands over a NUL-terminated heap copy and releases buf on every path. */
> +    ret = av_bprint_finalize(&buf, &header);
> +    if (ret < 0)
> +        return ret;
> +
> +    avctx->subtitle_header      = (uint8_t *)header;
> +    avctx->subtitle_header_size = header_len;
> +    return 0;
> +}
> +
> +/* Example: <region style="default" xml:id="speaker"></region> */
> +/**
> + * Fill *region from the attributes of a <region> element.
> + *
> + * Both fields are reset to NULL first: the region array comes from
> + * av_realloc_array() and is therefore not zeroed, yet get_region() and
> + * ttml_close() read these pointers even when the attribute was absent.
> + * Attribute names are compared exactly ("id", "style") rather than by prefix.
> + *
> + * @return 0 on success, AVERROR(ENOMEM) if a value could not be copied
> + */
> +static int parse_region(AVCodecContext *avctx, TTMLRegion* region, xmlNodePtr node) {
> +    region->name  = NULL;
> +    region->style = NULL;
> +
> +    for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
> +        const char *attr_name = (const char *)attr->name;
> +        const char **dst;
> +        xmlChar *val;
> +        char *copy;
> +
> +        if (!strcmp(attr_name, "id"))
> +            dst = &region->name;
> +        else if (!strcmp(attr_name, "style"))
> +            dst = &region->style;
> +        else
> +            continue;
> +
> +        val = xmlGetProp(node, attr->name);
> +        if (!val)
> +            continue;
> +        copy = av_strdup((const char *)val);
> +        xmlFree(val);
> +        if (!copy)
> +            return AVERROR(ENOMEM);
> +
> +        /* "id" and "xml:id" share a local name; keep the last one without
> +         * leaking the first */
> +        av_free((void *)*dst);
> +        *dst = copy;
> +    }
> +    return 0;
> +}
> +
> +/* Example: tts:fontSize="24px" tts:fontFamily="Arial" tts:fontWeight="normal" tts:fontStyle="normal"
> + * tts:textDecoration="none" tts:color="white" tts:backgroundColor="black" tts:textAlign="center"
> + * tts:fontStyle="italic" */
> +/**
> + * Apply the style attributes found on node to *ttml_style.
> + *
> + * Attributes that are absent, unrecognised or unparsable leave the
> + * corresponding field untouched.
> + *
> + * Ownership: a new name / fontfamily is stored as an av_strdup() copy and the
> + * previous pointer is NOT freed here, because callers pass both owned
> + * (init_style()) and borrowed (by-value copy of another style) strings.
> + * NOTE(review): the owned case therefore still leaks the default string.
> + *
> + * @param root_style style the values are relative to (only forwarded to
> + *                   parse_fontsize())
> + * @return 0 on success, AVERROR(ENOMEM) if a string could not be copied
> + */
> +static int parse_styles(AVCodecContext *avctx, xmlNodePtr node, const TTMLStyle root_style, TTMLStyle* ttml_style) {
> +    const int is_style_node = !strcmp((const char *)node->name, "style");
> +    int ret = 0;
> +
> +    for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
> +        const char *key = (const char *)attr->name;
> +        xmlChar *xml_val = xmlGetProp(node, attr->name);
> +        const char *val = (const char *)xml_val;
> +
> +        if (!val)
> +            continue;
> +
> +        /* Names are compared in full: the old strncmp() lengths (e.g. 5 for
> +         * "backgroundColor", 2 for "id") accepted unrelated attributes. */
> +        if (!strcmp(key, "id")) {
> +            /* P tag could contain labels for subtitles.
> +             * It's not a STYLE name */
> +            if (is_style_node) {
> +                char *copy = av_strdup(val);
> +                if (copy)
> +                    ttml_style->name = copy;
> +                else
> +                    ret = AVERROR(ENOMEM);
> +            }
> +        } else if (!strcmp(key, "fontStyle")) {
> +            ttml_style->italic = !strncmp(val, "italic", 6);
> +        } else if (!strcmp(key, "fontWeight")) {
> +            ttml_style->bold = !strncmp(val, "bold", 4);
> +        } else if (!strcmp(key, "fontSize")) {
> +            int size = parse_fontsize(root_style, val);
> +            /* keep the inherited size on parse failure; clamp because the
> +             * field is only 8 bits wide */
> +            if (size > 0)
> +                ttml_style->fontsize = size > UINT8_MAX ? UINT8_MAX : size;
> +        } else if (!strcmp(key, "fontFamily")) {
> +            char *copy = av_strdup(val);
> +            if (copy)
> +                ttml_style->fontfamily = copy;
> +            else
> +                ret = AVERROR(ENOMEM);
> +        } else if (!strcmp(key, "color")) {
> +            uint8_t rgba[4];
> +            /* rgba is only valid when av_parse_color() succeeds */
> +            if (av_parse_color(rgba, val, -1, avctx) >= 0)
> +                ttml_style->color = rgba[0] | rgba[1] << 8 | rgba[2] << 16;
> +        } else if (!strcmp(key, "backgroundColor")) {
> +            uint8_t rgba[4];
> +            if (av_parse_color(rgba, val, -1, avctx) >= 0)
> +                ttml_style->backgroundColor = rgba[0] | rgba[1] << 8 | rgba[2] << 16;
> +        } else if (!strcmp(key, "textAlign")) {
> +            ttml_style->alignment = parse_text_align(val);
> +        }
> +
> +        xmlFree(xml_val);
> +    }
> +    return ret;
> +}
> +
> +/**
> + * Build a <tt> document from the given root attributes and inner markup,
> + * parse it with libxml2 and collect the <style> and <region> definitions
> + * found under <head>.
> + *
> + * On success ttml->doc / ttml->node hold the parsed document and its root.
> + * <head> is optional in TTML, so a document without one is accepted and simply
> + * contributes no styles or regions.
> + *
> + * @param attr_data attribute text for the <tt> element
> + * @param ttml_data markup placed inside the <tt> element
> + * @return 0 on success, AVERROR_INVALIDDATA if the text is not parsable XML,
> + *         AVERROR(ENOMEM) on allocation failure
> + */
> +static int parse_header(AVCodecContext *avctx,
> +                        const char* attr_data, const char* ttml_data) {
> +    TTMLContext *ttml = avctx->priv_data;
> +    xmlNodePtr head_node;
> +    AVBPrint buf;
> +
> +    av_bprint_init(&buf, 0, INT_MAX);
> +    av_bprintf(&buf, TTML_HEADER,
> +               attr_data, ttml_data);
> +    if (!av_bprint_is_complete(&buf)) {
> +        av_bprint_finalize(&buf, NULL);
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    ttml->doc = xmlReadMemory(buf.str, buf.len, NULL, NULL, 0);
> +    /* the text is no longer needed once parsed; releasing it here keeps
> +     * every later return path leak-free */
> +    av_bprint_finalize(&buf, NULL);
> +    if (!ttml->doc)
> +        return AVERROR_INVALIDDATA;
> +
> +    ttml->node = xmlDocGetRootElement(ttml->doc);
> +    if (!ttml->node)
> +        return AVERROR_INVALIDDATA;
> +
> +    head_node = find_child_node_by_name(ttml->node, "head");
> +    if (!head_node)
> +        return 0;
> +
> +    for (xmlNodePtr section = xmlFirstElementChild(head_node); section;
> +         section = xmlNextElementSibling(section)) {
> +        const char *section_name = (const char *)section->name;
> +        size_t child_count = xmlChildElementCount(section);
> +
> +        if (!child_count)
> +            continue;
> +
> +        if (!strcmp(section_name, "styling")) {
> +            /* grow through a temporary so the old array (and its count)
> +             * stays valid for ttml_close() if the allocation fails */
> +            TTMLStyle *styles = av_realloc_array(ttml->styles,
> +                                                 ttml->style_count + child_count,
> +                                                 sizeof(*styles));
> +            if (!styles)
> +                return AVERROR(ENOMEM);
> +            ttml->styles = styles;
> +
> +            for (xmlNodePtr style_node = xmlFirstElementChild(section); style_node;
> +                 style_node = xmlNextElementSibling(style_node)) {
> +                TTMLStyle *style;
> +                int ret;
> +
> +                if (strcmp((const char *)style_node->name, "style"))
> +                    continue;
> +                style = &ttml->styles[ttml->style_count];
> +                init_style(style);
> +                /* count the entry first so it is released on failure */
> +                ttml->style_count++;
> +                if (!style->name || !style->fontfamily)
> +                    return AVERROR(ENOMEM);
> +                ret = parse_styles(avctx, style_node, DEFAULT_STYLE, style);
> +                if (ret < 0)
> +                    return ret;
> +            }
> +        } else if (!strcmp(section_name, "layout")) {
> +            TTMLRegion *regions = av_realloc_array(ttml->regions,
> +                                                   ttml->region_count + child_count,
> +                                                   sizeof(*regions));
> +            if (!regions)
> +                return AVERROR(ENOMEM);
> +            ttml->regions = regions;
> +
> +            for (xmlNodePtr region_node = xmlFirstElementChild(section); region_node;
> +                 region_node = xmlNextElementSibling(region_node)) {
> +                TTMLRegion *region;
> +                int ret;
> +
> +                if (strcmp((const char *)region_node->name, "region"))
> +                    continue;
> +                region = &ttml->regions[ttml->region_count];
> +                /* realloc'ed memory is not zeroed; absent attributes must
> +                 * read back as NULL */
> +                memset(region, 0, sizeof(*region));
> +                ttml->region_count++;
> +                ret = parse_region(avctx, region, region_node);
> +                if (ret < 0)
> +                    return ret;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/**
> + * Split the codec extradata into its two strings and parse them as header.
> + *
> + * Layout read here: TTMLENC_EXTRADATA_SIGNATURE_SIZE bytes that are skipped,
> + * then the NUL-terminated <tt> attribute text, then the markup that goes
> + * inside <tt>.
> + * NOTE(review): the signature bytes themselves are not compared - confirm
> + * against what the demuxer writes.
> + *
> + * extradata_size does not include AV_INPUT_BUFFER_PADDING_SIZE, so the
> + * padding must not be part of the minimum-size test; including it rejected
> + * valid but short extradata.
> + *
> + * @return the result of parse_header(), or AVERROR_INVALIDDATA when the
> + *         extradata is too small or the attribute text is not terminated
> + */
> +static int parse_header_from_extradata(AVCodecContext *avctx) {
> +    const char *attr_data, *ttml_data;
> +    size_t payload_size, attr_size;
> +
> +    if (!avctx->extradata || avctx->extradata_size <= 0 ||
> +        (size_t)avctx->extradata_size <= TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1)
> +        return AVERROR_INVALIDDATA;
> +
> +    attr_data    = (const char *)avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE;
> +    payload_size = avctx->extradata_size - TTMLENC_EXTRADATA_SIGNATURE_SIZE;
> +    attr_size    = av_strnlen(attr_data, payload_size);
> +
> +    /* without a terminator inside the payload there is no second string */
> +    if (attr_size >= payload_size)
> +        return AVERROR_INVALIDDATA;
> +
> +    /* The second string is terminated either inside the payload or by the
> +     * zeroed padding that follows every extradata buffer. */
> +    ttml_data = attr_data + attr_size + 1;
> +    return parse_header(avctx, attr_data, ttml_data);
> +}
> +
> +/* Resolve the style that applies to p_node: the style named by its "style"
> + * attribute when that attribute is present, otherwise the style referenced by
> + * the region named in its "region" attribute. Returns NULL when neither
> + * lookup yields a style. */
> +static TTMLStyle* get_p_node_style(AVCodecContext *avctx, xmlNodePtr p_node)
> +{
> +    /* Sometimes region is defined with the style in the region */
> +    xmlChar *region_attr = xmlGetProp(p_node, "region");
> +    TTMLRegion *region   = region_attr ? get_region(avctx, region_attr) : NULL;
> +    xmlChar *style_attr  = xmlGetProp(p_node, "style");
> +    TTMLStyle *found     = NULL;
> +
> +    if (style_attr)
> +        found = get_style(avctx, style_attr);
> +    else if (region)
> +        found = get_style(avctx, region->style);
> +
> +    xmlFree(region_attr);
> +    xmlFree(style_attr);
> +    return found;
> +}
> +
> +/* Append ASS override tags to buf for every property in which new_style
> + * differs from root (italic, bold, primary colour, alignment). An alignment
> + * of 0 is never written. Always returns 0. */
> +static int apply_ass_styles(AVBPrint* buf, const TTMLStyle new_style, const TTMLStyle root) {
> +    if (new_style.italic != root.italic)
> +        av_bprintf(buf, "{\\i%d}", new_style.italic);
> +
> +    if (new_style.bold != root.bold)
> +        av_bprintf(buf, "{\\b%d}", new_style.bold);
> +
> +    if (new_style.color != root.color)
> +        av_bprintf(buf, "{\\c&H%"PRIX32"&}", new_style.color & 0xffffff);
> +
> +    if (new_style.alignment != 0 && new_style.alignment != root.alignment)
> +        av_bprintf(buf, "{\\an%d}", new_style.alignment);
> +
> +    return 0;
> +}
> +
> +/* Example: <p style="test">Regular <span tts:fontStyle="italic">Italics</span></p> */
> +/**
> + * Convert the content of a <p> or <span> element to ASS text, appended to buf.
> + *
> + * Inline style attributes on paragraph_node are emitted as override tags and
> + * undone again at the end; text nodes are written with leading whitespace
> + * removed; <br> becomes "\N"; <span> is processed recursively; the
> + * non-standard <metadata ccrow= cccol=> element becomes a \pos() tag.
> + *
> + * @param root_style style in effect outside paragraph_node
> + * @return 0
> + */
> +static int parse_p_xml_to_ass(AVCodecContext *avctx, AVBPrint* buf,
> +                              xmlNodePtr paragraph_node, const TTMLStyle root_style)
> +{
> +    TTMLStyle style_props = root_style;
> +
> +    /* parse extra styles that are in the node attrs */
> +    parse_styles(avctx, paragraph_node, root_style, &style_props);
> +    /* write extra styles that aren't a part of the root style */
> +    apply_ass_styles(buf, style_props, root_style);
> +
> +    for (xmlNodePtr child = paragraph_node->children; child; child = child->next) {
> +        const char *tag;
> +
> +        if (child->type == XML_TEXT_NODE) {
> +            xmlChar *content = xmlNodeGetContent(child);
> +
> +            if (content) {
> +                const char *text = (const char *)content;
> +
> +                /* spaces as formatting can get into the way as the xml parser keeps that */
> +                while (*text && av_isspace(*text))
> +                    text++;
> +                av_bprintf(buf, "%s", text);
> +                xmlFree(content);
> +            }
> +            continue;
> +        }
> +        if (child->type != XML_ELEMENT_NODE)
> +            continue;
> +
> +        tag = (const char *)child->name;
> +        if (!strcmp(tag, "metadata")) { /* <metadata ccrow="13" cccol="1"/> */
> +            /* This isn't in the OFFICIAL ttml docs. However, this tag is in some ttml files on certain services */
> +            /* This is defined like CEA-608 cols and rows */
> +            xmlChar *ccrow = xmlGetProp(child, "ccrow");
> +            xmlChar *cccol = xmlGetProp(child, "cccol");
> +
> +            /* Both attributes are required. The column used to be parsed
> +             * from ccrow, and strtol()'s end pointer overwrote the xmlChar
> +             * pointers, which were then never freed. */
> +            if (ccrow && cccol) {
> +                long row = strtol((const char *)ccrow, NULL, 10);
> +                long col = strtol((const char *)cccol, NULL, 10);
> +                int x, y;
> +
> +                /* far beyond any caption grid; keeps the arithmetic below
> +                 * inside the int range for hostile input */
> +                row = row < 0 ? 0 : row > 255 ? 255 : row;
> +                col = col < 0 ? 0 : col > 255 ? 255 : col;
> +
> +                x = ASS_DEFAULT_PLAYRESX * (0.1 + 0.0250 * col);
> +                y = ASS_DEFAULT_PLAYRESY * (0.1 + 0.0533 * row);
> +                av_bprintf(buf, "{\\pos(%d,%d)}", x, y);
> +            }
> +            xmlFree(ccrow);
> +            xmlFree(cccol);
> +        } else if (!strcmp(tag, "br")) {
> +            av_bprintf(buf, "\\N");
> +        } else if (!strcmp(tag, "span")) { /* Example: <span style="default" tts:fontStyle="italic"> */
> +            int restyled = 0;
> +            /* style may get switched (most of the time yes) */
> +            const TTMLStyle *span_style = get_p_node_style(avctx, child);
> +
> +            if (!span_style) {
> +                span_style = &root_style;
> +            } else if (span_style->name && root_style.name &&
> +                       strcmp(span_style->name, root_style.name)) {
> +                /* exact comparison: a substring test would treat "s10" as
> +                 * being the same style as "s1" */
> +                av_bprintf(buf, "{\\r%s}", span_style->name);
> +                restyled = 1;
> +            }
> +            parse_p_xml_to_ass(avctx, buf, child, *span_style);
> +            if (restyled)
> +                av_bprintf(buf, "{\\r%s}", root_style.name);
> +        }
> +    }
> +
> +    /* undo the special node styles */
> +    apply_ass_styles(buf, root_style, style_props);
> +
> +    /* style_props began as a shallow copy of root_style; a pointer that now
> +     * differs is a copy made by parse_styles() for this node only */
> +    if (style_props.fontfamily != root_style.fontfamily)
> +        av_free((void *)style_props.fontfamily);
> +    if (style_props.name != root_style.name)
> +        av_free((void *)style_props.name);
> +    return 0;
> +}
> +
> +
> +/**
> + * Decoder init: parse the TTML header (from extradata when present,
> + * otherwise an empty default document), make sure at least one style exists
> + * and generate the ASS subtitle header.
> + *
> + * @return 0 on success, a negative AVERROR code otherwise
> + */
> +static av_cold int ttml_init(AVCodecContext *avctx)
> +{
> +    TTMLContext *ttml = avctx->priv_data;
> +    int ret;
> +
> +    LIBXML_TEST_VERSION
> +
> +    /* "extradata_size >= 0" was always true, which made the fallback below
> +     * unreachable and failed init for streams without extradata. */
> +    if (avctx->extradata && avctx->extradata_size > 0)
> +        ret = parse_header_from_extradata(avctx);
> +    else
> +        /* an empty <head/> keeps parse_header() satisfied with no styles */
> +        ret = parse_header(avctx, TTML_DEFAULT_NAMESPACING, "<head/>");
> +    if (ret < 0)
> +        return ret;
> +
> +    if (!ttml->style_count) {
> +        /* set default stuff */
> +        /* drop an empty array left behind by a <styling> without styles */
> +        av_freep(&ttml->styles);
> +        ttml->styles = av_malloc_array(1, sizeof(*ttml->styles));
> +        if (!ttml->styles)
> +            return AVERROR(ENOMEM);
> +        init_style(&ttml->styles[0]);
> +        ttml->style_count = 1;
> +        /* init_style() cannot report a failed av_strdup() itself */
> +        if (!ttml->styles[0].name || !ttml->styles[0].fontfamily)
> +            return AVERROR(ENOMEM);
> +    }
> +    return create_ass_header(avctx);
> +}
> +
> +/**
> + * Recreate p tag for parsing.
> + * Not the best way (of course) but it ensures all data is proper using libxml2
> + *
> + * Writes "<p SETTINGS>PAYLOAD</p>" to buf, where SETTINGS is the packet's
> + * AV_PKT_DATA_WEBVTT_SETTINGS side data (if any) and PAYLOAD the packet data.
> + * Both are written with an explicit length: side data carries no guarantee
> + * of being NUL-terminated, so "%s" could read past its end.
> + *
> + * @return 0 on success, AVERROR(EINVAL) for unusable sizes, AVERROR(ENOMEM)
> + *         if buf could not hold the result
> + */
> +static int recreate_p_tag(const AVPacket *avpkt, AVBPrint* buf) {
> +    size_t settings_size = 0;
> +    const uint8_t *settings =
> +        av_packet_get_side_data(avpkt, AV_PKT_DATA_WEBVTT_SETTINGS,
> +                                &settings_size);
> +
> +    if (settings_size > INT_MAX || avpkt->size < 0)
> +        return AVERROR(EINVAL);
> +
> +    /* recreate p tag with our data for parsing */
> +    av_bprintf(buf, "<p ");
> +
> +    if (settings && settings_size > 0)
> +        av_bprintf(buf, "%.*s", (int)settings_size, (const char *)settings);
> +
> +    av_bprintf(buf, ">%.*s</p>", avpkt->size,
> +               avpkt->data ? (const char *)avpkt->data : "");
> +
> +    if (!av_bprint_is_complete(buf))
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +/**
> + * Decode one packet: rebuild its <p> element, parse it in the context of the
> + * header document, resolve the style that applies and emit one ASS rect.
> + *
> + * Style resolution order: the paragraph's own style/region, then the
> + * style/region listed in its "default" attribute, then the first child
> + * element if that is a <span>. A paragraph for which none of these yields a
> + * style is rejected.
> + *
> + * @return avpkt->size on success, a negative AVERROR code otherwise
> + */
> +static int ttml_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
> +                             int *got_sub_ptr, const AVPacket *avpkt)
> +{
> +    TTMLContext *ttml = avctx->priv_data;
> +    TTMLStyle *style;
> +    /* both start as NULL so the cleanup label is safe from every jump */
> +    xmlNodePtr p_node = NULL;
> +    xmlChar *default_str = NULL;
> +    AVBPrint buf;
> +    int ret;
> +
> +    av_bprint_init(&buf, 0, INT_MAX);
> +
> +    ret = recreate_p_tag(avpkt, &buf);
> +    if (ret < 0)
> +        goto cleanup;
> +
> +    /* buf.len is the text length; buf.size is the allocated capacity and
> +     * would make libxml2 parse whatever follows the terminator */
> +    if (xmlParseInNodeContext(ttml->node, buf.str, buf.len, 0,
> +                              &p_node) != XML_ERR_OK || !p_node) {
> +        ret = AVERROR_INVALIDDATA;
> +        goto cleanup;
> +    }
> +
> +    av_bprint_clear(&buf);
> +
> +    /* a way to get the default body attrs */
> +    default_str = xmlGetProp(p_node, "default");
> +    style = get_p_node_style(avctx, p_node);
> +    if (!style && default_str) { /* if there are defaults and can't find style then check defaults */
> +        xmlNodePtr default_node = NULL;
> +
> +        /* recreate p tag with our data for parsing */
> +        av_bprintf(&buf, "<p %s/>", default_str);
> +        if (!av_bprint_is_complete(&buf)) {
> +            ret = AVERROR(ENOMEM);
> +            goto cleanup;
> +        }
> +        if (xmlParseInNodeContext(ttml->node, buf.str, buf.len, 0,
> +                                  &default_node) != XML_ERR_OK || !default_node) {
> +            xmlFreeNodeList(default_node);
> +            ret = AVERROR_INVALIDDATA;
> +            goto cleanup;
> +        }
> +        style = get_p_node_style(avctx, default_node);
> +        xmlFreeNodeList(default_node);
> +        av_bprint_clear(&buf);
> +    }
> +    if (!style) {
> +        /* style may be in the first span with no text in front */
> +        /* Text and other non-element nodes are skipped; only the first
> +         * element child is considered. (The old whitespace scan over text
> +         * nodes had no effect and was dropped.) */
> +        for (xmlNodePtr child = p_node->children; child; child = child->next) {
> +            if (child->type != XML_ELEMENT_NODE)
> +                continue;
> +            if (!strcmp((const char *)child->name, "span"))
> +                style = get_p_node_style(avctx, child);
> +            break;
> +        }
> +        if (!style) {
> +            /* this is a type of subtitle which requires some sort of style */
> +            av_log(avctx, AV_LOG_ERROR, "Unable to find style data['%.*s']\n",
> +                   avpkt->size, avpkt->data ? (const char *)avpkt->data : "");
> +            ret = AVERROR_INVALIDDATA;
> +            goto cleanup;
> +        }
> +    }
> +
> +    parse_p_xml_to_ass(avctx, &buf, p_node, *style);
> +    if (!av_bprint_is_complete(&buf)) {
> +        ret = AVERROR(ENOMEM);
> +        goto cleanup;
> +    }
> +    ret = ff_ass_add_rect(sub, buf.str, ttml->readorder++, 0,
> +                          style->name, NULL);
> +    if (ret < 0)
> +        goto cleanup;
> +
> +    ret = avpkt->size;
> +    *got_sub_ptr = sub->num_rects > 0;
> +
> +cleanup:
> +    av_bprint_finalize(&buf, NULL);
> +    xmlFree(default_str);
> +    /* xmlParseInNodeContext() returns an unlinked node list; it has to be
> +     * released with xmlFreeNodeList(), not xmlFree() */
> +    xmlFreeNodeList(p_node);
> +    return ret;
> +}
> +
> +/* Release everything owned by the decoder context: the strings of every
> + * style and region, both arrays, and the parsed header document. */
> +static av_cold int ttml_close(AVCodecContext *avctx)
> +{
> +    TTMLContext *ttml = avctx->priv_data;
> +    int idx;
> +
> +    for (idx = 0; idx < ttml->style_count; idx++) {
> +        TTMLStyle *entry = &ttml->styles[idx];
> +
> +        av_freep(&entry->name);
> +        av_freep(&entry->fontfamily);
> +    }
> +    av_freep(&ttml->styles);
> +
> +    for (idx = 0; idx < ttml->region_count; idx++) {
> +        TTMLRegion *entry = &ttml->regions[idx];
> +
> +        av_freep(&entry->style);
> +        av_freep(&entry->name);
> +    }
> +    av_freep(&ttml->regions);
> +
> +    xmlFreeDoc(ttml->doc);
> +    return 0;
> +}
> +
> +/* Codec registration for the TTML subtitle decoder. */
> +const FFCodec ff_ttml_decoder = {
> +        .p.name         = "ttml",
> +        CODEC_LONG_NAME("TTML subtitle"),
> +        .priv_data_size = sizeof(TTMLContext),
> +        .p.type         = AVMEDIA_TYPE_SUBTITLE,
> +        .p.id           = AV_CODEC_ID_TTML,
> +        FF_CODEC_DECODE_SUB_CB(ttml_decode_frame),
> +        .init           = ttml_init,
> +        .flush          = ff_ass_decoder_flush,
> +        .close          = ttml_close,
> +        /* ttml_init() allocates the XML document and the style/region arrays
> +         * before it can still fail; have ttml_close() run in that case so
> +         * they are not leaked. */
> +        .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> +};
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 39dbec0208..ecb096f38b 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -29,7 +29,7 @@
>
>  #include "version_major.h"
>
> -#define LIBAVCODEC_VERSION_MINOR   6
> +#define LIBAVCODEC_VERSION_MINOR   7
>  #define LIBAVCODEC_VERSION_MICRO 101
>
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> --
> 2.37.1.windows.1
>
>
diff mbox series

Patch

diff --git a/configure b/configure
index 03d3c429a5..fbe11f3663 100755
--- a/configure
+++ b/configure
@@ -3311,6 +3311,7 @@  pcm_alaw_at_decoder_deps="audiotoolbox"
 pcm_mulaw_at_decoder_deps="audiotoolbox"
 qdmc_at_decoder_deps="audiotoolbox"
 qdm2_at_decoder_deps="audiotoolbox"
+ttml_decoder_deps="libxml2"
 aac_at_encoder_deps="audiotoolbox"
 aac_at_encoder_select="audio_frame_queue"
 alac_at_encoder_deps="audiotoolbox"
@@ -3479,6 +3480,7 @@  rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer"
 rtpdec_select="asf_demuxer mov_demuxer mpegts_demuxer rm_demuxer rtp_protocol srtp"
 rtsp_demuxer_select="http_protocol rtpdec"
 rtsp_muxer_select="rtp_muxer http_protocol rtp_protocol rtpenc_chain"
+ttml_demuxer_deps="libxml2"
 sap_demuxer_select="sdp_demuxer"
 sap_muxer_select="rtp_muxer rtp_protocol rtpenc_chain"
 sdp_demuxer_select="rtpdec"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index abae4909d2..96fc259c93 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -719,6 +719,7 @@  OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o msrledec.o
 OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
 OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
 OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttaencdsp.o ttadata.o
+OBJS-$(CONFIG_TTML_DECODER)            += ttmldec.o
 OBJS-$(CONFIG_TTML_ENCODER)            += ttmlenc.o ass_split.o
 OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o
 OBJS-$(CONFIG_TXD_DECODER)             += txd.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 385ee34803..43ac735dc6 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -730,6 +730,7 @@  extern const FFCodec ff_subviewer_decoder;
 extern const FFCodec ff_subviewer1_decoder;
 extern const FFCodec ff_text_encoder;
 extern const FFCodec ff_text_decoder;
+extern const FFCodec ff_ttml_decoder;
 extern const FFCodec ff_ttml_encoder;
 extern const FFCodec ff_vplayer_decoder;
 extern const FFCodec ff_webvtt_encoder;
diff --git a/libavcodec/ttmldec.c b/libavcodec/ttmldec.c
new file mode 100644
index 0000000000..7bdcdc1bca
--- /dev/null
+++ b/libavcodec/ttmldec.c
@@ -0,0 +1,711 @@ 
+/*
+ * TTML subtitle decoder
+ * Copyright (c) 2023 Aidan Vaughan (TheDaChicken)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <libxml/parser.h>
+#include "avcodec.h"
+#include "ass.h"
+#include "ttmlenc.h"
+#include "codec_internal.h"
+#include "version.h"
+#include "libavutil/bprint.h"
+#include "libavutil/parseutils.h"
+
+/**
+ * @file
+ * TTML subtitle decoder
+ * @see https://www.w3.org/TR/ttml1/
+ * @see https://www.w3.org/TR/ttml2/
+ * @see https://www.w3.org/TR/ttml-imsc/rec
+ */
+
+/* printf-style template used by parse_header() to build the document that is
+ * handed to libxml2: the first %s receives the attributes of the root <tt>
+ * element, the second %s receives the markup placed inside it. */
+static const char* TTML_HEADER =
+        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+        "<tt %s>%s</tt>";
+
+/* One style: either a <style> element from the header or the effective style
+ * of a <p>/<span> while it is being converted to ASS. */
+typedef struct {
+    const char* name;        /* value of the "id" attribute; key used by get_style() */
+    const char* fontfamily;  /* value of the "fontFamily" attribute */
+    uint8_t fontsize;        /* result of parse_fontsize() */
+    uint8_t bold;            /* 1 if "fontWeight" starts with "bold", else 0 */
+    uint8_t italic;          /* 1 if "fontStyle" starts with "italic", else 0 */
+    int color;               /* packed by parse_styles() as R | G << 8 | B << 16 */
+    int backgroundColor;     /* same packing as color */
+    int alignment;           /* result of parse_text_align(); 0 means "not set" */
+} TTMLStyle;
+
+/* One <region> element from the header's <layout>. */
+typedef struct {
+    const char* style;  /* value of the region's "style" attribute (a style name) */
+    const char* name;   /* value of the region's "id" attribute; key used by get_region() */
+} TTMLRegion;
+
+/* Private decoder state. */
+typedef struct {
+    xmlDocPtr doc;          /* document built by parse_header(); freed in ttml_close() */
+    xmlNodePtr node;        /* root element of doc; context node for xmlParseInNodeContext() */
+    int readorder;          /* incremented for every rectangle passed to ff_ass_add_rect() */
+    TTMLStyle* styles;      /* array of style_count styles */
+    TTMLRegion* regions;    /* array of region_count regions */
+    uint32_t style_count;
+    uint32_t region_count;
+} TTMLContext;
+
+/* Style filled with the ASS_DEFAULT_* values; passed as the root style when
+ * the <style> elements of the header are parsed. */
+static const TTMLStyle DEFAULT_STYLE = {
+    .name            = "Default",
+    .fontfamily      = ASS_DEFAULT_FONT,
+    .fontsize        = ASS_DEFAULT_FONT_SIZE,
+    .bold            = ASS_DEFAULT_BOLD,
+    .italic          = ASS_DEFAULT_ITALIC,
+    .color           = ASS_DEFAULT_COLOR,
+    .backgroundColor = ASS_DEFAULT_BACK_COLOR,
+    .alignment       = ASS_DEFAULT_ALIGNMENT,
+};
+
+/* Reset *style to the ASS defaults. The name and font family are heap copies
+ * (av_strdup) so that they can later be released with av_freep(); either may
+ * be NULL if the allocation failed. */
+static void init_style(TTMLStyle* style) {
+    *style = (TTMLStyle) {
+        .name            = av_strdup("Default"),
+        .fontfamily      = av_strdup(ASS_DEFAULT_FONT),
+        .fontsize        = ASS_DEFAULT_FONT_SIZE,
+        .bold            = ASS_DEFAULT_BOLD,
+        .italic          = ASS_DEFAULT_ITALIC,
+        .color           = ASS_DEFAULT_COLOR,
+        .backgroundColor = ASS_DEFAULT_BACK_COLOR,
+        .alignment       = ASS_DEFAULT_ALIGNMENT,
+    };
+}
+
+/**
+ * Parse a tts:fontSize attribute value such as "24px".
+ * https://www.w3.org/TR/2018/REC-ttml2-20181108/#style-attribute-fontSize
+ *
+ * Only the leading unsigned integer is read; the unit suffix is not validated.
+ *
+ * @param root style whose font size is returned when no number can be read
+ * @param key  attribute value to parse (may be NULL)
+ * @return the parsed size, limited to UINT8_MAX because the result is stored
+ *         in the uint8_t TTMLStyle.fontsize field, or root.fontsize if key
+ *         does not start with a number
+ */
+static int parse_fontsize(const TTMLStyle root, const char *key) {
+    unsigned int px;
+
+    /* %u needs an unsigned destination; falling back to the inherited size
+     * avoids emitting a zero font size for values such as "100%" typos or
+     * keywords that cannot be parsed */
+    if (!key || sscanf(key, "%upx", &px) != 1)
+        return root.fontsize;
+
+    return px > UINT8_MAX ? UINT8_MAX : (int)px;
+}
+
+/* Map a tts:textAlign value onto the number written into the ASS alignment
+ * field.
+ * https://www.w3.org/TR/2018/REC-ttml2-20181108/#style-attribute-textAlign
+ * Keywords are matched by prefix; anything other than left/center/right
+ * yields 0. */
+static int parse_text_align(const char *text_align) {
+    static const struct {
+        const char *keyword;
+        int         value;
+    } table[] = {
+        { "left",   1 },
+        { "center", 2 },
+        { "right",  3 },
+    };
+
+    for (size_t k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
+        if (strncmp(text_align, table[k].keyword, strlen(table[k].keyword)) == 0)
+            return table[k].value;
+    }
+    return 0;
+}
+
+/* Return the first element child of rootnode whose name equals nodename
+ * (compared case-insensitively). Returns NULL if rootnode is NULL or no
+ * child matches. */
+static xmlNodePtr find_child_node_by_name(xmlNodePtr rootnode, const char *nodename)
+{
+    xmlNodePtr cur;
+
+    if (!rootnode)
+        return NULL;
+
+    for (cur = xmlFirstElementChild(rootnode); cur; cur = xmlNextElementSibling(cur)) {
+        if (av_strcasecmp(cur->name, nodename) == 0)
+            return cur;
+    }
+    return NULL;
+}
+
+/**
+ * Look up a header style by name.
+ *
+ * @param avctx      codec context whose private data holds the style array
+ * @param style_name name to search for (may be NULL)
+ * @return the first style whose name is exactly style_name, or NULL if
+ *         style_name is NULL or no style matches
+ */
+static TTMLStyle* get_style(AVCodecContext *avctx, const char* style_name) {
+    TTMLContext *ttml = avctx->priv_data;
+
+    if (!style_name)
+        return NULL;
+
+    for (uint32_t i = 0; i < ttml->style_count; i++) {
+        TTMLStyle *style = &ttml->styles[i];
+
+        /* exact comparison: matching only the first strlen(style_name)
+         * characters would return a style called "s10" when "s1" is asked for */
+        if (style->name && !strcmp(style->name, style_name))
+            return style;
+    }
+    return NULL;
+}
+
+/**
+ * Look up a header region by name.
+ *
+ * @param avctx       codec context whose private data holds the region array
+ * @param region_name name to search for (may be NULL)
+ * @return the first region whose name is exactly region_name, or NULL if
+ *         region_name is NULL or no region matches
+ */
+static TTMLRegion* get_region(AVCodecContext *avctx, const char* region_name) {
+    TTMLContext *ttml = avctx->priv_data;
+
+    if (!region_name)
+        return NULL;
+
+    for (uint32_t i = 0; i < ttml->region_count; i++) {
+        TTMLRegion *region = &ttml->regions[i];
+
+        /* exact comparison: matching only the first strlen(region_name)
+         * characters would return a region called "r10" when "r1" is asked for */
+        if (region->name && !strcmp(region->name, region_name))
+            return region;
+    }
+    return NULL;
+}
+
+/**
+ * Build the ASS script header for this stream and store it in
+ * avctx->subtitle_header / avctx->subtitle_header_size.
+ *
+ * One "Style:" line is written for every entry of the context's style array.
+ *
+ * @return 0 on success, a negative AVERROR code (ENOMEM) if the header could
+ *         not be assembled; nothing is stored in avctx in that case
+ */
+static int create_ass_header(AVCodecContext *avctx) {
+    TTMLContext *ttml = avctx->priv_data;
+    AVBPrint buf;
+    char *header = NULL;
+    unsigned header_len;
+    int ret;
+
+    av_bprint_init(&buf, 0, INT_MAX);
+    av_bprintf(&buf,
+               "[Script Info]\r\n"
+               "; Script generated by FFmpeg/Lavc%s\r\n"
+               "ScriptType: v4.00+\r\n"
+               "PlayResX: %d\r\n"
+               "PlayResY: %d\r\n"
+               "ScaledBorderAndShadow: yes\r\n"
+               "YCbCr Matrix: None\r\n"
+               "\r\n"
+               "[V4+ Styles]\r\n"
+
+               /* ASS (v4+) header */
+               "Format: Name, "
+               "Fontname, Fontsize, "
+               "PrimaryColour, SecondaryColour, OutlineColour, BackColour, "
+               "Bold, Italic, Underline, StrikeOut, "
+               "ScaleX, ScaleY, "
+               "Spacing, Angle, "
+               "BorderStyle, Outline, Shadow, "
+               "Alignment, MarginL, MarginR, MarginV, "
+               "Encoding\r\n",
+               !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
+               ASS_DEFAULT_PLAYRESX, ASS_DEFAULT_PLAYRESY);
+
+    for (uint32_t i = 0; i < ttml->style_count; i++) {
+        const TTMLStyle *style = &ttml->styles[i];
+
+        av_bprintf(&buf,"Style: "
+                        "%s,"                  /* Name */
+                        "%s,%d,"               /* Font{name,size} */
+                        "&H%x,&H%x,&H%x,&H%x," /* {Primary,Secondary,Outline,Back}Colour */
+                        "%d,%d,%d,0,"          /* Bold, Italic, Underline, StrikeOut */
+                        "100,100,"             /* Scale{X,Y} */
+                        "0,0,"                 /* Spacing, Angle */
+                        "%d,1,0,"              /* BorderStyle, Outline, Shadow */
+                        "%d,10,10,10,"         /* Alignment, Margin[LRV] */
+                        "1\r\n"                /* Encoding */,
+                   style->name, style->fontfamily, style->fontsize,
+                   style->color, style->color,
+                   style->backgroundColor, style->backgroundColor,
+                   style->bold, style->italic, 0,
+                   ASS_DEFAULT_BORDERSTYLE, style->alignment);
+    }
+
+    av_bprintf(&buf,
+               "\r\n"
+               "[Events]\r\n"
+               "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n");
+
+    if (!av_bprint_is_complete(&buf)) {
+        /* the buffer still has to be released on the error path */
+        av_bprint_finalize(&buf, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    /* let the AVBPrint hand over a right-sized copy of the string instead of
+     * copying its whole allocation by hand */
+    header_len = buf.len;
+    ret = av_bprint_finalize(&buf, &header);
+    if (ret < 0)
+        return ret;
+
+    avctx->subtitle_header      = (uint8_t *)header;
+    avctx->subtitle_header_size = header_len;
+    return 0;
+}
+
+/**
+ * Fill *region from the attributes of a <region> element.
+ * Example: <region style="default" xml:id="speaker"></region>
+ *
+ * Both fields are reset first, so the caller may pass uninitialised storage.
+ * The "id" attribute becomes region->name and the "style" attribute becomes
+ * region->style; both are heap copies to be released with av_free().
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if a string could not be copied
+ */
+static int parse_region(AVCodecContext *avctx, TTMLRegion* region, xmlNodePtr node) {
+    /* the region array is not zeroed by its allocator; without this a region
+     * lacking one of the attributes would leave a garbage pointer behind that
+     * is later compared in get_region() and freed in ttml_close() */
+    region->name  = NULL;
+    region->style = NULL;
+
+    for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
+        const char *attr_name = (const char *)attr->name;
+        const char **dst;
+        xmlChar *val;
+        char *dup;
+
+        if (!strcmp(attr_name, "id"))
+            dst = &region->name;
+        else if (!strcmp(attr_name, "style"))
+            dst = &region->style;
+        else
+            continue;
+
+        val = xmlGetProp(node, attr->name);
+        if (!val)
+            continue;
+        dup = av_strdup((const char *)val);
+        xmlFree(val);
+        if (!dup)
+            return AVERROR(ENOMEM);
+
+        /* an earlier attribute with the same local name may already have set it */
+        av_free((void *)*dst);
+        *dst = dup;
+    }
+    return 0;
+}
+
+/**
+ * Apply the styling attributes found on an element to *ttml_style.
+ *
+ * Example: tts:fontSize="24px" tts:fontFamily="Arial" tts:fontWeight="normal" tts:fontStyle="normal"
+ * tts:textDecoration="none" tts:color="white" tts:backgroundColor="black" tts:textAlign="center"
+ * tts:fontStyle="italic"
+ *
+ * Attributes are matched on their local name. A colour that cannot be parsed
+ * or a text alignment that is not recognised leaves the corresponding field
+ * untouched.
+ *
+ * Ownership: on entry each string field of *ttml_style must either be the very
+ * same pointer as the corresponding field of root_style (borrowed) or a heap
+ * string owned by *ttml_style. A borrowed pointer is never freed here; an
+ * owned one is freed when it is replaced.
+ *
+ * @param avctx       logging context for av_parse_color()
+ * @param node        element whose attributes are read
+ * @param root_style  style the element inherits from
+ * @param ttml_style  style to update
+ * @return 0 (always)
+ */
+static int parse_styles(AVCodecContext *avctx, xmlNodePtr node, const TTMLStyle root_style, TTMLStyle* ttml_style) {
+    for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
+        const char *name = (const char *)attr->name;
+        xmlChar *xval = xmlGetProp(node, attr->name);
+        const char *val = (const char *)xval;
+
+        if (!val)
+            continue;
+
+        if (!strcmp(name, "id")) {
+            /* P tag could contain labels for subtitles.
+             * It's not a STYLE name */
+            if (!strcmp((const char *)node->name, "style")) {
+                char *dup = av_strdup(val);
+                if (dup) {
+                    if (ttml_style->name != root_style.name)
+                        av_free((void *)ttml_style->name);
+                    ttml_style->name = dup;
+                }
+            }
+        } else if (!strcmp(name, "fontStyle")) {
+            ttml_style->italic = (!strncmp(val, "italic", 6) ? 1 : 0);
+        } else if (!strcmp(name, "fontWeight")) {
+            ttml_style->bold = (!strncmp(val, "bold", 4) ? 1 : 0);
+        } else if (!strcmp(name, "fontSize")) {
+            ttml_style->fontsize = parse_fontsize(root_style, val);
+        } else if (!strcmp(name, "fontFamily")) {
+            char *dup = av_strdup(val);
+            if (dup) {
+                if (ttml_style->fontfamily != root_style.fontfamily)
+                    av_free((void *)ttml_style->fontfamily);
+                ttml_style->fontfamily = dup;
+            }
+        } else if (!strcmp(name, "color")) {
+            uint8_t rgba[4];
+            /* rgba is only written on success; using it after a failed parse
+             * would store uninitialised bytes as the colour */
+            if (av_parse_color(rgba, val, strlen(val), avctx) >= 0)
+                ttml_style->color = rgba[0] | rgba[1] << 8 | rgba[2] << 16;
+        } else if (!strcmp(name, "backgroundColor")) {
+            uint8_t rgba[4];
+            if (av_parse_color(rgba, val, strlen(val), avctx) >= 0)
+                ttml_style->backgroundColor = rgba[0] | rgba[1] << 8 | rgba[2] << 16;
+        } else if (!strcmp(name, "textAlign")) {
+            /* 0 means "not recognised": keep the inherited alignment rather
+             * than writing an invalid 0 into the style */
+            int align = parse_text_align(val);
+            if (align)
+                ttml_style->alignment = align;
+        }
+        xmlFree(xval);
+    }
+    return 0;
+}
+
+/**
+ * Build the header document and collect its styles and regions.
+ *
+ * The document "<tt attr_data>ttml_data</tt>" is parsed with libxml2 and kept
+ * in the context (doc / node) so that later paragraphs can be parsed in its
+ * namespace context. Every <style> inside <head>/<styling> is appended to the
+ * style array and every <region> inside <head>/<layout> to the region array.
+ * A document without <head> is accepted and simply yields no styles/regions.
+ *
+ * @param attr_data attributes for the root <tt> element
+ * @param ttml_data markup placed inside the root element
+ * @return 0 on success; AVERROR_INVALIDDATA if the document cannot be parsed,
+ *         AVERROR(ENOMEM) on allocation failure. On failure the document is
+ *         freed again and doc/node are reset to NULL.
+ */
+static int parse_header(AVCodecContext *avctx,
+                        const char* attr_data, const char* ttml_data) {
+    TTMLContext *ttml = avctx->priv_data;
+    xmlNodePtr head_node;
+    AVBPrint buf;
+    int ret = 0;
+
+    av_bprint_init(&buf, 0, INT_MAX);
+    av_bprintf(&buf, TTML_HEADER,
+               attr_data, ttml_data);
+    if (!av_bprint_is_complete(&buf)) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    ttml->doc = xmlReadMemory(buf.str, buf.len, NULL, NULL, 0);
+    if (!ttml->doc) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+    ttml->node = xmlDocGetRootElement(ttml->doc);
+    if (!ttml->node) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    /* <head> is optional in TTML; without it there is nothing to collect */
+    head_node = find_child_node_by_name(ttml->node, "head");
+    if (!head_node)
+        goto end;
+
+    for (xmlNodePtr child = xmlFirstElementChild(head_node); child;
+         child = xmlNextElementSibling(child)) {
+        const char *child_name = (const char *)child->name;
+        unsigned long count = xmlChildElementCount(child);
+
+        if (!count)
+            continue;
+
+        if (!strcmp(child_name, "styling")) {
+            /* grow through a temporary so the old array survives a failure,
+             * and append so a second <styling> does not drop earlier styles */
+            TTMLStyle *styles = av_realloc_array(ttml->styles,
+                                                 ttml->style_count + count,
+                                                 sizeof(*styles));
+            if (!styles) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+            ttml->styles = styles;
+
+            for (xmlNodePtr style_node = xmlFirstElementChild(child); style_node;
+                 style_node = xmlNextElementSibling(style_node)) {
+                TTMLStyle *style;
+
+                if (strcmp((const char *)style_node->name, "style"))
+                    continue;
+                style = &ttml->styles[ttml->style_count];
+                init_style(style);
+                ret = parse_styles(avctx, style_node, DEFAULT_STYLE, style);
+                ttml->style_count++;
+                if (ret < 0)
+                    goto end;
+            }
+        } else if (!strcmp(child_name, "layout")) {
+            TTMLRegion *regions = av_realloc_array(ttml->regions,
+                                                   ttml->region_count + count,
+                                                   sizeof(*regions));
+            if (!regions) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+            ttml->regions = regions;
+
+            for (xmlNodePtr region_node = xmlFirstElementChild(child); region_node;
+                 region_node = xmlNextElementSibling(region_node)) {
+                TTMLRegion *region;
+
+                if (strcmp((const char *)region_node->name, "region"))
+                    continue;
+                region = &ttml->regions[ttml->region_count];
+                /* the array is not zeroed by the allocator */
+                *region = (TTMLRegion){ 0 };
+                ret = parse_region(avctx, region, region_node);
+                ttml->region_count++;
+                if (ret < 0)
+                    goto end;
+            }
+        }
+    }
+
+end:
+    if (ret < 0) {
+        xmlFreeDoc(ttml->doc);
+        ttml->doc  = NULL;
+        ttml->node = NULL;
+    }
+    av_bprint_finalize(&buf, NULL);
+    return ret;
+}
+
+/**
+ * Parse the header carried in avctx->extradata.
+ *
+ * Layout read here: TTMLENC_EXTRADATA_SIGNATURE_SIZE bytes that are skipped,
+ * then a NUL-terminated string with the attributes of the root <tt> element,
+ * then a string with the markup that goes inside it.
+ *
+ * @return the result of parse_header(), or AVERROR_INVALIDDATA if the
+ *         extradata holds nothing beyond the skipped prefix
+ */
+static int parse_header_from_extradata(AVCodecContext *avctx) {
+    const char *attr_data;
+    const char *ttml_data;
+    size_t payload_size;
+    size_t attr_size;
+
+    /* extradata_size does not count the AV_INPUT_BUFFER_PADDING_SIZE bytes
+     * allocated behind the data, so the padding must not be part of the
+     * minimum length */
+    if (!avctx->extradata || avctx->extradata_size <= 0 ||
+        (size_t)avctx->extradata_size <= TTMLENC_EXTRADATA_SIGNATURE_SIZE + 1)
+        return AVERROR_INVALIDDATA;
+
+    attr_data    = (const char *)avctx->extradata + TTMLENC_EXTRADATA_SIGNATURE_SIZE;
+    payload_size = avctx->extradata_size - TTMLENC_EXTRADATA_SIGNATURE_SIZE;
+    attr_size    = av_strnlen(attr_data, payload_size);
+
+    /* NOTE(review): both strings are consumed as C strings; if the last one
+     * is not terminated inside extradata this relies on the padding behind
+     * it being zeroed - confirm the demuxer guarantees that */
+    ttml_data = attr_size + 1 < payload_size ? attr_data + attr_size + 1 : "";
+
+    return parse_header(avctx, attr_data, ttml_data);
+}
+
+/* Resolve the header style referenced by an element: its own "style"
+ * attribute is looked up if present, otherwise the style named by the region
+ * given in its "region" attribute. Returns NULL when nothing is found. */
+static TTMLStyle* get_p_node_style(AVCodecContext *avctx, xmlNodePtr p_node)
+{
+    /* Sometimes region is defined with the style in the region */
+    xmlChar *region_attr = xmlGetProp(p_node, "region");
+    TTMLRegion *matched_region = region_attr ? get_region(avctx, region_attr) : NULL;
+    xmlChar *style_attr = xmlGetProp(p_node, "style");
+    TTMLStyle *result = NULL;
+
+    if (style_attr)
+        result = get_style(avctx, style_attr);
+    else if (matched_region)
+        result = get_style(avctx, matched_region->style);
+
+    xmlFree(region_attr);
+    xmlFree(style_attr);
+    return result;
+}
+
+/* Append ASS override tags for every property of new_style that differs from
+ * root: italic, bold, primary colour and alignment. An alignment of 0 in
+ * new_style is treated as "not set" and never written. Always returns 0. */
+static int apply_ass_styles(AVBPrint* buf, const TTMLStyle new_style, const TTMLStyle root) {
+    const int italic_changed = new_style.italic != root.italic;
+    const int bold_changed   = new_style.bold != root.bold;
+    const int color_changed  = new_style.color != root.color;
+    const int align_changed  = new_style.alignment != 0 &&
+                               new_style.alignment != root.alignment;
+
+    if (italic_changed)
+        av_bprintf(buf, "{\\i%d}", new_style.italic);
+    if (bold_changed)
+        av_bprintf(buf, "{\\b%d}", new_style.bold);
+    if (color_changed)
+        av_bprintf(buf, "{\\c&H%"PRIX32"&}", new_style.color & 0xffffff);
+    if (align_changed)
+        av_bprintf(buf, "{\\an%d}", new_style.alignment);
+    return 0;
+}
+
+/**
+ * Convert the content of a <p> or <span> element to ASS text.
+ * Example: <p style="test">Regular <span tts:fontStyle="italic">Italics</span></p>
+ *
+ * Inline styling attributes of the element are written as override tags
+ * before its content and reverted afterwards. Text nodes are appended with
+ * their leading whitespace removed, <br> becomes "\N", <metadata> with
+ * ccrow/cccol becomes a \pos tag and <span> children are handled recursively.
+ *
+ * @param buf            output buffer
+ * @param paragraph_node element to convert
+ * @param root_style     style in effect outside of the element
+ * @return 0 (always)
+ */
+static int parse_p_xml_to_ass(AVCodecContext *avctx, AVBPrint* buf,
+                              xmlNodePtr paragraph_node, const TTMLStyle root_style)
+{
+    TTMLStyle style_props = root_style;
+    xmlNodePtr child;
+
+    /* parse extra styles that are in the node attrs */
+    parse_styles(avctx, paragraph_node, root_style, &style_props);
+    /* write extra styles that aren't a part of the root style */
+    apply_ass_styles(buf, style_props, root_style);
+
+    for (child = paragraph_node->children; child != NULL; child = child->next) {
+        const char *child_name;
+
+        if (child->type == XML_TEXT_NODE) {
+            xmlChar *content = xmlNodeGetContent(child);
+            const char *text = (const char *)content;
+
+            /* spaces as formatting can get into the way as the xml parser keeps that */
+            while (text && av_isspace(*text))
+                text++;
+            if (text)
+                av_bprintf(buf, "%s", text);
+            xmlFree(content);
+            continue;
+        }
+        if (child->type != XML_ELEMENT_NODE)
+            continue;
+        child_name = (const char *)child->name;
+
+        if (!strcmp(child_name, "metadata")) { /* <metadata ccrow="13" cccol="1"/> */
+            /* This isn't in the OFFICIAL ttml docs. However, this tag is in some ttml files on certain services */
+            /* This is defined like CEA-608 cols and rows */
+            xmlChar *ccrow = xmlGetProp(child, "ccrow");
+            xmlChar *cccol = xmlGetProp(child, "cccol");
+
+            /* each value is read from its own attribute, and the attribute
+             * strings stay intact so that they can be released below */
+            if (ccrow && cccol) {
+                long row = strtol((const char *)ccrow, NULL, 10);
+                long col = strtol((const char *)cccol, NULL, 10);
+                int x = ASS_DEFAULT_PLAYRESX * (0.1 + 0.0250 * col);
+                int y = ASS_DEFAULT_PLAYRESY * (0.1 + 0.0533 * row);
+
+                av_bprintf(buf, "{\\pos(%d,%d)}", x, y);
+            }
+            xmlFree(ccrow);
+            xmlFree(cccol);
+        } else if (!strcmp(child_name, "br")) {
+            av_bprintf(buf, "\\N");
+        } else if (!strcmp(child_name, "span")) { /* Example: <span style="default" tts:fontStyle="italic"> */
+            int reset = 0;
+            /* style may get switched (most of the time yes) */
+            const TTMLStyle *new_style = get_p_node_style(avctx, child);
+
+            if (!new_style) {
+                new_style = &root_style;
+            } else if (root_style.name && strcmp(new_style->name, root_style.name)) {
+                /* a different named style: switch to it for the span */
+                av_bprintf(buf, "{\\r%s}", new_style->name);
+                reset = 1;
+            }
+            parse_p_xml_to_ass(avctx, buf, child, *new_style);
+            if (reset)
+                av_bprintf(buf, "{\\r%s}", root_style.name);
+        }
+    }
+
+    /* undo the special node styles */
+    apply_ass_styles(buf, root_style, style_props);
+
+    /* parse_styles() stores a private copy when the element carries its own
+     * font family; the local style is the only owner of that copy */
+    if (style_props.fontfamily != root_style.fontfamily)
+        av_free((void *)style_props.fontfamily);
+    return 0;
+}
+
+
+/**
+ * Decoder init: parse the header, make sure at least one style exists and
+ * create the ASS subtitle header.
+ *
+ * With extradata the header is taken from it; without extradata an empty
+ * document using TTML_DEFAULT_NAMESPACING is parsed instead.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static av_cold int ttml_init(AVCodecContext *avctx)
+{
+    TTMLContext *ttml = avctx->priv_data;
+    int ret;
+
+    LIBXML_TEST_VERSION
+
+    /* "> 0", not ">= 0": a size is never negative, so the fallback branch
+     * could not be reached otherwise */
+    if (avctx->extradata_size > 0)
+        ret = parse_header_from_extradata(avctx);
+    else
+        ret = parse_header(avctx, TTML_DEFAULT_NAMESPACING, "");
+    if (ret < 0)
+        return ret;
+
+    if (!ttml->style_count) {
+        /* set default stuff */
+        /* the header may have left behind an empty allocation */
+        av_freep(&ttml->styles);
+        ttml->styles = av_malloc_array(1, sizeof(*ttml->styles));
+        if (!ttml->styles)
+            return AVERROR(ENOMEM);
+        init_style(&ttml->styles[0]);
+        ttml->style_count = 1;
+    }
+    return create_ass_header(avctx);
+}
+
+/**
+ * Recreate p tag for parsing.
+ * Not the best way (of course) but it ensures all data is proper using libxml2
+ *
+ * Writes "<p SETTINGS>DATA</p>" to buf, where SETTINGS is the packet's
+ * AV_PKT_DATA_WEBVTT_SETTINGS side data (if any) and DATA the packet payload.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the side data is too large,
+ *         AVERROR(ENOMEM) if the buffer could not hold the result
+ */
+static int recreate_p_tag(const AVPacket *avpkt, AVBPrint* buf) {
+    size_t settings_size = 0;
+    const uint8_t *settings;
+
+    /* recreate p tag with our data for parsing */
+    av_bprintf(buf, "<p ");
+
+    settings = av_packet_get_side_data(avpkt, AV_PKT_DATA_WEBVTT_SETTINGS,
+                                       &settings_size);
+    if (settings_size > INT_MAX)
+        return AVERROR(EINVAL);
+
+    /* the precision keeps the read inside the side data / payload even if
+     * they are not NUL-terminated */
+    if (settings && settings_size > 0)
+        av_bprintf(buf, "%.*s", (int)settings_size, settings);
+
+    av_bprintf(buf, ">");
+    if (avpkt->data && avpkt->size > 0)
+        av_bprintf(buf, "%.*s", avpkt->size, avpkt->data);
+    av_bprintf(buf, "</p>");
+
+    if (!av_bprint_is_complete(buf))
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/**
+ * Decode one packet (the content of a single <p>) into an ASS rectangle.
+ *
+ * The paragraph is rebuilt as XML, parsed in the context of the header
+ * document and converted with parse_p_xml_to_ass(). Its style is taken from
+ * the paragraph itself, else from the attributes carried in its "default"
+ * attribute, else from the first child element if that is a <span>.
+ *
+ * @return avpkt->size on success; AVERROR_INVALIDDATA if the paragraph cannot
+ *         be parsed or no style can be determined; another negative AVERROR
+ *         code on allocation failure
+ */
+static int ttml_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
+                             int *got_sub_ptr, const AVPacket *avpkt)
+{
+    TTMLContext *ttml = avctx->priv_data;
+    TTMLStyle *style;
+    AVBPrint buf;
+    /* both start out NULL so that the cleanup code is safe on every path */
+    xmlChar *default_str = NULL;
+    xmlNodePtr p_node = NULL;
+    int ret;
+
+    av_bprint_init(&buf, 0, INT_MAX);
+
+    ret = recreate_p_tag(avpkt, &buf);
+    if (ret < 0)
+        goto cleanup;
+
+    /* buf.len is the length of the text; buf.size is the allocation */
+    if (xmlParseInNodeContext(ttml->node, buf.str, buf.len, 0,
+                              &p_node) != XML_ERR_OK || !p_node) {
+        ret = AVERROR_INVALIDDATA;
+        goto cleanup;
+    }
+
+    av_bprint_clear(&buf);
+
+    /* a way to get the default body attrs */
+    default_str = xmlGetProp(p_node, "default");
+    style = get_p_node_style(avctx, p_node);
+    if (!style && default_str) { /* if there are defaults and can't find style then check defaults */
+        xmlNodePtr default_node = NULL;
+
+        /* recreate p tag with our data for parsing */
+        av_bprintf(&buf, "<p %s/>", default_str);
+        if (!av_bprint_is_complete(&buf)) {
+            ret = AVERROR(ENOMEM);
+            goto cleanup;
+        }
+        if (xmlParseInNodeContext(ttml->node, buf.str, buf.len, 0,
+                                  &default_node) != XML_ERR_OK) {
+            xmlFreeNodeList(default_node);
+            ret = AVERROR_INVALIDDATA;
+            goto cleanup;
+        }
+        if (default_node)
+            style = get_p_node_style(avctx, default_node);
+        xmlFreeNodeList(default_node);
+        av_bprint_clear(&buf);
+    }
+    if (!style) {
+        /* style may be in the first span with no text in front */
+        /* NOTE(review): text nodes in front of the first element are skipped
+         * whatever they contain, which is what the original loop did as well */
+        for (xmlNodePtr cur = p_node->children; cur; cur = cur->next) {
+            if (cur->type != XML_ELEMENT_NODE)
+                continue;
+            if (!strncmp((const char *)cur->name, "span", 4))
+                style = get_p_node_style(avctx, cur);
+            break;
+        }
+        if (!style) {
+            /* this is a type of subtitle which requires some sort of style */
+            av_log(avctx, AV_LOG_ERROR, "Unable to find style data['%s']\n", avpkt->data);
+            ret = AVERROR_INVALIDDATA;
+            goto cleanup;
+        }
+    }
+
+    parse_p_xml_to_ass(avctx, &buf, p_node, *style);
+    if (!av_bprint_is_complete(&buf)) {
+        ret = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+    ret = ff_ass_add_rect(sub, buf.str, ttml->readorder++, 0,
+                          style->name, NULL);
+    if (ret < 0)
+        goto cleanup;
+
+    ret = avpkt->size;
+    *got_sub_ptr = sub->num_rects > 0;
+
+cleanup:
+    av_bprint_finalize(&buf, NULL);
+    xmlFree(default_str);
+    /* the parsed nodes form a list (with children and attributes) that has
+     * to be released as a whole */
+    xmlFreeNodeList(p_node);
+    return ret;
+}
+
+/* Release everything held by the decoder context: the strings of every style
+ * and region, the two arrays and the header document. Always returns 0. */
+static av_cold int ttml_close(AVCodecContext *avctx)
+{
+    TTMLContext *ctx = avctx->priv_data;
+    int idx;
+
+    idx = 0;
+    while (idx < ctx->style_count) {
+        TTMLStyle *entry = &ctx->styles[idx];
+
+        av_freep(&entry->name);
+        av_freep(&entry->fontfamily);
+        idx++;
+    }
+    av_freep(&ctx->styles);
+
+    idx = 0;
+    while (idx < ctx->region_count) {
+        TTMLRegion *entry = &ctx->regions[idx];
+
+        av_freep(&entry->style);
+        av_freep(&entry->name);
+        idx++;
+    }
+    av_freep(&ctx->regions);
+
+    xmlFreeDoc(ctx->doc);
+    return 0;
+}
+
+/* Codec registration for the TTML subtitle decoder. */
+const FFCodec ff_ttml_decoder = {
+        .p.name         = "ttml",
+        CODEC_LONG_NAME("TTML subtitle"),
+        .priv_data_size = sizeof(TTMLContext),
+        .p.type         = AVMEDIA_TYPE_SUBTITLE,
+        .p.id           = AV_CODEC_ID_TTML,
+        FF_CODEC_DECODE_SUB_CB(ttml_decode_frame),
+        .init           = ttml_init,
+        .flush          = ff_ass_decoder_flush,
+        .close          = ttml_close,
+        /* ttml_init() can fail after the XML document and the style/region
+         * arrays were allocated; have ttml_close() run in that case too */
+        .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 39dbec0208..ecb096f38b 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@ 
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR   6
+#define LIBAVCODEC_VERSION_MINOR   7
 #define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \