diff mbox

[FFmpeg-devel,1/1] libavcodec/movtextdec.c: fixing decoding for UTF-8 (ticket 6021)

Message ID VI1PR01MB1327D150E6B25FE0A1B32A82C09F0@VI1PR01MB1327.eurprd01.prod.exchangelabs.com
State Superseded
Headers show

Commit Message

Erik Bråthen Solem Dec. 17, 2016, 3:39 a.m. UTC
Character offsets were interpreted as byte offsets, resulting in misplaced
styling tags where multibyte characters were involved. The entire subtitle
stream would even be rendered invalid if such a misplaced tag happened to
split a multibyte character. This patch fixes this for UTF-8; UTF-16 was and
still is broken. These are the only supported encodings according to the spec.

---
 libavcodec/movtextdec.c | 95 +++++++++++++++++++++++++++----------------------
 1 file changed, 53 insertions(+), 42 deletions(-)

Comments

Michael Niedermayer Dec. 17, 2016, 12:17 p.m. UTC | #1
On Sat, Dec 17, 2016 at 03:39:02AM +0000, Erik Bråthen Solem wrote:
[...]
> @@ -491,7 +501,8 @@ static int mov_text_decode_frame(AVCodecContext *avctx,
>  
>              for (size_t i = 0; i < box_count; i++) {
>                  if (tsmb_type == box_types[i].type) {
> -                    if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
> +                    if (m->tracksize + m->size_var +

> +                        ox_types[i].base_size > avpkt->size)

do you mean box_types, theres no ox_types identifer (at least not in
git master)

[...]
Erik Bråthen Solem Dec. 18, 2016, 8:44 p.m. UTC | #2
Yes, it was supposed to be box_types, not ox_types. I must have removed the b by mistake after I tested the code. Should I resubmit the patch?
Michael Niedermayer Dec. 19, 2016, 12:57 a.m. UTC | #3
On Sun, Dec 18, 2016 at 08:44:29PM +0000, Erik Bråthen Solem wrote:
> Yes, it was supposed to be box_types, not ox_types. I must have removed the b by mistake after I tested the code. Should I resubmit the patch?

the change is trivial but the code that was tested should be submted
exactly, so please test and submit a corrected patch

thx

[...]
Erik Bråthen Solem Dec. 19, 2016, 3:20 a.m. UTC | #4
Done. It was assigned its own patch number (1860), so I am changing the state
of this one to "Superseded".
diff mbox

Patch

diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 7b5b161..6e1ff73 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -328,6 +328,7 @@  static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
     int i = 0;
     int j = 0;
     int text_pos = 0;
+    int text_pos_chars = 0;
 
     if (text < text_end && m->box_flags & TWRP_BOX) {
         if (m->w.wrap_flag == 1) {
@@ -338,50 +339,59 @@  static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
     }
 
     while (text < text_end) {
-        if (m->box_flags & STYL_BOX) {
-            for (i = 0; i < m->style_entries; i++) {
-                if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
-                    av_bprintf(buf, "{\\r}");
+        if ((*text & 0xC0) != 0x80) { // Boxes never split multibyte characters
+            if (m->box_flags & STYL_BOX) {
+                for (i = 0; i < m->style_entries; i++) {
+                    if (m->s[i]->style_flag &&
+                        text_pos_chars == m->s[i]->style_end)
+                    {
+                        av_bprintf(buf, "{\\r}");
+                    }
                 }
-            }
-            for (i = 0; i < m->style_entries; i++) {
-                if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
-                    if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
-                        av_bprintf(buf, "{\\b1}");
-                    if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
-                        av_bprintf(buf, "{\\i1}");
-                    if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
-                        av_bprintf(buf, "{\\u1}");
-                    av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
-                    for (j = 0; j < m->ftab_entries; j++) {
-                        if (m->s[i]->style_fontID == m->ftab[j]->fontID)
-                            av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+                for (i = 0; i < m->style_entries; i++) {
+                    if (m->s[i]->style_flag
+                        && text_pos_chars == m->s[i]->style_start)
+                    {
+                        if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
+                            av_bprintf(buf, "{\\b1}");
+                        if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
+                            av_bprintf(buf, "{\\i1}");
+                        if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
+                            av_bprintf(buf, "{\\u1}");
+                        /* (No need to print font style if equal to default?) */
+                        av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
+                        for (j = 0; j < m->ftab_entries; j++) {
+                            if (m->s[i]->style_fontID == m->ftab[j]->fontID)
+                                av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+                        }
                     }
                 }
             }
-        }
-        if (m->box_flags & HLIT_BOX) {
-            if (text_pos == m->h.hlit_start) {
-                /* If hclr box is present, set the secondary color to the color
-                 * specified. Otherwise, set primary color to white and secondary
-                 * color to black. These colors will come from TextSampleModifier
-                 * boxes in future and inverse video technique for highlight will
-                 * be implemented.
-                 */
-                if (m->box_flags & HCLR_BOX) {
-                    av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
-                                m->c.hlit_color[1], m->c.hlit_color[0]);
-                } else {
-                    av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
+            if (m->box_flags & HLIT_BOX) {
+                if (text_pos_chars == m->h.hlit_start) {
+                    /* If hclr box is present, set the secondary color to the
+                     * color specified. Otherwise, set primary color to white
+                     * and secondary color to black. These colors will come from
+                     * TextSampleModifier boxes in future and inverse video
+                     * technique for highlight will be implemented.
+                     */
+                    if (m->box_flags & HCLR_BOX) {
+                        av_bprintf(buf, "{\\2c&H%02x%02x%02x&}",
+                                    m->c.hlit_color[2], m->c.hlit_color[1],
+                                    m->c.hlit_color[0]);
+                    } else {
+                        av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
+                    }
                 }
-            }
-            if (text_pos == m->h.hlit_end) {
-                if (m->box_flags & HCLR_BOX) {
-                    av_bprintf(buf, "{\\2c&H000000&}");
-                } else {
-                    av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
+                if (text_pos_chars == m->h.hlit_end) {
+                    if (m->box_flags & HCLR_BOX) {
+                        av_bprintf(buf, "{\\2c&H000000&}");
+                    } else {
+                        av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
+                    }
                 }
             }
+        text_pos_chars++;
         }
 
         switch (*text) {
@@ -412,10 +422,10 @@  static int mov_text_init(AVCodecContext *avctx) {
     MovTextContext *m = avctx->priv_data;
     ret = mov_text_tx3g(avctx, m);
     if (ret == 0) {
-        return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
-                                m->d.back_color, m->d.bold, m->d.italic,
-                                m->d.underline, ASS_DEFAULT_BORDERSTYLE,
-                                m->d.alignment);
+        return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize,
+                                m->d.color, m->d.back_color, m->d.bold,
+                                m->d.italic, m->d.underline,
+                                ASS_DEFAULT_BORDERSTYLE, m->d.alignment);
     } else
         return ff_ass_subtitle_header_default(avctx);
 }
@@ -491,7 +501,8 @@  static int mov_text_decode_frame(AVCodecContext *avctx,
 
             for (size_t i = 0; i < box_count; i++) {
                 if (tsmb_type == box_types[i].type) {
-                    if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
+                    if (m->tracksize + m->size_var +
+                        ox_types[i].base_size > avpkt->size)
                         break;
                     ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
                     if (ret_tsmb == -1)