[FFmpeg-devel] movtextenc: fix handling of utf-8 subtitles

Submitted by Philip Langdale on March 28, 2018, 3:16 p.m.

Details

Message ID 20180328151612.9781-1-philipl@overt.org
State New
Headers show

Commit Message

Philip Langdale March 28, 2018, 3:16 p.m.
See the earlier fix for movtextdec for details. The equivalent bug is
present on the encoder side as well.

We need to track the text length in 'characters' (which seems to really
mean codepoints) to ensure that styles are applied across the correct
ranges.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/movtextenc.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index d795e317c3..e1d2ae446c 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -304,11 +304,33 @@  static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color
      */
 }
 
+static uint16_t utf8_strlen(const char *text, int len) // count UTF-8 sequences in text[0..len), classified by lead byte only
+{
+    uint16_t i = 0, ret = 0; // i: byte offset, ret: sequence count. NOTE(review): 16-bit i wraps at 65536 and can never reach a len above 65535
+    while (i < len) {
+        char c = text[i];
+        if ((c & 0x80) == 0) // 0xxxxxxx: single-byte sequence
+            i += 1;
+        else if ((c & 0xE0) == 0xC0) // 110xxxxx: lead byte of a 2-byte sequence
+            i += 2;
+        else if ((c & 0xF0) == 0xE0) // 1110xxxx: lead byte of a 3-byte sequence
+            i += 3;
+        else if ((c & 0xF8) == 0xF0) // 11110xxx: lead byte of a 4-byte sequence
+            i += 4;
+        else
+            return 0; // continuation byte in lead position, or invalid lead byte: report "not UTF-8"
+        ret++; // continuation bytes are skipped unchecked; a sequence cut short by len is still counted
+    }
+    return ret; // also 0 when len <= 0, since the loop never runs
+}
+
 static void mov_text_text_cb(void *priv, const char *text, int len)
 {
+    uint16_t utf8_len = utf8_strlen(text, len); // sequence count; 0 means the text was rejected as UTF-8 (or len <= 0)
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    // Advance by codepoints so style ranges line up; if it's not utf-8, just use the byte length
+    s->text_pos += utf8_len ? utf8_len : len;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)