[FFmpeg-devel] lavc/movtextenc: fix incorrect offset calculation for UTF-8 characters

Submitted by Erik Bråthen Solem on March 8, 2017, 1:36 a.m.

Details

Message ID VI1P194MB0255EF9C94AA3A8590BA5465C02E0@VI1P194MB0255.EURP194.PROD.OUTLOOK.COM
State New
Headers show

Commit Message

Erik Bråthen Solem March 8, 2017, 1:36 a.m.
The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts each multibyte UTF-8 character as a single character, whereas ffmpeg currently counts bytes. This produces files in which style boxes have incorrect offsets. This patch introduces:
1. a separate variable that keeps track of the byte count
2. a for loop that excludes continuation bytes from the character counting

Fixes trac #6021 (encoding part).

---
 libavcodec/movtextenc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..8d09ff4 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@  typedef struct {
     uint8_t style_fontsize;
     uint32_t style_color;
     uint16_t text_pos;
+    uint16_t byte_size;
 } MovTextContext;
 
 typedef struct {
@@ -302,7 +303,10 @@  static void mov_text_text_cb(void *priv, const char *text, int len)
 {
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    for (int i = 0; i < len; i++)
+        if ((text[i] & 0xC0) != 0x80)
+            s->text_pos++; /* increase character count */
+    s->byte_size += len; /* increase byte count */
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@  static void mov_text_new_line_cb(void *priv, int forced)
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, "\n", 1);
     s->text_pos += 1;
+    s->byte_size += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@  static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     size_t j;
 
     s->text_pos = 0;
+    s->byte_size = 0;
     s->count = 0;
     s->box_flags = 0;
     s->style_entries = 0;
@@ -362,7 +368,7 @@  static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         }
     }
 
-    AV_WB16(buf, s->text_pos);
+    AV_WB16(buf, s->byte_size);
     buf += 2;
 
     if (!av_bprint_is_complete(&s->buffer)) {