diff mbox series

[FFmpeg-devel,3/4] lavf/matroskadec: support standard (non-WebM) WebVTT formatting

Message ID 20200528051318.34538-3-rcombs@rcombs.me
State Superseded
Headers show
Series [FFmpeg-devel,1/4] lavf: matroska subtitle muxer
Related show

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

rcombs May 28, 2020, 5:13 a.m. UTC
Fixes ticket #5641
---
 libavformat/matroska.c    |  11 ++--
 libavformat/matroskadec.c | 111 ++++++++++++++++++++++++--------------
 2 files changed, 77 insertions(+), 45 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index 7c56aba403..962fa496f4 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -60,16 +60,12 @@  const CodecTags ff_mkv_codec_tags[]={
     {"A_VORBIS"         , AV_CODEC_ID_VORBIS},
     {"A_WAVPACK4"       , AV_CODEC_ID_WAVPACK},
 
-    {"D_WEBVTT/SUBTITLES"   , AV_CODEC_ID_WEBVTT},
-    {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
-    {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
-    {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
-
     {"S_TEXT/UTF8"      , AV_CODEC_ID_SUBRIP},
     {"S_TEXT/UTF8"      , AV_CODEC_ID_TEXT},
     {"S_TEXT/ASCII"     , AV_CODEC_ID_TEXT},
     {"S_TEXT/ASS"       , AV_CODEC_ID_ASS},
     {"S_TEXT/SSA"       , AV_CODEC_ID_ASS},
+    {"S_TEXT/WEBVTT"    , AV_CODEC_ID_WEBVTT},
     {"S_ASS"            , AV_CODEC_ID_ASS},
     {"S_SSA"            , AV_CODEC_ID_ASS},
     {"S_VOBSUB"         , AV_CODEC_ID_DVD_SUBTITLE},
@@ -77,6 +73,11 @@  const CodecTags ff_mkv_codec_tags[]={
     {"S_HDMV/PGS"       , AV_CODEC_ID_HDMV_PGS_SUBTITLE},
     {"S_HDMV/TEXTST"    , AV_CODEC_ID_HDMV_TEXT_SUBTITLE},
 
+    {"D_WEBVTT/SUBTITLES"   , AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
+    {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
+
     {"V_AV1"            , AV_CODEC_ID_AV1},
     {"V_DIRAC"          , AV_CODEC_ID_DIRAC},
     {"V_FFV1"           , AV_CODEC_ID_FFV1},
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index bb3a126c29..1170f57be3 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -3306,62 +3306,81 @@  static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
                                  uint8_t *data, int data_len,
                                  uint64_t timecode,
                                  uint64_t duration,
-                                 int64_t pos)
+                                 int64_t pos,
+                                 uint8_t *additional, uint64_t additional_id, int additional_size)
 {
     AVPacket pktl, *pkt = &pktl;
-    uint8_t *id, *settings, *text, *buf;
-    int id_len, settings_len, text_len;
+    uint8_t *id, *settings, *comment, *text, *buf;
+    int id_len = 0, settings_len = 0, comment_len = 0, text_len;
     uint8_t *p, *q;
     int err;
+    int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9);
 
     if (data_len <= 0)
         return AVERROR_INVALIDDATA;
 
-    p = data;
-    q = data + data_len;
-
-    id = p;
-    id_len = -1;
-    while (p < q) {
-        if (*p == '\r' || *p == '\n') {
-            id_len = p - id;
-            if (*p == '\r')
-                p++;
-            break;
+    p = webm_style ? data : additional;
+    q = webm_style ? (data + data_len) : (additional + additional_size);
+
+    if (p) {
+        id = p;
+        id_len = -1;
+        while (p < q) {
+            if (*p == '\r' || *p == '\n') {
+                id_len = p - id;
+                if (*p == '\r')
+                    p++;
+                break;
+            }
+            p++;
         }
+
+        if (p >= q || *p != '\n')
+            return AVERROR_INVALIDDATA;
         p++;
-    }
 
-    if (p >= q || *p != '\n')
-        return AVERROR_INVALIDDATA;
-    p++;
-
-    settings = p;
-    settings_len = -1;
-    while (p < q) {
-        if (*p == '\r' || *p == '\n') {
-            settings_len = p - settings;
-            if (*p == '\r')
-                p++;
-            break;
+        settings = p;
+        settings_len = -1;
+        while (p < q) {
+            if (*p == '\r' || *p == '\n') {
+                settings_len = p - settings;
+                if (*p == '\r')
+                    p++;
+                break;
+            }
+            p++;
         }
+
+        if (p >= q || *p != '\n')
+            return AVERROR_INVALIDDATA;
         p++;
+
+        if (!webm_style && p < q) {
+            if (q[-1] != '\r' && q[-1] != '\n')
+                return AVERROR_INVALIDDATA;
+
+            comment = p;
+            comment_len = q - p;
+        }
     }
 
-    if (p >= q || *p != '\n')
-        return AVERROR_INVALIDDATA;
-    p++;
-
-    text = p;
-    text_len = q - p;
-    while (text_len > 0) {
-        const int len = text_len - 1;
-        const uint8_t c = p[len];
-        if (c != '\r' && c != '\n')
-            break;
-        text_len = len;
+    if (webm_style) {
+        text = p;
+        text_len = q - p;
+
+        while (text_len > 0) {
+            const int len = text_len - 1;
+            const uint8_t c = p[len];
+            if (c != '\r' && c != '\n')
+                break;
+            text_len = len;
+        }
+    } else {
+        text = data;
+        text_len = data_len;
     }
 
+
     if (text_len <= 0)
         return AVERROR_INVALIDDATA;
 
@@ -3394,6 +3413,17 @@  static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
         memcpy(buf, settings, settings_len);
     }
 
+    if (comment_len > 0) {
+        buf = av_packet_new_side_data(pkt,
+                                      AV_PKT_DATA_WEBVTT_COMMENT,
+                                      comment_len);
+        if (!buf) {
+            av_packet_unref(pkt);
+            return AVERROR(ENOMEM);
+        }
+        memcpy(buf, comment, comment_len);
+    }
+
     // Do we need this for subtitles?
     // pkt->flags = AV_PKT_FLAG_KEY;
 
@@ -3656,7 +3686,8 @@  static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
             res = matroska_parse_webvtt(matroska, track, st,
                                         out_data, out_size,
                                         timecode, lace_duration,
-                                        pos);
+                                        pos,
+                                        additional, additional_id, additional_size);
             if (!buf)
                 av_free(out_data);
             if (res)