diff mbox series

[FFmpeg-devel] Removes linebreaks forbidden by the WEBVTT spec on encode

Message ID jeLdODgRx0zy8qJY5tnuY19FkjTsKre3LaL5FVEb4thYCjHxKScSXGNb07mc_913nSRxc4npY6YtqYTPVRdGWDqiBh9wp-ufD6jSIGSPMaQ=@all8bits.com
State New
Headers show
Series [FFmpeg-devel] Removes linebreaks forbidden by the WEBVTT spec on encode
Related show

Checks

Context Check Description
andriy/ffmpeg-patchwork pending
andriy/ffmpeg-patchwork fail Failed to apply patch

Commit Message

bloomtom Jan. 2, 2020, 6:14 a.m. UTC
libavformat/webvttenc.c: The WEBVTT spec allows at most one consecutive
line break (\r or \n) within packet data; two or more line breaks in a
row signify the end of a data packet. The previous behavior wrote packet
data verbatim, so in the best case any text following a blank line was
orphaned outside the packets a spec-compliant parser recognizes, and in
the worst case some parsers simply refused to process the resulting vtt
file. This patch shims packet data writing: it skips line break
characters at the start and end of the packet data and replaces any run
of consecutive line breaks between other characters with a single line
feed.
tests/ref/fate/sub-webvttenc: Updated to match the new webvttenc output,
which no longer contains blank lines inside packet data.

Signed-off-by: Tom Bloom <bloomtom@all8bits.com>
---
 libavformat/webvttenc.c      | 43 ++++++++++++++++++++++++++++++++++--
 tests/ref/fate/sub-webvttenc |  4 ----
 2 files changed, 41 insertions(+), 6 deletions(-)

 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
@@ -147,13 +145,11 @@ A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
 

 00:58.501 --> 01:00.500
-
 A letter followed by 05 hard spaces: A\h\h\h\h\h
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
 05 hard  spaces between letters: A\h\h\h\h\hA
 5 normal spaces between letters: A     A
-
 ^--Forced line break
 

 01:00.501 --> 01:02.500
diff mbox series

Patch

diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index 61b7f54622..8da2818aec 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c
@@ -1,5 +1,6 @@ 
 /*
  * Copyright (c) 2013 Matthew Heaney
+ * Copyright (c) 2020 Thomas Bloom
  *
  * This file is part of FFmpeg.
  *
@@ -62,6 +63,42 @@  static int webvtt_write_header(AVFormatContext *ctx)
     return 0;
 }
 

+static int is_linebreak(char c)
+{
+    return c == '\n' || c == '\r';
+}
+
+static int webvtt_write_data(AVIOContext *pb, uint8_t *pkt, int pkt_len)
+{
+    int start = 0;
+    int written = 0;
+
+    // Fast forward to first non-linebreak.
+    while(start < pkt_len - 1 && is_linebreak(pkt[start])) {
+        start++;
+    }
+
+    for (int i = start; i < pkt_len; i++) {
+        while(is_linebreak(pkt[i])) {
+            if (i == pkt_len - 1) {
+                // Hit end with no stop in linebreaks.
+                return written;
+            }
+            else if (!is_linebreak(pkt[i+1])) {
+                // write a single linefeed to cover all skipped.
+                avio_printf(pb, "\n");
+                written++;
+            }
+            i++;
+        }
+
+        avio_write(pb, &pkt[i], 1);
+        written++;
+    }
+
+    return written;
+}
+
 static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt)
 {
     AVIOContext  *pb = ctx->pb;
@@ -88,8 +125,10 @@  static int webvtt_write_packet(AVFormatContext *ctx, AVPacket *pkt)
 

     avio_printf(pb, "\n");
 

-    avio_write(pb, pkt->data, pkt->size);
-    avio_printf(pb, "\n");
+    if (webvtt_write_data(pb, pkt->data, pkt->size) > 0) {
+        // Data not empty. Write a linefeed to divide packets in output.
+        avio_printf(pb, "\n");
+    }
 

     return 0;
 }
diff --git a/tests/ref/fate/sub-webvttenc b/tests/ref/fate/sub-webvttenc
index 45ae0b6131..012f10a8ba 100644
--- a/tests/ref/fate/sub-webvttenc
+++ b/tests/ref/fate/sub-webvttenc
@@ -128,14 +128,12 @@  also hide these tags:
 but show this: {normal text}
 

 00:54.501 --> 01:00.500
-
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
 The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
 

 00:54.501 --> 00:56.500
-
 \h\h\h\h\hA (05 hard spaces followed by a letter)