diff mbox series

[FFmpeg-devel,1/2] lavf/subtitles: Add ff_text_peek_r16(), only accept \r, \n, \r\n and \r\r\n line endings

Message ID b976905efa0ce3cd2837839dc116c9a8a82787ee.camel@haerdin.se
State New
Headers show
Series [FFmpeg-devel,1/2] lavf/subtitles: Add ff_text_peek_r16(), only accept \r, \n, \r\n and \r\r\n line endings | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Tomas Härdin March 30, 2024, 12:08 a.m. UTC
Here's an alternative first patch that rolls patch 1+3 into one. I'd
like some feedback on this before I continue hacking on patch 2. While
I don't like that we accept any old broken srt file, especially without
knowing what software made it, I'm not completely opposed to
compromising in this specific case. But I'd rather we didn't, and stuck
to \r, \n and \r\n. What I really don't want is runs of \r being eaten
without being "terminated" by a \n, because this messes up Mac support.

/Tomas
diff mbox series

Patch

From 2ec68c51e4599b8493a2e103793f571451d872d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Thu, 28 Mar 2024 20:30:37 +0100
Subject: [PATCH 1/2] lavf/subtitles: Add ff_text_peek_r16(), only accept \r,
 \n, \r\n and \r\r\n line endings

---
 libavformat/subtitles.c | 53 +++++++++++++++++++++++++++++++++++++----
 libavformat/subtitles.h |  5 ++++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index 3413763c7b..01187df6ab 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -22,6 +22,7 @@ 
 #include "subtitles.h"
 #include "avio_internal.h"
 #include "libavutil/avstring.h"
+#include "libavutil/intreadwrite.h"
 
 void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
 {
@@ -106,6 +107,42 @@  int ff_text_peek_r8(FFTextReader *r)
     return c;
 }
 
+int ff_text_peek_r16(FFTextReader *r) // peek the next two bytes and return them as one big-endian 16-bit value
+{
+    int c1, c2;
+    if (r->buf_pos < r->buf_len - 1)
+        return AV_RB16(&r->buf[r->buf_pos]); // at least two bytes are buffered: read in place, buf_pos is not advanced
+
+    // fewer than two bytes are buffered (missing one or two bytes): fetch more through ff_text_r8()
+    c1 = ff_text_r8(r);
+    if (avio_feof(r->pb))
+        return 0; // EOF is reported as 0; NOTE(review): c1 is not stored back on this path - confirm this is intended
+
+    if (r->buf_pos == r->buf_len - 1) {
+        // missing one byte: rebuild buf as { buffered byte, c1 }. NOTE(review): evaluated after ff_text_r8() above, which may itself move buf_pos - confirm
+        r->buf[0] = r->buf[r->buf_pos];
+        r->buf[1] = c1;
+        r->buf_pos = 0;
+        r->buf_len = 2;
+        return AV_RB16(r->buf);
+    }
+
+    // missing two bytes: read a second byte, then store both in buf (presumably so later reads return them again)
+    c2 = ff_text_r8(r);
+    if (avio_feof(r->pb)) { // EOF on the second byte: keep only c1 in buf and report 0
+        r->buf[0] = c1;
+        r->buf_pos = 0;
+        r->buf_len = 1;
+        return 0;
+    }
+
+    r->buf[0] = c1;
+    r->buf[1] = c2;
+    r->buf_pos = 0;
+    r->buf_len = 2;
+    return AV_RB16(r->buf);
+}
+
 AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                     const uint8_t *event, size_t len, int merge)
 {
@@ -446,11 +483,12 @@  int ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
 ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
 {
     size_t cur = 0;
+    unsigned char c = 0; // must be initialized: with size == 1 the loop below never runs, yet c is tested after it
     if (!size)
         return 0;
     buf[0] = '\0';
     while (cur + 1 < size) {
-        unsigned char c = ff_text_r8(tr);
+        c = ff_text_r8(tr);
         if (!c)
             return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
         if (c == '\r' || c == '\n')
@@ -458,9 +496,14 @@  ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
         buf[cur++] = c;
         buf[cur] = '\0';
     }
-    while (ff_text_peek_r8(tr) == '\r')
-        ff_text_r8(tr);
-    if (ff_text_peek_r8(tr) == '\n')
-        ff_text_r8(tr);
+    if (c == '\r') { // a line ended by \n alone needs nothing more; after \r look for the rest of the terminator
+        if (ff_text_peek_r8(tr) == '\n')
+            ff_text_r8(tr);
+        else if (ff_text_peek_r16(tr) == AV_RB16("\r\n")) {
+            // \r\r\n (seen in ticket5032-rrn.srt) is treated as one line ending: read past the remaining \r\n
+            ff_text_r8(tr);
+            ff_text_r8(tr);
+        }
+    }
     return cur;
 }
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index 88665663c5..2a92044976 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -94,6 +94,11 @@  int ff_text_eof(FFTextReader *r);
  */
 int ff_text_peek_r8(FFTextReader *r);
 
+/**
+ * Like ff_text_peek_r8(), but peek two bytes and return them as a big-endian number; 0 is returned if EOF is reached before two bytes are available.
+ */
+int ff_text_peek_r16(FFTextReader *r);
+
 /**
  * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
  * written.
-- 
2.39.2