diff mbox series

[FFmpeg-devel] avformat/webvttdec, enc: correctly process files containing STYLE, REGION blocks

Message ID CA+_k4RKhhH-T08pzeXbZh+tq8Ya7eaKbEFogtSRmYGcFa0UG6g@mail.gmail.com
State Superseded
Headers show
Series [FFmpeg-devel] avformat/webvttdec, enc: correctly process files containing STYLE, REGION blocks | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate fail Make fate failed
andriy/PPC64_make warning Make failed

Commit Message

Dave Evans Oct. 12, 2020, 9:07 p.m. UTC
This patch fixes the total failure to parse cues when style and region
definition blocks are contained in the input file, and ensures those blocks
are written to the output when copying.

The test attached needs to be added to samples at the path shown in the
patch in order to validate that the original issue is fixed.

First patch so please go easy :-)

Cheers,
Dave
Subject: [PATCH] avformat/webvttdec,enc: correctly decode files containing
 STYLE, REGION blocks

Add the ability to extract cues from files containing one or more WebVTT
region definition block or WebVTT style block. Previously the decoder would
misinterpret these and fail to parse any cues at all. Also add the ability to
write these, or alternative header information, back out in the encoder using
the codec extradata.
---
 libavformat/webvttdec.c                   | 16 ++++++++++++++--
 libavformat/webvttenc.c                   | 12 ++++++++++++
 tests/fate/subtitles.mak                  |  3 +++
 tests/ref/fate/sub-webvtt-styleandregions | 19 +++++++++++++++++++
 4 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 tests/ref/fate/sub-webvtt-styleandregions

Comments

Jan Ekström Oct. 12, 2020, 9:51 p.m. UTC | #1
On Tue, Oct 13, 2020 at 12:07 AM Dave Evans <dave.evans@m2amedia.tv> wrote:
>
> This patch fixes the total failure to parse cues when style and region
> definition blocks are contained in the input file, and ensures those blocks
> are written to the output when copying.
>

Thank you for taking time to add a FATE test, but unfortunately I am
not sure if this test tests the functionality you have added.

You have added what appears to be parsing of webvtt styles into
extradata in the demuxer, and then writing it out in the muxer. And
the test just outputs this webvtt into ASS, which doesn't keep any of
this since the decoder doesn't translate the newly inserted things
into ASS styles :)

So, I think a more fitting test would be to add this to the webvtt
tests with -c copy and pushing it out as webvtt, I would think?

Jan
Dave Evans Oct. 13, 2020, 8:13 a.m. UTC | #2
On Mon, Oct 12, 2020 at 11:20 PM Jan Ekström <jeebjp@gmail.com> wrote:
>
> On Tue, Oct 13, 2020 at 12:07 AM Dave Evans <dave.evans@m2amedia.tv> wrote:
> >
> > This patch fixes the total failure to parse cues when style and region
> > definition blocks are contained in the input file, and ensures those blocks
> > are written to the output when copying.
> >
>
> Thank you for taking time to add a FATE test, but unfortunately I am
> not sure if this test tests the functionality you have added.
>
> You have added what appears to be parsing of webvtt styles into
> extradata in the demuxer, and then writing it out in the muxer. And
> the test just outputs this webvtt into ASS, which doesn't keep any of
> this since the decoder doesn't translate the newly inserted things
> into ASS styles :)
>
> So, I think a more fitting test would be to add this to the webvtt
> tests with -c copy and pushing it out as webvtt, I would think?
>
> Jan

Hi Jan,

Thanks for taking the time to review.

In fact, this was the test I intended to add. The test case was
developed before the fix and serves to show the original issue we saw
(total failure to extract any cues under the circumstances of the
test) is fixed and that there should be no regression with that
content, and I believe it serves those purposes. Apologies for not
making this clearer in the original submission.

That said, I agree that it may also be worth adding test coverage for
the encoder change and I will take the time to add that as well.

Cheers,
Dave
diff mbox series

Patch

diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index 8d2fdfed37..c6cc367383 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -60,7 +60,7 @@  static int64_t read_ts(const char *s)
 static int webvtt_read_header(AVFormatContext *s)
 {
     WebVTTContext *webvtt = s->priv_data;
-    AVBPrint cue;
+    AVBPrint cue, header;
     int res = 0;
     AVStream *st = avformat_new_stream(s, NULL);
 
@@ -72,6 +72,7 @@  static int webvtt_read_header(AVFormatContext *s)
     st->disposition |= webvtt->kind;
 
     av_bprint_init(&cue,    0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&header, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     for (;;) {
         int i;
@@ -89,12 +90,18 @@  static int webvtt_read_header(AVFormatContext *s)
         p = identifier = cue.str;
         pos = avio_tell(s->pb);
 
-        /* ignore header chunk */
+        /* ignore the magic word and any comments */
         if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
             !strncmp(p, "WEBVTT", 6) ||
             !strncmp(p, "NOTE", 4))
             continue;
 
+        /* store the style and region blocks from the header */
+        if (!strncmp(p, "STYLE", 5) || !strncmp(p, "REGION", 6)) {
+            av_bprintf(&header, "%s%s", header.len ? "\n\n" : "", p);
+            continue;
+        }
+
         /* optional cue identifier (can be a number like in SRT or some kind of
          * chaptering id) */
         for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
@@ -161,12 +168,17 @@  static int webvtt_read_header(AVFormatContext *s)
         SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
     }
 
+    res = ff_bprint_to_codecpar_extradata(st->codecpar, &header);
+    if (res < 0)
+        goto end;
+
     ff_subtitles_queue_finalize(s, &webvtt->q);
 
 end:
     if (res < 0)
         ff_subtitles_queue_clean(&webvtt->q);
     av_bprint_finalize(&cue,    NULL);
+    av_bprint_finalize(&header, NULL);
     return res;
 }
 
diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index cbd989dcb6..fcbd3ee10a 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c
@@ -58,6 +58,18 @@  static int webvtt_write_header(AVFormatContext *ctx)
 
     avio_printf(pb, "WEBVTT\n");
 
+    if (par->extradata_size > 0) {
+        size_t header_size = par->extradata_size;
+
+        if (par->extradata[0] != '\n')
+            avio_printf(pb, "\n");
+
+        avio_write(pb, par->extradata, header_size);
+
+        if (par->extradata[header_size - 1] != '\n')
+            avio_printf(pb, "\n");
+    }
+
     return 0;
 }
 
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index 6323d0f93d..41b496e46f 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -91,6 +91,9 @@  fate-sub-webvtt: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_capability_
 FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt2
 fate-sub-webvtt2: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_extended_tester.vtt
 
+FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt-styleandregions
+fate-sub-webvtt-styleandregions: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/webvtt_style_and_regions.vtt
+
 FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER WEBVTT_ENCODER WEBVTT_MUXER) += fate-sub-webvttenc
 fate-sub-webvttenc: CMD = fmtstdout webvtt -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
 
diff --git a/tests/ref/fate/sub-webvtt-styleandregions b/tests/ref/fate/sub-webvtt-styleandregions
new file mode 100644
index 0000000000..ca3516fe02
--- /dev/null
+++ b/tests/ref/fate/sub-webvtt-styleandregions
@@ -0,0 +1,19 @@ 
+[Script Info]
+; Script generated by FFmpeg/Lavc
+ScriptType: v4.00+
+PlayResX: 384
+PlayResY: 288
+ScaledBorderAndShadow: yes
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:10.00,0:00:25.00,Default,,0,0,0,,Can I tell you a joke, Dad?
+Dialogue: 0,0:00:12.50,0:00:27.50,Default,,0,0,0,,Sure, I could do with a laugh.
+Dialogue: 0,0:00:15.00,0:00:30.00,Default,,0,0,0,,Where do sheep go to get their hair cut?
+Dialogue: 0,0:00:17.50,0:00:32.50,Default,,0,0,0,,I don't know, son. Where do sheep go to get their hair cut?
+Dialogue: 0,0:00:20.00,0:00:35.00,Default,,0,0,0,,To the baa-baa shop!
+Dialogue: 0,0:00:22.50,0:00:37.50,Default,,0,0,0,,{\i1}[facepalms]{\i0}