diff mbox

[FFmpeg-devel] lavc/ass_split: improve handling of complex ASS features

Message ID 20160910084531.11103-1-rodger.combs@gmail.com
State Changes Requested
Headers show

Commit Message

Rodger Combs Sept. 10, 2016, 8:45 a.m. UTC
Specifically:
- Skip writing drawings as text
- Convert \h to a Unicode non-breaking space
- Ignore comments and unknown tags

The test references are updated to reflect these changes.
---
 libavcodec/ass_split.c       | 29 +++++++++++++++++------------
 tests/ref/fate/sub-textenc   | 12 ++++++------
 tests/ref/fate/sub-webvttenc | 12 ++++++------
 3 files changed, 29 insertions(+), 24 deletions(-)

Comments

Clément Bœsch Sept. 17, 2016, 12:07 p.m. UTC | #1
On Sat, Sep 10, 2016 at 03:45:31AM -0500, Rodger Combs wrote:
> Specifically:
> - Skip writing drawings as text
> - Convert \h to a Unicode non-breaking space
> - Ignore comments and unknown tags
> 
> The test references are update to reflect these changes.
> ---
>  libavcodec/ass_split.c       | 29 +++++++++++++++++------------
>  tests/ref/fate/sub-textenc   | 12 ++++++------
>  tests/ref/fate/sub-webvttenc | 12 ++++++------
>  3 files changed, 29 insertions(+), 24 deletions(-)
> 
> diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
> index beaba7e..b25a0f3 100644
> --- a/libavcodec/ass_split.c
> +++ b/libavcodec/ass_split.c
> @@ -477,30 +477,37 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
>      const char *text = NULL;
>      char new_line[2];
>      int text_len = 0;
> +    int drawing = 0;
>  
>      while (buf && *buf) {
>          if (text && callbacks->text &&
> -            (sscanf(buf, "\\%1[nN]", new_line) == 1 ||
> -             !strncmp(buf, "{\\", 2))) {
> -            callbacks->text(priv, text, text_len);
> +            (sscanf(buf, "\\%1[nNh]", new_line) == 1 ||
> +             *buf == '{')) {
> +            if (!drawing)
> +                callbacks->text(priv, text, text_len);
>              text = NULL;
>          }
> -        if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
> +        if (buf[0] == '\\' && buf[1] == 'h') {
> +            callbacks->text(priv, "\u00A0", 2);
> +            buf += 2;
> +        } else if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
>              if (callbacks->new_line)
>                  callbacks->new_line(priv, new_line[0] == 'N');
>              buf += 2;
> -        } else if (!strncmp(buf, "{\\", 2)) {
> -            buf++;
> +        } else if (*buf == '{' && strchr(buf, '}')) {
> +            buf += strcspn(buf, "\\}"); // skip comments
>              while (*buf == '\\') {
>                  char style[2], c[2], sep[2], c_num[2] = "0", tmp[128] = {0};
>                  unsigned int color = 0xFFFFFFFF;
> -                int len, size = -1, an = -1, alpha = -1;
> +                int len = 2, size = -1, an = -1, alpha = -1;
>                  int x1, y1, x2, y2, t1 = -1, t2 = -1;
>                  if (sscanf(buf, "\\%1[bisu]%1[01\\}]%n", style, c, &len) > 1) {
>                      int close = c[0] == '0' ? 1 : c[0] == '1' ? 0 : -1;
>                      len += close != -1;
>                      if (callbacks->style)
>                          callbacks->style(priv, style[0], close);
> +                } else if (sscanf(buf, "\\p%u%1[\\}]%n", &size, sep, &len) > 1) {
> +                    drawing = (size > 0);
>                  } else if (sscanf(buf, "\\c%1[\\}]%n", sep, &len) > 0 ||
>                             sscanf(buf, "\\c&H%X&%1[\\}]%n", &color, sep, &len) > 1 ||
>                             sscanf(buf, "\\%1[1234]c%1[\\}]%n", c_num, sep, &len) > 1 ||
> @@ -543,13 +550,11 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
>                  } else if (sscanf(buf, "\\org(%d,%d)%1[\\}]%n", &x1, &y1, sep, &len) > 2) {
>                      if (callbacks->origin)
>                          callbacks->origin(priv, x1, y1);
> -                } else {
> -                    len = strcspn(buf+1, "\\}") + 2;  /* skip unknown code */
>                  }
>                  buf += len - 1;
> +                buf += strcspn(buf, "\\}"); // skip comments
>              }
> -            if (*buf++ != '}')
> -                return AVERROR_INVALIDDATA;

> +            buf++; // skip }

Is it really guaranteed that there is always a '}' at this point?

>          } else {
>              if (!text) {
>                  text = buf;
> @@ -559,7 +564,7 @@ int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
>              buf++;
>          }
>      }
> -    if (text && callbacks->text)
> +    if (text && callbacks->text && !drawing)
>          callbacks->text(priv, text, text_len);
>      if (callbacks->end)
>          callbacks->end(priv);
> diff --git a/tests/ref/fate/sub-textenc b/tests/ref/fate/sub-textenc
> index f7d82ce..2d7236e 100644
> --- a/tests/ref/fate/sub-textenc
> +++ b/tests/ref/fate/sub-textenc
> @@ -152,7 +152,7 @@ text 2
>  00:00:52,501 --> 00:00:54,500
>  Hide these tags:
>  also hide these tags:

> -but show this: {normal text}
> +but show this: 

This sounds wrong: the line says "but show this:", yet the "{normal text}" that follows is no longer output.

[...]
diff mbox

Patch

diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
index beaba7e..b25a0f3 100644
--- a/libavcodec/ass_split.c
+++ b/libavcodec/ass_split.c
@@ -477,30 +477,37 @@  int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
     const char *text = NULL;
     char new_line[2];
     int text_len = 0;
+    int drawing = 0;
 
     while (buf && *buf) {
         if (text && callbacks->text &&
-            (sscanf(buf, "\\%1[nN]", new_line) == 1 ||
-             !strncmp(buf, "{\\", 2))) {
-            callbacks->text(priv, text, text_len);
+            (sscanf(buf, "\\%1[nNh]", new_line) == 1 ||
+             *buf == '{')) {
+            if (!drawing)
+                callbacks->text(priv, text, text_len);
             text = NULL;
         }
-        if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
+        if (buf[0] == '\\' && buf[1] == 'h') {
+            callbacks->text(priv, "\u00A0", 2);
+            buf += 2;
+        } else if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
             if (callbacks->new_line)
                 callbacks->new_line(priv, new_line[0] == 'N');
             buf += 2;
-        } else if (!strncmp(buf, "{\\", 2)) {
-            buf++;
+        } else if (*buf == '{' && strchr(buf, '}')) {
+            buf += strcspn(buf, "\\}"); // skip comments
             while (*buf == '\\') {
                 char style[2], c[2], sep[2], c_num[2] = "0", tmp[128] = {0};
                 unsigned int color = 0xFFFFFFFF;
-                int len, size = -1, an = -1, alpha = -1;
+                int len = 2, size = -1, an = -1, alpha = -1;
                 int x1, y1, x2, y2, t1 = -1, t2 = -1;
                 if (sscanf(buf, "\\%1[bisu]%1[01\\}]%n", style, c, &len) > 1) {
                     int close = c[0] == '0' ? 1 : c[0] == '1' ? 0 : -1;
                     len += close != -1;
                     if (callbacks->style)
                         callbacks->style(priv, style[0], close);
+                } else if (sscanf(buf, "\\p%u%1[\\}]%n", &size, sep, &len) > 1) {
+                    drawing = (size > 0);
                 } else if (sscanf(buf, "\\c%1[\\}]%n", sep, &len) > 0 ||
                            sscanf(buf, "\\c&H%X&%1[\\}]%n", &color, sep, &len) > 1 ||
                            sscanf(buf, "\\%1[1234]c%1[\\}]%n", c_num, sep, &len) > 1 ||
@@ -543,13 +550,11 @@  int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
                 } else if (sscanf(buf, "\\org(%d,%d)%1[\\}]%n", &x1, &y1, sep, &len) > 2) {
                     if (callbacks->origin)
                         callbacks->origin(priv, x1, y1);
-                } else {
-                    len = strcspn(buf+1, "\\}") + 2;  /* skip unknown code */
                 }
                 buf += len - 1;
+                buf += strcspn(buf, "\\}"); // skip comments
             }
-            if (*buf++ != '}')
-                return AVERROR_INVALIDDATA;
+            buf++; // skip }
         } else {
             if (!text) {
                 text = buf;
@@ -559,7 +564,7 @@  int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
             buf++;
         }
     }
-    if (text && callbacks->text)
+    if (text && callbacks->text && !drawing)
         callbacks->text(priv, text, text_len);
     if (callbacks->end)
         callbacks->end(priv);
diff --git a/tests/ref/fate/sub-textenc b/tests/ref/fate/sub-textenc
index f7d82ce..2d7236e 100644
--- a/tests/ref/fate/sub-textenc
+++ b/tests/ref/fate/sub-textenc
@@ -152,7 +152,7 @@  text 2
 00:00:52,501 --> 00:00:54,500
 Hide these tags:
 also hide these tags:
-but show this: {normal text}
+but show this: 
 
 30
 00:00:54,501 --> 00:01:00,500
@@ -160,18 +160,18 @@  but show this: {normal text}
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
-The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
+The line will never break automatically right before or after a hard space. :-D
 
 31
 00:00:54,501 --> 00:00:56,500
 
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 
 32
 00:00:56,501 --> 00:00:58,500
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
@@ -179,10 +179,10 @@  Show this: \TEST and this: \-)
 33
 00:00:58,501 --> 00:01:00,500
 
-A letter followed by 05 hard spaces: A\h\h\h\h\h
+A letter followed by 05 hard spaces: A     
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
-05 hard  spaces between letters: A\h\h\h\h\hA
+05 hard  spaces between letters: A     A
 5 normal spaces between letters: A     A
 
 ^--Forced line break
diff --git a/tests/ref/fate/sub-webvttenc b/tests/ref/fate/sub-webvttenc
index 08903e3..5f4d2b6 100644
--- a/tests/ref/fate/sub-webvttenc
+++ b/tests/ref/fate/sub-webvttenc
@@ -125,33 +125,33 @@  text 2
 00:52.501 --> 00:54.500
 Hide these tags:
 also hide these tags:
-but show this: {normal text}
+but show this: 
 
 00:54.501 --> 01:00.500
 
 \ N is a forced line break
 \ h is a hard space
 Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
-The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
+The line will never break automatically right before or after a hard space. :-D
 
 00:54.501 --> 00:56.500
 
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 
 00:56.501 --> 00:58.500
-\h\h\h\h\hA (05 hard spaces followed by a letter)
+     A (05 hard spaces followed by a letter)
 A (Normal  spaces followed by a letter)
 A (No hard spaces followed by a letter)
 Show this: \TEST and this: \-)
 
 00:58.501 --> 01:00.500
 
-A letter followed by 05 hard spaces: A\h\h\h\h\h
+A letter followed by 05 hard spaces: A     
 A letter followed by normal  spaces: A
 A letter followed by no hard spaces: A
-05 hard  spaces between letters: A\h\h\h\h\hA
+05 hard  spaces between letters: A     A
 5 normal spaces between letters: A     A
 
 ^--Forced line break