diff mbox series

[FFmpeg-devel,1/3] avutil/{avstring, bprint}: add XML escaping from ffprobe to avutil

Message ID 20201204144643.73279-2-jeebjp@gmail.com
State Superseded
Headers show
Series Initial implementation of TTML encoding/muxing
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished

Commit Message

Jan Ekström Dec. 4, 2020, 2:46 p.m. UTC
From: Stefano Sabatini <stefasab@gmail.com>

---
 libavutil/avstring.h |  1 +
 libavutil/bprint.c   | 14 ++++++++++++++
 tools/ffescape.c     |  1 +
 3 files changed, 16 insertions(+)

Comments

Nicolas George Dec. 7, 2020, 1:15 p.m. UTC | #1
Jan Ekström (12020-12-04):
> From: Stefano Sabatini <stefasab@gmail.com>
> 
> ---
>  libavutil/avstring.h |  1 +
>  libavutil/bprint.c   | 14 ++++++++++++++
>  tools/ffescape.c     |  1 +
>  3 files changed, 16 insertions(+)

Good idea.

But I do not like that we are escaping characters that do not need to be
escaped (except &gt; for symmetry). I feel that there need to be three modes:

AV_ESCAPE_MODE_XML → for normal text
AV_ESCAPE_MODE_XML_QUOT → for attributes surrounded by ""
AV_ESCAPE_MODE_XML_APOS → for attributes surrounded by ''

> 
> diff --git a/libavutil/avstring.h b/libavutil/avstring.h
> index ee225585b3..79bb920a70 100644
> --- a/libavutil/avstring.h
> +++ b/libavutil/avstring.h
> @@ -324,6 +324,7 @@ enum AVEscapeMode {
>      AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
>      AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
>      AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
> +    AV_ESCAPE_MODE_XML,       ///< Use XML non-markup character data escaping.
>  };
>  
>  /**
> diff --git a/libavutil/bprint.c b/libavutil/bprint.c
> index 2f059c5ba6..d825b61b14 100644
> --- a/libavutil/bprint.c
> +++ b/libavutil/bprint.c
> @@ -283,6 +283,20 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
>          av_bprint_chars(dstbuf, '\'', 1);
>          break;
>  
> +    case AV_ESCAPE_MODE_XML:
> +        /* escape XML non-markup character data as per XML 1.0 section 2.4 */
> +        for (; *src; src++) {
> +            switch (*src) {
> +            case '&' : av_bprintf(dstbuf, "%s", "&amp;");  break;
> +            case '<' : av_bprintf(dstbuf, "%s", "&lt;");   break;
> +            case '>' : av_bprintf(dstbuf, "%s", "&gt;");   break;
> +            case '"' : av_bprintf(dstbuf, "%s", "&quot;"); break;
> +            case '\'': av_bprintf(dstbuf, "%s", "&apos;"); break;
> +            default: av_bprint_chars(dstbuf, *src, 1);
> +            }
> +        }
> +        break;
> +
>      /* case AV_ESCAPE_MODE_BACKSLASH or unknown mode */
>      default:
>          /* \-escape characters */
> diff --git a/tools/ffescape.c b/tools/ffescape.c
> index 0530d28c6d..8537235d5e 100644
> --- a/tools/ffescape.c
> +++ b/tools/ffescape.c
> @@ -104,6 +104,7 @@ int main(int argc, char **argv)
>              if      (!strcmp(optarg, "auto"))      escape_mode = AV_ESCAPE_MODE_AUTO;
>              else if (!strcmp(optarg, "backslash")) escape_mode = AV_ESCAPE_MODE_BACKSLASH;
>              else if (!strcmp(optarg, "quote"))     escape_mode = AV_ESCAPE_MODE_QUOTE;
> +            else if (!strcmp(optarg, "xml"))       escape_mode = AV_ESCAPE_MODE_XML;
>              else {
>                  av_log(NULL, AV_LOG_ERROR,
>                         "Invalid value '%s' for option -m, "

Regards,
diff mbox series

Patch

diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index ee225585b3..79bb920a70 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -324,6 +324,7 @@  enum AVEscapeMode {
     AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
     AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
     AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
+    AV_ESCAPE_MODE_XML,       ///< Use XML non-markup character data escaping.
 };
 
 /**
diff --git a/libavutil/bprint.c b/libavutil/bprint.c
index 2f059c5ba6..d825b61b14 100644
--- a/libavutil/bprint.c
+++ b/libavutil/bprint.c
@@ -283,6 +283,20 @@  void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
         av_bprint_chars(dstbuf, '\'', 1);
         break;
 
+    case AV_ESCAPE_MODE_XML:
+        /* escape XML non-markup character data as per XML 1.0 section 2.4 */
+        for (; *src; src++) {
+            switch (*src) {
+            case '&' : av_bprintf(dstbuf, "%s", "&amp;");  break;
+            case '<' : av_bprintf(dstbuf, "%s", "&lt;");   break;
+            case '>' : av_bprintf(dstbuf, "%s", "&gt;");   break;
+            case '"' : av_bprintf(dstbuf, "%s", "&quot;"); break;
+            case '\'': av_bprintf(dstbuf, "%s", "&apos;"); break;
+            default: av_bprint_chars(dstbuf, *src, 1);
+            }
+        }
+        break;
+
     /* case AV_ESCAPE_MODE_BACKSLASH or unknown mode */
     default:
         /* \-escape characters */
diff --git a/tools/ffescape.c b/tools/ffescape.c
index 0530d28c6d..8537235d5e 100644
--- a/tools/ffescape.c
+++ b/tools/ffescape.c
@@ -104,6 +104,7 @@  int main(int argc, char **argv)
             if      (!strcmp(optarg, "auto"))      escape_mode = AV_ESCAPE_MODE_AUTO;
             else if (!strcmp(optarg, "backslash")) escape_mode = AV_ESCAPE_MODE_BACKSLASH;
             else if (!strcmp(optarg, "quote"))     escape_mode = AV_ESCAPE_MODE_QUOTE;
+            else if (!strcmp(optarg, "xml"))       escape_mode = AV_ESCAPE_MODE_XML;
             else {
                 av_log(NULL, AV_LOG_ERROR,
                        "Invalid value '%s' for option -m, "