@@ -309,6 +309,7 @@ enum AVEscapeMode {
AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode.
AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping.
+ AV_ESCAPE_MODE_XML, ///< Use XML ampersand-escaping; requires UTF-8 input.
};
/**
@@ -329,6 +330,33 @@ enum AVEscapeMode {
#define AV_ESCAPE_FLAG_STRICT (1 << 1)
/**
+ * In addition to the provided list, escape every character outside the range
+ * U+0020 to U+007E (i.e. C0 controls, DEL and all non-ASCII characters).
+ * This only applies to XML-escaping (AV_ESCAPE_MODE_XML).
+ */
+#define AV_ESCAPE_FLAG_NON_ASCII (1 << 2)
+
+/**
+ * In addition to the provided list, escape single quotes (..._SINGLE_QUOTE)
+ * and/or double quotes (..._DOUBLE_QUOTE). This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE (1 << 3)
+#define AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE (1 << 4)
+
+/**
+ * Replace invalid UTF-8 sequences with a U+FFFD REPLACEMENT CHARACTER, which
+ * is itself written in escaped form if AV_ESCAPE_FLAG_NON_ASCII is set.
+ * This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES (1 << 5)
+
+/**
+ * Replace invalid UTF-8 sequences with a '?'; this takes precedence over
+ * AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES. This only applies to XML-escaping.
+ */
+#define AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII (1 << 6)
+
+/**
* Escape string in src, and put the escaped string in an allocated
* string in *dst, which must be freed with av_free().
*
@@ -271,6 +271,49 @@ void av_bprint_escape(AVBPrint *dstbuf, const char *src, const char *special_cha
mode = AV_ESCAPE_MODE_BACKSLASH; /* TODO: implement a heuristic */
switch (mode) {
+    case AV_ESCAPE_MODE_XML:
+        /* Decode one UTF-8 code point at a time and emit it either verbatim or
+         * as an XML predefined entity / hexadecimal character reference. */
+        while (*src) {
+            uint8_t tmp;
+            uint32_t cp;
+            const char *src1 = src;
+            GET_UTF8(cp, (uint8_t)*src++, goto err;);
+
+            /* special_chars and WHITESPACES are byte strings, so only ASCII
+             * code points can be looked up in them without false matches. */
+            if ((cp < 0x80 &&
+                 ((special_chars && strchr(special_chars, cp)) ||
+                  ((flags & AV_ESCAPE_FLAG_WHITESPACE) && strchr(WHITESPACES, cp)))) ||
+                (!(flags & AV_ESCAPE_FLAG_STRICT) &&
+                 (cp == '&' || cp == '<' || cp == '>')) ||
+                ((flags & AV_ESCAPE_FLAG_ESCAPE_SINGLE_QUOTE) && cp == '\'') ||
+                ((flags & AV_ESCAPE_FLAG_ESCAPE_DOUBLE_QUOTE) && cp == '"') ||
+                ((flags & AV_ESCAPE_FLAG_NON_ASCII) && (cp < 0x20 || cp > 0x7e))) {
+                switch (cp) {
+                case '&' : av_bprintf(dstbuf, "&amp;");  break;
+                case '<' : av_bprintf(dstbuf, "&lt;");   break;
+                case '>' : av_bprintf(dstbuf, "&gt;");   break;
+                case '"' : av_bprintf(dstbuf, "&quot;"); break;
+                case '\'': av_bprintf(dstbuf, "&apos;"); break;
+                default:   av_bprintf(dstbuf, "&#x%"PRIx32";", cp); break;
+                }
+            } else {
+                PUT_UTF8(cp, tmp, av_bprint_chars(dstbuf, tmp, 1);)
+            }
+            continue;
+        err:
+            /* GET_UTF8 has already consumed the byte it rejected, which may be
+             * the terminating NUL; resume right after the first bad byte. */
+            src = src1 + 1;
+            if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_ASCII) {
+                av_bprint_chars(dstbuf, '?', 1);
+            } else if (flags & AV_ESCAPE_FLAG_REPLACE_INVALID_SEQUENCES) {
+                if (flags & AV_ESCAPE_FLAG_NON_ASCII)
+                    av_bprintf(dstbuf, "&#xFFFD;");     /* escaped U+FFFD */
+                else
+                    av_bprintf(dstbuf, "\xEF\xBF\xBD"); /* U+FFFD as UTF-8 */
+            } else {
+                av_bprint_chars(dstbuf, *src1, 1);      /* pass bad byte through */
+            }
+        }
+        break;
+
case AV_ESCAPE_MODE_QUOTE:
/* enclose the string between '' */
av_bprint_chars(dstbuf, '\'', 1);