[FFmpeg-devel] avformat: add option to parse/store ID3 PRIV tags in metadata.

Submitted by rshaffer@tunein.com on Jan. 23, 2018, 5:39 p.m.

Details

Message ID 20180123173953.63821-1-rshaffer@tunein.com
State New
Headers show

Commit Message

rshaffer@tunein.com Jan. 23, 2018, 5:39 p.m.
From: Richard Shaffer <rshaffer@tunein.com>

Enables getting access to ID3 PRIV tags from the command-line or metadata API
when demuxing. The PRIV owner is stored as the metadata key prepended with
"id3v2_priv.", and the data is stored as the metadata value. As PRIV tags may
contain arbitrary data, non-printable characters (including NUL bytes) and
backslashes are escaped as \xXX.

Similarly, any metadata tags that begin with "id3v2_priv." are inserted as ID3
PRIV tags into the output (assuming the format supports ID3). \xXX sequences in
the value are un-escaped to their byte value.
---
Whitespace changes re Moritz' comments on code format.

 libavformat/id3v2.c    | 48 ++++++++++++++++++++++++++++++++++++++++
 libavformat/id3v2.h    | 15 +++++++++++++
 libavformat/id3v2enc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libavformat/utils.c    |  2 ++
 4 files changed, 125 insertions(+)

Comments

wm4 Jan. 24, 2018, 3:20 a.m.
On Tue, 23 Jan 2018 09:39:53 -0800
rshaffer@tunein.com wrote:

> From: Richard Shaffer <rshaffer@tunein.com>
> 
> Enables getting access to ID3 PRIV tags from the command-line or metadata API
> when demuxing. The PRIV owner is stored as the metadata key prepended with
> "id3v2_priv.", and the data is stored as the metadata value. As PRIV tags may
> contain arbitrary data, non-printable characters (including NUL bytes) and
> backslashes are escaped as \xXX.
> 
> Similarly, any metadata tags that begin with "id3v2_priv." are inserted as ID3
> PRIV tags into the output (assuming the format supports ID3). \xXX sequences in
> the value are un-escaped to their byte value.
> ---
> Whitespace changes re Moritz' comments on code format.
> 
>  libavformat/id3v2.c    | 48 ++++++++++++++++++++++++++++++++++++++++
>  libavformat/id3v2.h    | 15 +++++++++++++
>  libavformat/id3v2enc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavformat/utils.c    |  2 ++
>  4 files changed, 125 insertions(+)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 6c216ba7a2..b80178d67a 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -33,6 +33,7 @@
>  #endif
>  
>  #include "libavutil/avstring.h"
> +#include "libavutil/bprint.h"
>  #include "libavutil/dict.h"
>  #include "libavutil/intreadwrite.h"
>  #include "avio_internal.h"
> @@ -1224,3 +1225,50 @@ end:
>      av_freep(&chapters);
>      return ret;
>  }
> +
> +/* Store each PRIV frame in *metadata: key = ID3v2_PRIV_METADATA_PREFIX + owner,
> + * value = escaped data. Returns 0 on success, a negative AVERROR code on error. */
> +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta **extra_meta)
> +{
> +    ID3v2ExtraMeta *cur;
> +    int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL;
> +
> +    for (cur = *extra_meta; cur; cur = cur->next) {
> +        if (!strcmp(cur->tag, "PRIV")) {
> +            ID3v2ExtraMetaPRIV *priv = cur->data;
> +            AVBPrint bprint;
> +            char *escaped, *key;
> +            int i, ret;
> +
> +            if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", priv->owner)) == NULL)
> +                return AVERROR(ENOMEM);
> +
> +            av_bprint_init(&bprint, priv->datasize + 1, AV_BPRINT_SIZE_UNLIMITED);
> +
> +            /* Bytes outside 32..126 and the backslash itself become \xXX. */
> +            for (i = 0; i < priv->datasize; i++) {
> +                if (priv->data[i] < 32 || priv->data[i] > 126 || priv->data[i] == '\\')
> +                    av_bprintf(&bprint, "\\x%02x", priv->data[i]);
> +                else
> +                    av_bprint_chars(&bprint, priv->data[i], 1);
> +            }
> +
> +            if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) {
> +                av_free(key);
> +                return ret;
> +            }
> +
> +            /* DONT_STRDUP_KEY/VAL: av_dict_set() owns key and escaped and frees
> +             * them itself on failure; freeing them here would be a double free. */
> +            if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < 0)
> +                return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
> +{ /* Forwards to ff_id3v2_parse_priv_dict() with s->metadata as the destination. */
> +    return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta);
> +}
> diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h
> index 5e64ead096..9de0bee374 100644
> --- a/libavformat/id3v2.h
> +++ b/libavformat/id3v2.h
> @@ -39,6 +39,8 @@
>  #define ID3v2_FLAG_ENCRYPTION  0x0004
>  #define ID3v2_FLAG_COMPRESSION 0x0008
>  
> +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv."
> +
>  enum ID3v2Encoding {
>      ID3v2_ENCODING_ISO8859  = 0,
>      ID3v2_ENCODING_UTF16BOM = 1,
> @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
>   */
>  int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
>  
> +/**
> + * Parse PRIV tags into a dictionary. Key: ID3v2_PRIV_METADATA_PREFIX + owner.
> + * Value: PRIV data; non-printable bytes and '\' are escaped as \xXX.
> + */
> +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta **extra_meta);
> +
> +/**
> + * Add metadata for all PRIV tags in the ID3v2 header to s->metadata. Each key
> + * is ID3v2_PRIV_METADATA_PREFIX + owner; the value is the PRIV data with
> + * non-printable bytes and '\' escaped as \xXX.
> + */
> +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
> +
>  extern const AVMetadataConv ff_id3v2_34_metadata_conv[];
>  extern const AVMetadataConv ff_id3v2_4_metadata_conv[];
>  
> diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c
> index 14de76ac06..ffe358f019 100644
> --- a/libavformat/id3v2enc.c
> +++ b/libavformat/id3v2enc.c
> @@ -96,6 +96,59 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, AVIOContext *avioc, const char *
>      return len + ID3v2_HEADER_SIZE;
>  }
>  
> +/**
> + * Emit one PRIV frame. 'key' must start with ID3v2_PRIV_METADATA_PREFIX; the
> + * rest of it is the owner. Each \xXX in 'data' (two hex digits) is written as
> + * the byte it encodes. Returns the payload length plus ID3v2_HEADER_SIZE, 0 if
> + * 'key' lacks the prefix (nothing is written), or a negative AVERROR code.
> + */
> +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const char *key, const char *data)
> +{
> +    AVIOContext *payload;
> +    uint8_t *frame;
> +    int size;
> +
> +    if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key))
> +        return 0;
> +    if (avio_open_dyn_buf(&payload) < 0)
> +        return AVERROR(ENOMEM);
> +
> +    /* The owner identifier goes first, including its terminating zero byte. */
> +    avio_write(payload, key, strlen(key) + 1);
> +
> +    while (*data) {
> +        if (!av_strstart(data, "\\x", &data)) {
> +            /* Anything that is not an escape is copied through unchanged. */
> +            avio_write(payload, data, 1);
> +            data++;
> +            continue;
> +        }
> +        if (!data[0] || !data[1] || !av_isxdigit(data[0]) || !av_isxdigit(data[1])) {
> +            ffio_free_dyn_buf(&payload);
> +            av_log(avioc, AV_LOG_ERROR, "Invalid escape '\\x%.2s' in metadata tag '"
> +                   ID3v2_PRIV_METADATA_PREFIX "%s'.\n", data, key);
> +            return AVERROR(EINVAL);
> +        } else {
> +            const char hex[] = { data[0], data[1], 0 };
> +            avio_w8(payload, strtol(hex, NULL, 16));
> +            data += 2;
> +        }
> +    }
> +
> +    size = avio_close_dyn_buf(payload, &frame);
> +    /* Frame: ID, size (raw 32-bit for version 3, id3v2_put_size() otherwise), two zero bytes, payload. */
> +    avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V'));
> +    if (id3->version != 3)
> +        id3v2_put_size(avioc, size);
> +    else
> +        avio_wb32(avioc, size);
> +    avio_wb16(avioc, 0);
> +    avio_write(avioc, frame, size);
> +    av_free(frame);
> +
> +    return size + ID3v2_HEADER_SIZE;
> +}
> +
>  static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, AVDictionaryEntry *t,
>                                   const char table[][4], enum ID3v2Encoding enc)
>  {
> @@ -186,6 +239,13 @@ static int write_metadata(AVIOContext *pb, AVDictionary **metadata,
>              continue;
>          }
>  
> +        if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) {
> +            id3->len += ret;
> +            continue;
> +        } else if (ret < 0) {
> +            return ret;
> +        }
> +
>          /* unknown tag, write as TXXX frame */
>          if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', 'X', 'X', 'X'), enc)) < 0)
>              return ret;
> diff --git a/libavformat/utils.c b/libavformat/utils.c
> index 3d733417e1..c15b8cc818 100644
> --- a/libavformat/utils.c
> +++ b/libavformat/utils.c
> @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const char *filename,
>                  goto fail;
>              if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0)
>                  goto fail;
> +            if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0)
> +                goto fail;
>          } else
>              av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n");
>      }

Pushed, with a libavformat microbump added.

Patch hide | download patch | download mbox

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 6c216ba7a2..b80178d67a 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -33,6 +33,7 @@ 
 #endif
 
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/dict.h"
 #include "libavutil/intreadwrite.h"
 #include "avio_internal.h"
@@ -1224,3 +1225,50 @@  end:
     av_freep(&chapters);
     return ret;
 }
+
+/* Store each PRIV frame in *metadata: key = ID3v2_PRIV_METADATA_PREFIX + owner,
+ * value = escaped data. Returns 0 on success, a negative AVERROR code on error. */
+int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta **extra_meta)
+{
+    ID3v2ExtraMeta *cur;
+    int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL;
+
+    for (cur = *extra_meta; cur; cur = cur->next) {
+        if (!strcmp(cur->tag, "PRIV")) {
+            ID3v2ExtraMetaPRIV *priv = cur->data;
+            AVBPrint bprint;
+            char *escaped, *key;
+            int i, ret;
+
+            if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", priv->owner)) == NULL)
+                return AVERROR(ENOMEM);
+
+            av_bprint_init(&bprint, priv->datasize + 1, AV_BPRINT_SIZE_UNLIMITED);
+
+            /* Bytes outside 32..126 and the backslash itself become \xXX. */
+            for (i = 0; i < priv->datasize; i++) {
+                if (priv->data[i] < 32 || priv->data[i] > 126 || priv->data[i] == '\\')
+                    av_bprintf(&bprint, "\\x%02x", priv->data[i]);
+                else
+                    av_bprint_chars(&bprint, priv->data[i], 1);
+            }
+
+            if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) {
+                av_free(key);
+                return ret;
+            }
+
+            /* DONT_STRDUP_KEY/VAL: av_dict_set() owns key and escaped and frees
+             * them itself on failure; freeing them here would be a double free. */
+            if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
+{ /* Forwards to ff_id3v2_parse_priv_dict() with s->metadata as the destination. */
+    return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta);
+}
diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h
index 5e64ead096..9de0bee374 100644
--- a/libavformat/id3v2.h
+++ b/libavformat/id3v2.h
@@ -39,6 +39,8 @@ 
 #define ID3v2_FLAG_ENCRYPTION  0x0004
 #define ID3v2_FLAG_COMPRESSION 0x0008
 
+#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv."
+
 enum ID3v2Encoding {
     ID3v2_ENCODING_ISO8859  = 0,
     ID3v2_ENCODING_UTF16BOM = 1,
@@ -167,6 +169,19 @@  int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
  */
 int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
 
+/**
+ * Parse PRIV tags into a dictionary. Key: ID3v2_PRIV_METADATA_PREFIX + owner.
+ * Value: PRIV data; non-printable bytes and '\' are escaped as \xXX.
+ */
+int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta **extra_meta);
+
+/**
+ * Add metadata for all PRIV tags in the ID3v2 header to s->metadata. Each key
+ * is ID3v2_PRIV_METADATA_PREFIX + owner; the value is the PRIV data with
+ * non-printable bytes and '\' escaped as \xXX.
+ */
+int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta);
+
 extern const AVMetadataConv ff_id3v2_34_metadata_conv[];
 extern const AVMetadataConv ff_id3v2_4_metadata_conv[];
 
diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c
index 14de76ac06..ffe358f019 100644
--- a/libavformat/id3v2enc.c
+++ b/libavformat/id3v2enc.c
@@ -96,6 +96,59 @@  static int id3v2_put_ttag(ID3v2EncContext *id3, AVIOContext *avioc, const char *
     return len + ID3v2_HEADER_SIZE;
 }
 
+/**
+ * Emit one PRIV frame. 'key' must start with ID3v2_PRIV_METADATA_PREFIX; the
+ * rest of it is the owner. Each \xXX in 'data' (two hex digits) is written as
+ * the byte it encodes. Returns the payload length plus ID3v2_HEADER_SIZE, 0 if
+ * 'key' lacks the prefix (nothing is written), or a negative AVERROR code.
+ */
+static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const char *key, const char *data)
+{
+    AVIOContext *payload;
+    uint8_t *frame;
+    int size;
+
+    if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key))
+        return 0;
+    if (avio_open_dyn_buf(&payload) < 0)
+        return AVERROR(ENOMEM);
+
+    /* The owner identifier goes first, including its terminating zero byte. */
+    avio_write(payload, key, strlen(key) + 1);
+
+    while (*data) {
+        if (!av_strstart(data, "\\x", &data)) {
+            /* Anything that is not an escape is copied through unchanged. */
+            avio_write(payload, data, 1);
+            data++;
+            continue;
+        }
+        if (!data[0] || !data[1] || !av_isxdigit(data[0]) || !av_isxdigit(data[1])) {
+            ffio_free_dyn_buf(&payload);
+            av_log(avioc, AV_LOG_ERROR, "Invalid escape '\\x%.2s' in metadata tag '"
+                   ID3v2_PRIV_METADATA_PREFIX "%s'.\n", data, key);
+            return AVERROR(EINVAL);
+        } else {
+            const char hex[] = { data[0], data[1], 0 };
+            avio_w8(payload, strtol(hex, NULL, 16));
+            data += 2;
+        }
+    }
+
+    size = avio_close_dyn_buf(payload, &frame);
+    /* Frame: ID, size (raw 32-bit for version 3, id3v2_put_size() otherwise), two zero bytes, payload. */
+    avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V'));
+    if (id3->version != 3)
+        id3v2_put_size(avioc, size);
+    else
+        avio_wb32(avioc, size);
+    avio_wb16(avioc, 0);
+    avio_write(avioc, frame, size);
+    av_free(frame);
+
+    return size + ID3v2_HEADER_SIZE;
+}
+
 static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, AVDictionaryEntry *t,
                                  const char table[][4], enum ID3v2Encoding enc)
 {
@@ -186,6 +239,13 @@  static int write_metadata(AVIOContext *pb, AVDictionary **metadata,
             continue;
         }
 
+        if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) {
+            id3->len += ret;
+            continue;
+        } else if (ret < 0) {
+            return ret;
+        }
+
         /* unknown tag, write as TXXX frame */
         if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', 'X', 'X', 'X'), enc)) < 0)
             return ret;
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 3d733417e1..c15b8cc818 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -637,6 +637,8 @@  int avformat_open_input(AVFormatContext **ps, const char *filename,
                 goto fail;
             if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0)
                 goto fail;
+            if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0)
+                goto fail;
         } else
             av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n");
     }