Message ID | 20180118003439.99504-1-rshaffer@tunein.com |
---|---|
State | Superseded |
Headers | show |
I wanted to push this thread to the top of the list one more time. wm4 was kind enough to make some comments on the original patch, which I think I've addressed. It would be good to know where it stands. If it might be useful to other people, it would be nice to have it accepted in ffmpeg, or to have some additional comments if more changes are needed. On the other hand, I don't want to be a squeaky wheel. If there is no response or interest, I won't have any hard feelings. Thanks again, -Richard On Wed, Jan 17, 2018 at 4:34 PM, <rshaffer@tunein.com> wrote: > From: Richard Shaffer <rshaffer@tunein.com> > > Enables getting access to ID3 PRIV tags from the command-line or metadata > API > when demuxing. The PRIV owner is stored as the metadata key prepended with > "id3v2_priv.", and the data is stored as the metadata value. As PRIV tags > may > contain arbitrary data, non-printable characters, including NULL bytes, are > escaped as \xXX. > > Similarly, any metadata tags that begin with "id3v2_priv." are inserted as > ID3 > PRIV tags into the output (assuming the format supports ID3). \xXX > sequences in > the value are un-escaped to their byte value. > --- > > Sorry. One more update. I realized there was a possibility of reading past > the > end of input in id3v2_put_priv, so I added a fix. > > -Richard > > libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++ > libavformat/id3v2.h | 15 ++++++++++++ > libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++ > ++++++++++++++++++++ > libavformat/utils.c | 2 ++ > 4 files changed, 129 insertions(+) > > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c > index 6c216ba7a2..e46174d7c7 100644 > --- a/libavformat/id3v2.c > +++ b/libavformat/id3v2.c > @@ -33,6 +33,7 @@ > #endif > > #include "libavutil/avstring.h" > +#include "libavutil/bprint.h" > #include "libavutil/dict.h" > #include "libavutil/intreadwrite.h" > #include "avio_internal.h" > @@ -1224,3 +1225,50 @@ end: > av_freep(&chapters); > return ret; > } > + > +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta > **extra_meta) > +{ > + ID3v2ExtraMeta *cur; > + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | > AV_DICT_DONT_STRDUP_VAL; > + > + for (cur = *extra_meta; cur; cur = cur->next) { > + if (!strcmp(cur->tag, "PRIV")) { > + ID3v2ExtraMetaPRIV *priv = cur->data; > + AVBPrint bprint; > + char * escaped, * key; > + int i, ret; > + > + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", > priv->owner)) == NULL) { > + return AVERROR(ENOMEM); > + } > + > + av_bprint_init(&bprint, priv->datasize + 1, > AV_BPRINT_SIZE_UNLIMITED); > + > + for (i = 0; i < priv->datasize; i++) { > + if (priv->data[i] < 32 || priv->data[i] > 126 || > priv->data[i] == '\\') { > + av_bprintf(&bprint, "\\x%02x", priv->data[i]); > + } else { > + av_bprint_chars(&bprint, priv->data[i], 1); > + } > + } > + > + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) { > + av_free(key); > + return ret; > + } > + > + if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < > 0) { > + av_free(key); > + av_free(escaped); > + return ret; > + } > + } > + } > + > + return 0; > +} > + > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta) > +{ > + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta); > +} > \ No newline at end of file > diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h > index 5e64ead096..9de0bee374 100644 > --- a/libavformat/id3v2.h > +++ b/libavformat/id3v2.h > @@ -39,6 +39,8 @@ > #define ID3v2_FLAG_ENCRYPTION 0x0004 > #define ID3v2_FLAG_COMPRESSION 0x0008 > > +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv." > + > enum ID3v2Encoding { > ID3v2_ENCODING_ISO8859 = 0, > ID3v2_ENCODING_UTF16BOM = 1, > @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, > ID3v2ExtraMeta **extra_meta); > */ > int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta > **extra_meta); > > +/** > + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata key. > The > + * PRIV data is the value, with non-printable characters escaped. > + */ > +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta > **extra_meta); > + > +/** > + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner is > the > + * metadata key. The PRIV data is the value, with non-printable characters > + * escaped. > + */ > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); > + > extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; > extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; > > diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c > index 14de76ac06..d5e358c8b6 100644 > --- a/libavformat/id3v2enc.c > +++ b/libavformat/id3v2enc.c > @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, > AVIOContext *avioc, const char * > return len + ID3v2_HEADER_SIZE; > } > > +/** > + * Write a priv frame with owner and data. 'key' is the owner prepended > with > + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX > + * (where 'X' is a valid hex digit) will be unescaped to the byte value. > + */ > +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const > char *key, const char *data) > +{ > + int len; > + uint8_t *pb; > + AVIOContext *dyn_buf; > + > + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) { > + return 0; > + } > + > + if (avio_open_dyn_buf(&dyn_buf) < 0) > + return AVERROR(ENOMEM); > + > + // owner + null byte. > + avio_write(dyn_buf, key, strlen(key) + 1); > + > + while (*data) { > + if (av_strstart(data, "\\x", &data)) { > + if (data[0] && data[1] && av_isxdigit(data[0]) && > av_isxdigit(data[1])) { > + char digits[] = {data[0], data[1], 0}; > + avio_w8(dyn_buf, strtol(digits, NULL, 16)); > + } else { > + // '\x' wasn't followed by two hex digits. Just write out > + // whatever the 2-4 characters were. > + avio_write(dyn_buf, "\\x", 2); > + if (data[0]) { > + avio_w8(dyn_buf, data[0]); > + if (data[1]) > + avio_w8(dyn_buf, data[1]); > + } > + } > + data += 2; > + } else { > + avio_write(dyn_buf, data++, 1); > + } > + } > + > + len = avio_close_dyn_buf(dyn_buf, &pb); > + > + avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V')); > + if (id3->version == 3) > + avio_wb32(avioc, len); > + else > + id3v2_put_size(avioc, len); > + avio_wb16(avioc, 0); > + avio_write(avioc, pb, len); > + > + av_free(pb); > + > + return len + ID3v2_HEADER_SIZE; > +} > + > static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, > AVDictionaryEntry *t, > const char table[][4], enum > ID3v2Encoding enc) > { > @@ -186,6 +243,13 @@ static int write_metadata(AVIOContext *pb, > AVDictionary **metadata, > continue; > } > > + if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) { > + id3->len += ret; > + continue; > + } else if (ret < 0) { > + return ret; > + } > + > /* unknown tag, write as TXXX frame */ > if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', > 'X', 'X', 'X'), enc)) < 0) > return ret; > diff --git a/libavformat/utils.c b/libavformat/utils.c > index 3d733417e1..c15b8cc818 100644 > --- a/libavformat/utils.c > +++ b/libavformat/utils.c > @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const > char *filename, > goto fail; > if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0) > goto fail; > + if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0) > + goto fail; > } else > av_log(s, AV_LOG_DEBUG, "demuxer does not support additional > id3 data, skipping\n"); > } > -- > 2.14.3 (Apple Git-98) > >
On Wed, 17 Jan 2018 16:34:39 -0800 rshaffer@tunein.com wrote: > From: Richard Shaffer <rshaffer@tunein.com> > > Enables getting access to ID3 PRIV tags from the command-line or metadata API > when demuxing. The PRIV owner is stored as the metadata key prepended with > "id3v2_priv.", and the data is stored as the metadata value. As PRIV tags may > contain arbitrary data, non-printable characters, including NULL bytes, are > escaped as \xXX. > > Similarly, any metadata tags that begin with "id3v2_priv." are inserted as ID3 > PRIV tags into the output (assuming the format supports ID3). \xXX sequences in > the value are un-escaped to their byte value. > --- > > Sorry. One more update. I realized there was a possibility of reading past the > end of input in id3v2_put_priv, so I added a fix. > > -Richard > > libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++ > libavformat/id3v2.h | 15 ++++++++++++ > libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ > libavformat/utils.c | 2 ++ > 4 files changed, 129 insertions(+) > > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c > index 6c216ba7a2..e46174d7c7 100644 > --- a/libavformat/id3v2.c > +++ b/libavformat/id3v2.c > @@ -33,6 +33,7 @@ > #endif > > #include "libavutil/avstring.h" > +#include "libavutil/bprint.h" > #include "libavutil/dict.h" > #include "libavutil/intreadwrite.h" > #include "avio_internal.h" > @@ -1224,3 +1225,50 @@ end: > av_freep(&chapters); > return ret; > } > + > +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta **extra_meta) > +{ > + ID3v2ExtraMeta *cur; > + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL; > + > + for (cur = *extra_meta; cur; cur = cur->next) { > + if (!strcmp(cur->tag, "PRIV")) { > + ID3v2ExtraMetaPRIV *priv = cur->data; > + AVBPrint bprint; > + char * escaped, * key; > + int i, ret; > + > + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", priv->owner)) == NULL) { > + return AVERROR(ENOMEM); > + } > + > + av_bprint_init(&bprint, priv->datasize + 1, AV_BPRINT_SIZE_UNLIMITED); > + > + for (i = 0; i < priv->datasize; i++) { > + if (priv->data[i] < 32 || priv->data[i] > 126 || priv->data[i] == '\\') { > + av_bprintf(&bprint, "\\x%02x", priv->data[i]); > + } else { > + av_bprint_chars(&bprint, priv->data[i], 1); > + } > + } > + > + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) { > + av_free(key); > + return ret; > + } > + > + if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < 0) { > + av_free(key); > + av_free(escaped); > + return ret; > + } > + } > + } > + > + return 0; > +} > + > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta) > +{ > + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta); > +} > \ No newline at end of file > diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h > index 5e64ead096..9de0bee374 100644 > --- a/libavformat/id3v2.h > +++ b/libavformat/id3v2.h > @@ -39,6 +39,8 @@ > #define ID3v2_FLAG_ENCRYPTION 0x0004 > #define ID3v2_FLAG_COMPRESSION 0x0008 > > +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv." > + > enum ID3v2Encoding { > ID3v2_ENCODING_ISO8859 = 0, > ID3v2_ENCODING_UTF16BOM = 1, > @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); > */ > int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); > > +/** > + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata key. The > + * PRIV data is the value, with non-printable characters escaped. > + */ > +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta **extra_meta); > + > +/** > + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner is the > + * metadata key. The PRIV data is the value, with non-printable characters > + * escaped. > + */ > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); > + > extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; > extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; > > diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c > index 14de76ac06..d5e358c8b6 100644 > --- a/libavformat/id3v2enc.c > +++ b/libavformat/id3v2enc.c > @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, AVIOContext *avioc, const char * > return len + ID3v2_HEADER_SIZE; > } > > +/** > + * Write a priv frame with owner and data. 'key' is the owner prepended with > + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX > + * (where 'X' is a valid hex digit) will be unescaped to the byte value. > + */ > +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const char *key, const char *data) > +{ > + int len; > + uint8_t *pb; > + AVIOContext *dyn_buf; > + > + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) { > + return 0; > + } > + > + if (avio_open_dyn_buf(&dyn_buf) < 0) > + return AVERROR(ENOMEM); > + > + // owner + null byte. > + avio_write(dyn_buf, key, strlen(key) + 1); > + > + while (*data) { > + if (av_strstart(data, "\\x", &data)) { > + if (data[0] && data[1] && av_isxdigit(data[0]) && av_isxdigit(data[1])) { > + char digits[] = {data[0], data[1], 0}; > + avio_w8(dyn_buf, strtol(digits, NULL, 16)); > + } else { > + // '\x' wasn't followed by two hex digits. Just write out > + // whatever the 2-4 characters were. > + avio_write(dyn_buf, "\\x", 2); > + if (data[0]) { > + avio_w8(dyn_buf, data[0]); > + if (data[1]) > + avio_w8(dyn_buf, data[1]); > + } Wouldn't this be slightly nicer if you only wrote the "\\x" here, and moved the "data += 2;" to the case when the digits were valid? Though personally I'd probably prefer if it just errored out on invalid input, but dunno. > + } > + data += 2; > + } else { > + avio_write(dyn_buf, data++, 1); > + } > + } > + > + len = avio_close_dyn_buf(dyn_buf, &pb); > + > + avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V')); > + if (id3->version == 3) > + avio_wb32(avioc, len); > + else > + id3v2_put_size(avioc, len); > + avio_wb16(avioc, 0); > + avio_write(avioc, pb, len); > + > + av_free(pb); > + > + return len + ID3v2_HEADER_SIZE; > +} > + > static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, AVDictionaryEntry *t, > const char table[][4], enum ID3v2Encoding enc) > { > @@ -186,6 +243,13 @@ static int write_metadata(AVIOContext *pb, AVDictionary **metadata, > continue; > } > > + if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) { > + id3->len += ret; > + continue; > + } else if (ret < 0) { > + return ret; > + } > + > /* unknown tag, write as TXXX frame */ > if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', 'X', 'X', 'X'), enc)) < 0) > return ret; > diff --git a/libavformat/utils.c b/libavformat/utils.c > index 3d733417e1..c15b8cc818 100644 > --- a/libavformat/utils.c > +++ b/libavformat/utils.c > @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const char *filename, > goto fail; > if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0) > goto fail; > + if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0) > + goto fail; > } else > av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n"); > } I still don't like using hex escaping, but given how there's no better mechanism for metadata that can contain arbitrary bytes, it's probably fine. Send a reminder in a day, and I'll push the patch if there are no new comments. Bonus points if you send another patch adding a FATE test for this later.
On Mon, Jan 22, 2018 at 3:18 PM, wm4 <nfxjfg@googlemail.com> wrote: > On Wed, 17 Jan 2018 16:34:39 -0800 > rshaffer@tunein.com wrote: > > > From: Richard Shaffer <rshaffer@tunein.com> > > > > Enables getting access to ID3 PRIV tags from the command-line or > metadata API > > when demuxing. The PRIV owner is stored as the metadata key prepended > with > > "id3v2_priv.", and the data is stored as the metadata value. As PRIV > tags may > > contain arbitrary data, non-printable characters, including NULL bytes, > are > > escaped as \xXX. > > > > Similarly, any metadata tags that begin with "id3v2_priv." are inserted > as ID3 > > PRIV tags into the output (assuming the format supports ID3). \xXX > sequences in > > the value are un-escaped to their byte value. > > --- > > > > Sorry. One more update. I realized there was a possibility of reading > past the > > end of input in id3v2_put_priv, so I added a fix. > > > > -Richard > > > > libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++ > > libavformat/id3v2.h | 15 ++++++++++++ > > libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++ > ++++++++++++++++++++ > > libavformat/utils.c | 2 ++ > > 4 files changed, 129 insertions(+) > > > > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c > > index 6c216ba7a2..e46174d7c7 100644 > > --- a/libavformat/id3v2.c > > +++ b/libavformat/id3v2.c > > @@ -33,6 +33,7 @@ > > #endif > > > > #include "libavutil/avstring.h" > > +#include "libavutil/bprint.h" > > #include "libavutil/dict.h" > > #include "libavutil/intreadwrite.h" > > #include "avio_internal.h" > > @@ -1224,3 +1225,50 @@ end: > > av_freep(&chapters); > > return ret; > > } > > + > > +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta > **extra_meta) > > +{ > > + ID3v2ExtraMeta *cur; > > + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | > AV_DICT_DONT_STRDUP_VAL; > > + > > + for (cur = *extra_meta; cur; cur = cur->next) { > > + if (!strcmp(cur->tag, "PRIV")) { > > + ID3v2ExtraMetaPRIV *priv = cur->data; > > + AVBPrint bprint; > > + char * escaped, * key; > > + int i, ret; > > + > > + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", > priv->owner)) == NULL) { > > + return AVERROR(ENOMEM); > > + } > > + > > + av_bprint_init(&bprint, priv->datasize + 1, > AV_BPRINT_SIZE_UNLIMITED); > > + > > + for (i = 0; i < priv->datasize; i++) { > > + if (priv->data[i] < 32 || priv->data[i] > 126 || > priv->data[i] == '\\') { > > + av_bprintf(&bprint, "\\x%02x", priv->data[i]); > > + } else { > > + av_bprint_chars(&bprint, priv->data[i], 1); > > + } > > + } > > + > > + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) { > > + av_free(key); > > + return ret; > > + } > > + > > + if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) > < 0) { > > + av_free(key); > > + av_free(escaped); > > + return ret; > > + } > > + } > > + } > > + > > + return 0; > > +} > > + > > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta > **extra_meta) > > +{ > > + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta); > > +} > > \ No newline at end of file > > diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h > > index 5e64ead096..9de0bee374 100644 > > --- a/libavformat/id3v2.h > > +++ b/libavformat/id3v2.h > > @@ -39,6 +39,8 @@ > > #define ID3v2_FLAG_ENCRYPTION 0x0004 > > #define ID3v2_FLAG_COMPRESSION 0x0008 > > > > +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv." > > + > > enum ID3v2Encoding { > > ID3v2_ENCODING_ISO8859 = 0, > > ID3v2_ENCODING_UTF16BOM = 1, > > @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, > ID3v2ExtraMeta **extra_meta); > > */ > > int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta > **extra_meta); > > > > +/** > > + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata > key. The > > + * PRIV data is the value, with non-printable characters escaped. > > + */ > > +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta > **extra_meta); > > + > > +/** > > + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner > is the > > + * metadata key. The PRIV data is the value, with non-printable > characters > > + * escaped. > > + */ > > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta > **extra_meta); > > + > > extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; > > extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; > > > > diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c > > index 14de76ac06..d5e358c8b6 100644 > > --- a/libavformat/id3v2enc.c > > +++ b/libavformat/id3v2enc.c > > @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, > AVIOContext *avioc, const char * > > return len + ID3v2_HEADER_SIZE; > > } > > > > +/** > > + * Write a priv frame with owner and data. 'key' is the owner prepended > with > > + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX > > + * (where 'X' is a valid hex digit) will be unescaped to the byte value. > > + */ > > +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, > const char *key, const char *data) > > +{ > > + int len; > > + uint8_t *pb; > > + AVIOContext *dyn_buf; > > + > > + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) { > > + return 0; > > + } > > + > > + if (avio_open_dyn_buf(&dyn_buf) < 0) > > + return AVERROR(ENOMEM); > > + > > + // owner + null byte. > > + avio_write(dyn_buf, key, strlen(key) + 1); > > + > > + while (*data) { > > + if (av_strstart(data, "\\x", &data)) { > > + if (data[0] && data[1] && av_isxdigit(data[0]) && > av_isxdigit(data[1])) { > > + char digits[] = {data[0], data[1], 0}; > > + avio_w8(dyn_buf, strtol(digits, NULL, 16)); > > + } else { > > + // '\x' wasn't followed by two hex digits. Just write > out > > + // whatever the 2-4 characters were. > > + avio_write(dyn_buf, "\\x", 2); > > + if (data[0]) { > > + avio_w8(dyn_buf, data[0]); > > + if (data[1]) > > + avio_w8(dyn_buf, data[1]); > > + } > > Wouldn't this be slightly nicer if you only wrote the "\\x" here, and > moved the "data += 2;" to the case when the digits were valid? > > Though personally I'd probably prefer if it just errored out on invalid > input, but dunno. > Not a bad idea to just error out, I guess. Probably better than silently writing data that could be a bug. Would this be better? if (av_strstart(data, "\\x", &data)) { if (data[0] && data[1] && av_isxdigit(data[0]) && av_isxdigit(data[1])) { char digits[] = {data[0], data[1], 0}; avio_w8(dyn_buf, strtol(digits, NULL, 16)); data += 2; } else { ffio_free_dyn_buf(&dyn_buf); av_log(avioc, AV_LOG_ERROR, "Invalid escape \\x%.2s in metadata tag '%s'.\n", data, key); return AVERROR(EINVAL);; } } else { avio_write(dyn_buf, data++, 1); } > > > + } > > + data += 2; > > + } else { > > + avio_write(dyn_buf, data++, 1); > > + } > > + } > > + > > + len = avio_close_dyn_buf(dyn_buf, &pb); > > + > > + avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V')); > > + if (id3->version == 3) > > + avio_wb32(avioc, len); > > + else > > + id3v2_put_size(avioc, len); > > + avio_wb16(avioc, 0); > > + avio_write(avioc, pb, len); > > + > > + av_free(pb); > > + > > + return len + ID3v2_HEADER_SIZE; > > +} > > + > > static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext > *pb, AVDictionaryEntry *t, > > const char table[][4], enum > ID3v2Encoding enc) > > { > > @@ -186,6 +243,13 @@ static int write_metadata(AVIOContext *pb, > AVDictionary **metadata, > > continue; > > } > > > > + if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) { > > + id3->len += ret; > > + continue; > > + } else if (ret < 0) { > > + return ret; > > + } > > + > > /* unknown tag, write as TXXX frame */ > > if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, > MKBETAG('T', 'X', 'X', 'X'), enc)) < 0) > > return ret; > > diff --git a/libavformat/utils.c b/libavformat/utils.c > > index 3d733417e1..c15b8cc818 100644 > > --- a/libavformat/utils.c > > +++ b/libavformat/utils.c > > @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const > char *filename, > > goto fail; > > if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < > 0) > > goto fail; > > + if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0) > > + goto fail; > > } else > > av_log(s, AV_LOG_DEBUG, "demuxer does not support > additional id3 data, skipping\n"); > > } > > I still don't like using hex escaping, but given how there's no better > mechanism for metadata that can contain arbitrary bytes, it's probably > fine. > I feel the same way, but I don't see a better way without breaking the API or inventing a new one. > > Send a reminder in a day, and I'll push the patch if there are no new > comments. > > Bonus points if you send another patch adding a FATE test for this > later. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
On Mon, 22 Jan 2018 15:48:07 -0800 Richard Shaffer <rshaffer@tunein.com> wrote: > On Mon, Jan 22, 2018 at 3:18 PM, wm4 <nfxjfg@googlemail.com> wrote: > > > On Wed, 17 Jan 2018 16:34:39 -0800 > > rshaffer@tunein.com wrote: > > > > > From: Richard Shaffer <rshaffer@tunein.com> > > > > > > Enables getting access to ID3 PRIV tags from the command-line or > > metadata API > > > when demuxing. The PRIV owner is stored as the metadata key prepended > > with > > > "id3v2_priv.", and the data is stored as the metadata value. As PRIV > > tags may > > > contain arbitrary data, non-printable characters, including NULL bytes, > > are > > > escaped as \xXX. > > > > > > Similarly, any metadata tags that begin with "id3v2_priv." are inserted > > as ID3 > > > PRIV tags into the output (assuming the format supports ID3). \xXX > > sequences in > > > the value are un-escaped to their byte value. > > > --- > > > > > > Sorry. One more update. I realized there was a possibility of reading > > past the > > > end of input in id3v2_put_priv, so I added a fix. > > > > > > -Richard > > > > > > libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++ > > > libavformat/id3v2.h | 15 ++++++++++++ > > > libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++ > > ++++++++++++++++++++ > > > libavformat/utils.c | 2 ++ > > > 4 files changed, 129 insertions(+) > > > > > > diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c > > > index 6c216ba7a2..e46174d7c7 100644 > > > --- a/libavformat/id3v2.c > > > +++ b/libavformat/id3v2.c > > > @@ -33,6 +33,7 @@ > > > #endif > > > > > > #include "libavutil/avstring.h" > > > +#include "libavutil/bprint.h" > > > #include "libavutil/dict.h" > > > #include "libavutil/intreadwrite.h" > > > #include "avio_internal.h" > > > @@ -1224,3 +1225,50 @@ end: > > > av_freep(&chapters); > > > return ret; > > > } > > > + > > > +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta > > **extra_meta) > > > +{ > > > + ID3v2ExtraMeta *cur; > > > + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | > > AV_DICT_DONT_STRDUP_VAL; > > > + > > > + for (cur = *extra_meta; cur; cur = cur->next) { > > > + if (!strcmp(cur->tag, "PRIV")) { > > > + ID3v2ExtraMetaPRIV *priv = cur->data; > > > + AVBPrint bprint; > > > + char * escaped, * key; > > > + int i, ret; > > > + > > > + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", > > priv->owner)) == NULL) { > > > + return AVERROR(ENOMEM); > > > + } > > > + > > > + av_bprint_init(&bprint, priv->datasize + 1, > > AV_BPRINT_SIZE_UNLIMITED); > > > + > > > + for (i = 0; i < priv->datasize; i++) { > > > + if (priv->data[i] < 32 || priv->data[i] > 126 || > > priv->data[i] == '\\') { > > > + av_bprintf(&bprint, "\\x%02x", priv->data[i]); > > > + } else { > > > + av_bprint_chars(&bprint, priv->data[i], 1); > > > + } > > > + } > > > + > > > + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) { > > > + av_free(key); > > > + return ret; > > > + } > > > + > > > + if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) > > < 0) { > > > + av_free(key); > > > + av_free(escaped); > > > + return ret; > > > + } > > > + } > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta > > **extra_meta) > > > +{ > > > + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta); > > > +} > > > \ No newline at end of file > > > diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h > > > index 5e64ead096..9de0bee374 100644 > > > --- a/libavformat/id3v2.h > > > +++ b/libavformat/id3v2.h > > > @@ -39,6 +39,8 @@ > > > #define ID3v2_FLAG_ENCRYPTION 0x0004 > > > #define ID3v2_FLAG_COMPRESSION 0x0008 > > > > > > +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv." > > > + > > > enum ID3v2Encoding { > > > ID3v2_ENCODING_ISO8859 = 0, > > > ID3v2_ENCODING_UTF16BOM = 1, > > > @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, > > ID3v2ExtraMeta **extra_meta); > > > */ > > > int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta > > **extra_meta); > > > > > > +/** > > > + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata > > key. The > > > + * PRIV data is the value, with non-printable characters escaped. > > > + */ > > > +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta > > **extra_meta); > > > + > > > +/** > > > + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner > > is the > > > + * metadata key. The PRIV data is the value, with non-printable > > characters > > > + * escaped. > > > + */ > > > +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta > > **extra_meta); > > > + > > > extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; > > > extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; > > > > > > diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c > > > index 14de76ac06..d5e358c8b6 100644 > > > --- a/libavformat/id3v2enc.c > > > +++ b/libavformat/id3v2enc.c > > > @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, > > AVIOContext *avioc, const char * > > > return len + ID3v2_HEADER_SIZE; > > > } > > > > > > +/** > > > + * Write a priv frame with owner and data. 'key' is the owner prepended > > with > > > + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX > > > + * (where 'X' is a valid hex digit) will be unescaped to the byte value. > > > + */ > > > +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, > > const char *key, const char *data) > > > +{ > > > + int len; > > > + uint8_t *pb; > > > + AVIOContext *dyn_buf; > > > + > > > + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) { > > > + return 0; > > > + } > > > + > > > + if (avio_open_dyn_buf(&dyn_buf) < 0) > > > + return AVERROR(ENOMEM); > > > + > > > + // owner + null byte. > > > + avio_write(dyn_buf, key, strlen(key) + 1); > > > + > > > + while (*data) { > > > + if (av_strstart(data, "\\x", &data)) { > > > + if (data[0] && data[1] && av_isxdigit(data[0]) && > > av_isxdigit(data[1])) { > > > + char digits[] = {data[0], data[1], 0}; > > > + avio_w8(dyn_buf, strtol(digits, NULL, 16)); > > > + } else { > > > + // '\x' wasn't followed by two hex digits. Just write > > out > > > + // whatever the 2-4 characters were. > > > + avio_write(dyn_buf, "\\x", 2); > > > + if (data[0]) { > > > + avio_w8(dyn_buf, data[0]); > > > + if (data[1]) > > > + avio_w8(dyn_buf, data[1]); > > > + } > > > > Wouldn't this be slightly nicer if you only wrote the "\\x" here, and > > moved the "data += 2;" to the case when the digits were valid? > > > > Though personally I'd probably prefer if it just errored out on invalid > > input, but dunno. > > > > Not a bad idea to just error out, I guess. Probably better than silently > writing data that could be a bug. Would this be better? > > if (av_strstart(data, "\\x", &data)) { > if (data[0] && data[1] && av_isxdigit(data[0]) && > av_isxdigit(data[1])) { > char digits[] = {data[0], data[1], 0}; > avio_w8(dyn_buf, strtol(digits, NULL, 16)); > data += 2; > } else { > ffio_free_dyn_buf(&dyn_buf); > av_log(avioc, AV_LOG_ERROR, "Invalid escape \\x%.2s in > metadata tag '%s'.\n", data, key); > return AVERROR(EINVAL);; > } > } else { > avio_write(dyn_buf, data++, 1); > } Yeah, I'd feel slightly better about this.
On Wed, Jan 17, 2018 at 16:34:39 -0800, rshaffer@tunein.com wrote: If you have to post one more update anyway, some formal comments from me: > + char * escaped, * key; "*" attaches to the right-hand term. > +} > \ No newline at end of file Please do add this newline. :-) Moritz.
diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index 6c216ba7a2..e46174d7c7 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -33,6 +33,7 @@ #endif #include "libavutil/avstring.h" +#include "libavutil/bprint.h" #include "libavutil/dict.h" #include "libavutil/intreadwrite.h" #include "avio_internal.h" @@ -1224,3 +1225,50 @@ end: av_freep(&chapters); return ret; } + +int ff_id3v2_parse_priv_dict(AVDictionary **metadata, ID3v2ExtraMeta **extra_meta) +{ + ID3v2ExtraMeta *cur; + int dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL; + + for (cur = *extra_meta; cur; cur = cur->next) { + if (!strcmp(cur->tag, "PRIV")) { + ID3v2ExtraMetaPRIV *priv = cur->data; + AVBPrint bprint; + char * escaped, * key; + int i, ret; + + if ((key = av_asprintf(ID3v2_PRIV_METADATA_PREFIX "%s", priv->owner)) == NULL) { + return AVERROR(ENOMEM); + } + + av_bprint_init(&bprint, priv->datasize + 1, AV_BPRINT_SIZE_UNLIMITED); + + for (i = 0; i < priv->datasize; i++) { + if (priv->data[i] < 32 || priv->data[i] > 126 || priv->data[i] == '\\') { + av_bprintf(&bprint, "\\x%02x", priv->data[i]); + } else { + av_bprint_chars(&bprint, priv->data[i], 1); + } + } + + if ((ret = av_bprint_finalize(&bprint, &escaped)) < 0) { + av_free(key); + return ret; + } + + if ((ret = av_dict_set(metadata, key, escaped, dict_flags)) < 0) { + av_free(key); + av_free(escaped); + return ret; + } + } + } + + return 0; +} + +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta) +{ + return ff_id3v2_parse_priv_dict(&s->metadata, extra_meta); +} \ No newline at end of file diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h index 5e64ead096..9de0bee374 100644 --- a/libavformat/id3v2.h +++ b/libavformat/id3v2.h @@ -39,6 +39,8 @@ #define ID3v2_FLAG_ENCRYPTION 0x0004 #define ID3v2_FLAG_COMPRESSION 0x0008 +#define ID3v2_PRIV_METADATA_PREFIX "id3v2_priv." + enum ID3v2Encoding { ID3v2_ENCODING_ISO8859 = 0, ID3v2_ENCODING_UTF16BOM = 1, @@ -167,6 +169,19 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); */ int ff_id3v2_parse_chapters(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); +/** + * Parse PRIV tags into a dictionary. The PRIV owner is the metadata key. The + * PRIV data is the value, with non-printable characters escaped. + */ +int ff_id3v2_parse_priv_dict(AVDictionary **d, ID3v2ExtraMeta **extra_meta); + +/** + * Add metadata for all PRIV tags in the ID3v2 header. The PRIV owner is the + * metadata key. The PRIV data is the value, with non-printable characters + * escaped. + */ +int ff_id3v2_parse_priv(AVFormatContext *s, ID3v2ExtraMeta **extra_meta); + extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; diff --git a/libavformat/id3v2enc.c b/libavformat/id3v2enc.c index 14de76ac06..d5e358c8b6 100644 --- a/libavformat/id3v2enc.c +++ b/libavformat/id3v2enc.c @@ -96,6 +96,63 @@ static int id3v2_put_ttag(ID3v2EncContext *id3, AVIOContext *avioc, const char * return len + ID3v2_HEADER_SIZE; } +/** + * Write a priv frame with owner and data. 'key' is the owner prepended with + * ID3v2_PRIV_METADATA_PREFIX. 'data' is provided as a string. Any \xXX + * (where 'X' is a valid hex digit) will be unescaped to the byte value. + */ +static int id3v2_put_priv(ID3v2EncContext *id3, AVIOContext *avioc, const char *key, const char *data) +{ + int len; + uint8_t *pb; + AVIOContext *dyn_buf; + + if (!av_strstart(key, ID3v2_PRIV_METADATA_PREFIX, &key)) { + return 0; + } + + if (avio_open_dyn_buf(&dyn_buf) < 0) + return AVERROR(ENOMEM); + + // owner + null byte. + avio_write(dyn_buf, key, strlen(key) + 1); + + while (*data) { + if (av_strstart(data, "\\x", &data)) { + if (data[0] && data[1] && av_isxdigit(data[0]) && av_isxdigit(data[1])) { + char digits[] = {data[0], data[1], 0}; + avio_w8(dyn_buf, strtol(digits, NULL, 16)); + } else { + // '\x' wasn't followed by two hex digits. Just write out + // whatever the 2-4 characters were. + avio_write(dyn_buf, "\\x", 2); + if (data[0]) { + avio_w8(dyn_buf, data[0]); + if (data[1]) + avio_w8(dyn_buf, data[1]); + } + } + data += 2; + } else { + avio_write(dyn_buf, data++, 1); + } + } + + len = avio_close_dyn_buf(dyn_buf, &pb); + + avio_wb32(avioc, MKBETAG('P', 'R', 'I', 'V')); + if (id3->version == 3) + avio_wb32(avioc, len); + else + id3v2_put_size(avioc, len); + avio_wb16(avioc, 0); + avio_write(avioc, pb, len); + + av_free(pb); + + return len + ID3v2_HEADER_SIZE; +} + static int id3v2_check_write_tag(ID3v2EncContext *id3, AVIOContext *pb, AVDictionaryEntry *t, const char table[][4], enum ID3v2Encoding enc) { @@ -186,6 +243,13 @@ static int write_metadata(AVIOContext *pb, AVDictionary **metadata, continue; } + if ((ret = id3v2_put_priv(id3, pb, t->key, t->value)) > 0) { + id3->len += ret; + continue; + } else if (ret < 0) { + return ret; + } + /* unknown tag, write as TXXX frame */ if ((ret = id3v2_put_ttag(id3, pb, t->key, t->value, MKBETAG('T', 'X', 'X', 'X'), enc)) < 0) return ret; diff --git a/libavformat/utils.c b/libavformat/utils.c index 3d733417e1..c15b8cc818 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c @@ -637,6 +637,8 @@ int avformat_open_input(AVFormatContext **ps, const char *filename, goto fail; if ((ret = ff_id3v2_parse_chapters(s, &id3v2_extra_meta)) < 0) goto fail; + if ((ret = ff_id3v2_parse_priv(s, &id3v2_extra_meta)) < 0) + goto fail; } else av_log(s, AV_LOG_DEBUG, "demuxer does not support additional id3 data, skipping\n"); }
From: Richard Shaffer <rshaffer@tunein.com> Enables getting access to ID3 PRIV tags from the command-line or metadata API when demuxing. The PRIV owner is stored as the metadata key prepended with "id3v2_priv.", and the data is stored as the metadata value. As PRIV tags may contain arbitrary data, non-printable characters, including NULL bytes, are escaped as \xXX. Similarly, any metadata tags that begin with "id3v2_priv." are inserted as ID3 PRIV tags into the output (assuming the format supports ID3). \xXX sequences in the value are un-escaped to their byte value. --- Sorry. One more update. I realized there was a possibility of reading past the end of input in id3v2_put_priv, so I added a fix. -Richard libavformat/id3v2.c | 48 +++++++++++++++++++++++++++++++++++++ libavformat/id3v2.h | 15 ++++++++++++ libavformat/id3v2enc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ libavformat/utils.c | 2 ++ 4 files changed, 129 insertions(+)