diff mbox series

[FFmpeg-devel,2/2] lavf/id3v2dec: support multiple values and TIPL frames

Message ID 20220620013711.91482-2-rcombs@rcombs.me
State New
Headers show
Series [FFmpeg-devel,1/2] lavf/metadata: support duplicate keys in ff_metadata_conv | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 fail Make fate failed
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 fail Make fate failed

Commit Message

rcombs June 20, 2022, 1:37 a.m. UTC
Fixes https://trac.ffmpeg.org/ticket/6949

Ordinary text frames in ID3v2 are allowed to have multiple
(null-separated) values. This technically isn't allowed in TXXX,
but it's used in practice by Picard, and supporting it is harmless.

TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
similarly to TXXX, but alternate key-value-key-value.
---
 libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

Comments

Andreas Rheinhardt June 20, 2022, 10:15 p.m. UTC | #1
rcombs:
> Fixes https://trac.ffmpeg.org/ticket/6949
> 
> Ordinary text frames in ID3v2 are allowed to have multiple
> (null-separated) values. This technically isn't allowed in TXXX,
> but it's used in practice by Picard, and supporting it is harmless.
> 
> TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
> similarly to TXXX, but alternate key-value-key-value.
> ---
>  libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-------------------
>  1 file changed, 28 insertions(+), 21 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 191a305ffb..667105e9f9 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -321,8 +321,12 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
>                        AVDictionary **metadata, const char *key)
>  {
>      uint8_t *dst;
> -    int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL;
> +    uint8_t *dst_key = NULL;
> +    int encoding, dict_flags = AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL;
>      unsigned genre;
> +    int count = 0;
> +    int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
> +                    strcmp(key, "IPL"));
>  
>      if (taglen < 1)
>          return;
> @@ -330,30 +334,33 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
>      encoding = avio_r8(pb);
>      taglen--; /* account for encoding type byte */
>  
> -    if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> -        av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
> -        return;
> -    }
> -
> -    if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
> -        (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
> -        genre <= ID3v1_GENRE_MAX) {
> -        av_freep(&dst);
> -        dst = av_strdup(ff_id3v1_genre_str[genre]);
> -    } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
> -        /* dst now contains the key, need to get value */
> -        key = dst;
> +    while (taglen > 1) {
>          if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>              av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
> -            av_freep(&key);
>              return;
>          }
> -        dict_flags |= AV_DICT_DONT_STRDUP_KEY;
> -    } else if (!*dst)
> -        av_freep(&dst);
>  
> -    if (dst)
> -        av_dict_set(metadata, key, dst, dict_flags);
> +        count++;
> +
> +        if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
> +            (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
> +            genre <= ID3v1_GENRE_MAX) {
> +            av_freep(&dst);
> +            dst = av_strdup(ff_id3v1_genre_str[genre]);
> +        } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX")) ||
> +                   (is_tipl && (count & 1))) {
> +            /* dst now contains the key, need to get value */
> +            av_free(dst_key);
> +            key = dst_key = dst;
> +            continue;
> +        } else if (!*dst)
> +            av_freep(&dst);
> +
> +        if (dst)
> +            av_dict_set(metadata, key, dst, dict_flags);
> +    }
> +
> +    av_free(dst_key);
>  }
>  
>  static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen,
> @@ -1039,7 +1046,7 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
>                      pbx = &pb_local.pub; // read from sync buffer
>                  }
>  #endif
> -            if (tag[0] == 'T')
> +            if (tag[0] == 'T' || !strcmp(tag, "IPL"))
>                  /* parse text tag */
>                  read_ttag(s, pbx, tlen, metadata, tag);
>              else if (!memcmp(tag, "USLT", 4))

From avformat.h:

"Keys are unique; there can never be 2 tags with the same key. This is
also meant semantically, i.e., a demuxer should not knowingly produce
several keys that are literally different but semantically identical.
E.g., key=Author5, key=Author6. In this example, all authors must be
placed in the same tag."

If this requirement did not exist, I would have fixed #6949 and #9641
long ago.

- Andreas
rcombs June 20, 2022, 10:32 p.m. UTC | #2
> On Jun 20, 2022, at 17:15, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> 
> rcombs:
>> Fixes https://trac.ffmpeg.org/ticket/6949
>> 
>> Ordinary text frames in ID3v2 are allowed to have multiple
>> (null-separated) values. This technically isn't allowed in TXXX,
>> but it's used in practice by Picard, and supporting it is harmless.
>> 
>> TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
>> similarly to TXXX, but alternate key-value-key-value.
>> ---
>> libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-------------------
>> 1 file changed, 28 insertions(+), 21 deletions(-)
>> 
>> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
>> index 191a305ffb..667105e9f9 100644
>> --- a/libavformat/id3v2.c
>> +++ b/libavformat/id3v2.c
>> @@ -321,8 +321,12 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
>> AVDictionary **metadata, const char *key)
>> {
>> uint8_t *dst;
>> - int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL;
>> + uint8_t *dst_key = NULL;
>> + int encoding, dict_flags = AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL;
>> unsigned genre;
>> + int count = 0;
>> + int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
>> + strcmp(key, "IPL"));
>> 
>> if (taglen < 1)
>> return;
>> @@ -330,30 +334,33 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
>> encoding = avio_r8(pb);
>> taglen--; /* account for encoding type byte */
>> 
>> - if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>> - av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
>> - return;
>> - }
>> -
>> - if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
>> - (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
>> - genre <= ID3v1_GENRE_MAX) {
>> - av_freep(&dst);
>> - dst = av_strdup(ff_id3v1_genre_str[genre]);
>> - } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
>> - /* dst now contains the key, need to get value */
>> - key = dst;
>> + while (taglen > 1) {
>> if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>> av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
>> - av_freep(&key);
>> return;
>> }
>> - dict_flags |= AV_DICT_DONT_STRDUP_KEY;
>> - } else if (!*dst)
>> - av_freep(&dst);
>> 
>> - if (dst)
>> - av_dict_set(metadata, key, dst, dict_flags);
>> + count++;
>> +
>> + if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
>> + (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
>> + genre <= ID3v1_GENRE_MAX) {
>> + av_freep(&dst);
>> + dst = av_strdup(ff_id3v1_genre_str[genre]);
>> + } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX")) ||
>> + (is_tipl && (count & 1))) {
>> + /* dst now contains the key, need to get value */
>> + av_free(dst_key);
>> + key = dst_key = dst;
>> + continue;
>> + } else if (!*dst)
>> + av_freep(&dst);
>> +
>> + if (dst)
>> + av_dict_set(metadata, key, dst, dict_flags);
>> + }
>> +
>> + av_free(dst_key);
>> }
>> 
>> static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen,
>> @@ -1039,7 +1046,7 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
>> pbx = &pb_local.pub; // read from sync buffer
>> }
>> #endif
>> - if (tag[0] == 'T')
>> + if (tag[0] == 'T' || !strcmp(tag, "IPL"))
>> /* parse text tag */
>> read_ttag(s, pbx, tlen, metadata, tag);
>> else if (!memcmp(tag, "USLT", 4))
> 
> From avformat.h:
> 
> "Keys are unique; there can never be 2 tags with the same key. This is
> also meant semantically, i.e., a demuxer should not knowingly produce
> several keys that are literally different but semantically identical.
> E.g., key=Author5, key=Author6. In this example, all authors must be
> placed in the same tag."
> 
> If this requirement did not exist, I would have fixed #6949 and #9641
> long ago.
> 
> - Andreas

Well, we have 3 options, then:
- Export these kinds of duplicate tags concatenated in a single dict entry (split by slashes or something?)
- Add an AVOption in lavf/options_table.h to opt into multi-key support
- Remove the requirement altogether in a major API version bump

I don't see any particular technical reason for this requirement (there are a couple implementation details that make it awkward but they're fixable in code); it seems like it's largely just there because historically there was no support for colliding keys.

I'm not especially fond of this requirement, since it requires ad-hoc in-band signaling when multiple tags exist, it doesn't round-trip cleanly, and whatever separator we use for concatenation could collide with actual tag contents.

Meanwhile, as to actual technical issues: I've been experimenting with this in FATE and run into some interesting snags; the most significant is handling cases of redundant metadata (i.e. id3v2 block and container both have a title tag). Currently, this just means that whichever one gets parsed second loses. In FATE, the two values are always identical, so I'm currently addressing this by just adding an AV_DICT_ flag to skip insertion if an identical tag (both key and value) already exists.


> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org>
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org <mailto:ffmpeg-devel-request@ffmpeg.org> with subject "unsubscribe".
Soft Works June 20, 2022, 10:49 p.m. UTC | #3
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Ridley Combs
> Sent: Tuesday, June 21, 2022 12:33 AM
> To: ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH 2/2] lavf/id3v2dec: support
> multiple values and TIPL frames
> 
> 
> > On Jun 20, 2022, at 17:15, Andreas Rheinhardt
> <andreas.rheinhardt@outlook.com> wrote:
> >
> > rcombs:
> >> Fixes https://trac.ffmpeg.org/ticket/6949
> >>
> >> Ordinary text frames in ID3v2 are allowed to have multiple
> >> (null-separated) values. This technically isn't allowed in TXXX,
> >> but it's used in practice by Picard, and supporting it is
> harmless.
> >>
> >> TIPL/IPL (Involved People List) and TMCL (Musician Credits List)
> work
> >> similarly to TXXX, but alternate key-value-key-value.
> >> ---
> >> libavformat/id3v2.c | 49 ++++++++++++++++++++++++++---------------
> ----
> >> 1 file changed, 28 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> >> index 191a305ffb..667105e9f9 100644
> >> --- a/libavformat/id3v2.c
> >> +++ b/libavformat/id3v2.c
> >> @@ -321,8 +321,12 @@ static void read_ttag(AVFormatContext *s,
> AVIOContext *pb, int taglen,
> >> AVDictionary **metadata, const char *key)
> >> {
> >> uint8_t *dst;
> >> - int encoding, dict_flags = AV_DICT_DONT_OVERWRITE |
> AV_DICT_DONT_STRDUP_VAL;
> >> + uint8_t *dst_key = NULL;
> >> + int encoding, dict_flags = AV_DICT_MULTIKEY |
> AV_DICT_DONT_STRDUP_VAL;
> >> unsigned genre;
> >> + int count = 0;
> >> + int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
> >> + strcmp(key, "IPL"));
> >>
> >> if (taglen < 1)
> >> return;
> >> @@ -330,30 +334,33 @@ static void read_ttag(AVFormatContext *s,
> AVIOContext *pb, int taglen,
> >> encoding = avio_r8(pb);
> >> taglen--; /* account for encoding type byte */
> >>
> >> - if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> >> - av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n",
> key);
> >> - return;
> >> - }
> >> -
> >> - if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
> >> - (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre)
> == 1) &&
> >> - genre <= ID3v1_GENRE_MAX) {
> >> - av_freep(&dst);
> >> - dst = av_strdup(ff_id3v1_genre_str[genre]);
> >> - } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
> >> - /* dst now contains the key, need to get value */
> >> - key = dst;
> >> + while (taglen > 1) {
> >> if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> >> av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
> >> - av_freep(&key);
> >> return;
> >> }
> >> - dict_flags |= AV_DICT_DONT_STRDUP_KEY;
> >> - } else if (!*dst)
> >> - av_freep(&dst);
> >>
> >> - if (dst)
> >> - av_dict_set(metadata, key, dst, dict_flags);
> >> + count++;
> >> +
> >> + if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
> >> + (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre)
> == 1) &&
> >> + genre <= ID3v1_GENRE_MAX) {
> >> + av_freep(&dst);
> >> + dst = av_strdup(ff_id3v1_genre_str[genre]);
> >> + } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX")) ||
> >> + (is_tipl && (count & 1))) {
> >> + /* dst now contains the key, need to get value */
> >> + av_free(dst_key);
> >> + key = dst_key = dst;
> >> + continue;
> >> + } else if (!*dst)
> >> + av_freep(&dst);
> >> +
> >> + if (dst)
> >> + av_dict_set(metadata, key, dst, dict_flags);
> >> + }
> >> +
> >> + av_free(dst_key);
> >> }
> >>
> >> static void read_uslt(AVFormatContext *s, AVIOContext *pb, int
> taglen,
> >> @@ -1039,7 +1046,7 @@ static void id3v2_parse(AVIOContext *pb,
> AVDictionary **metadata,
> >> pbx = &pb_local.pub; // read from sync buffer
> >> }
> >> #endif
> >> - if (tag[0] == 'T')
> >> + if (tag[0] == 'T' || !strcmp(tag, "IPL"))
> >> /* parse text tag */
> >> read_ttag(s, pbx, tlen, metadata, tag);
> >> else if (!memcmp(tag, "USLT", 4))
> >
> > From avformat.h:
> >
> > "Keys are unique; there can never be 2 tags with the same key. This
> is
> > also meant semantically, i.e., a demuxer should not knowingly
> produce
> > several keys that are literally different but semantically
> identical.
> > E.g., key=Author5, key=Author6. In this example, all authors must
> be
> > placed in the same tag."
> >
> > If this requirement did not exist, I would have fixed #6949 and
> #9641
> > long ago.
> >
> > - Andreas
> 
> Well, we have 3 options, then:
> - Export these kinds of duplicate tags concatenated in a single dict
> entry (split by slashes or something?)
> - Add an AVOption in lavf/options_table.h to opt into multi-key
> support
> - Remove the requirement altogether in a major API version bump

That's the way I would prefer.

Doing nothing for formal reasons is not a useful way. Most
softwares can read multivalue ID3v2 tags, ffmpeg/probe is more a 
kind of exception.

I think I should also mention that half part of this patch has
been submitted before

https://patchwork.ffmpeg.org/project/ffmpeg/patch/1118083116.39618.1617751350909@office.mailbox.org/

Best regards,
softworkz
Michael Niedermayer June 21, 2022, 12:39 a.m. UTC | #4
On Sun, Jun 19, 2022 at 08:37:11PM -0500, rcombs wrote:
> Fixes https://trac.ffmpeg.org/ticket/6949
> 
> Ordinary text frames in ID3v2 are allowed to have multiple
> (null-separated) values. This technically isn't allowed in TXXX,
> but it's used in practice by Picard, and supporting it is harmless.
> 
> TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
> similarly to TXXX, but alternate key-value-key-value.
> ---
>  libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-------------------
>  1 file changed, 28 insertions(+), 21 deletions(-)

It appears this looses a track # metadata on one file
i will send you the file privatly

[...]
diff mbox series

Patch

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 191a305ffb..667105e9f9 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -321,8 +321,12 @@  static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
                       AVDictionary **metadata, const char *key)
 {
     uint8_t *dst;
-    int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL;
+    uint8_t *dst_key = NULL;
+    int encoding, dict_flags = AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL;
     unsigned genre;
+    int count = 0;
+    int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
+                    strcmp(key, "IPL"));
 
     if (taglen < 1)
         return;
@@ -330,30 +334,33 @@  static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
     encoding = avio_r8(pb);
     taglen--; /* account for encoding type byte */
 
-    if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
-        av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
-        return;
-    }
-
-    if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
-        (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
-        genre <= ID3v1_GENRE_MAX) {
-        av_freep(&dst);
-        dst = av_strdup(ff_id3v1_genre_str[genre]);
-    } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
-        /* dst now contains the key, need to get value */
-        key = dst;
+    while (taglen > 1) {
         if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
             av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
-            av_freep(&key);
             return;
         }
-        dict_flags |= AV_DICT_DONT_STRDUP_KEY;
-    } else if (!*dst)
-        av_freep(&dst);
 
-    if (dst)
-        av_dict_set(metadata, key, dst, dict_flags);
+        count++;
+
+        if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
+            (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
+            genre <= ID3v1_GENRE_MAX) {
+            av_freep(&dst);
+            dst = av_strdup(ff_id3v1_genre_str[genre]);
+        } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX")) ||
+                   (is_tipl && (count & 1))) {
+            /* dst now contains the key, need to get value */
+            av_free(dst_key);
+            key = dst_key = dst;
+            continue;
+        } else if (!*dst)
+            av_freep(&dst);
+
+        if (dst)
+            av_dict_set(metadata, key, dst, dict_flags);
+    }
+
+    av_free(dst_key);
 }
 
 static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen,
@@ -1039,7 +1046,7 @@  static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
                     pbx = &pb_local.pub; // read from sync buffer
                 }
 #endif
-            if (tag[0] == 'T')
+            if (tag[0] == 'T' || !strcmp(tag, "IPL"))
                 /* parse text tag */
                 read_ttag(s, pbx, tlen, metadata, tag);
             else if (!memcmp(tag, "USLT", 4))