diff mbox series

[FFmpeg-devel,3/3] lavf/id3v2dec: support multiple values and TIPL frames

Message ID 20220824235200.22312-3-rcombs@rcombs.me
State New
Headers show
Series [FFmpeg-devel,1/3] lavu/dict: add AV_DICT_DEDUP | expand

Commit Message

rcombs Aug. 24, 2022, 11:52 p.m. UTC
Fixes https://trac.ffmpeg.org/ticket/6949

Ordinary text frames in ID3v2 are allowed to have multiple
(null-separated) values. This technically isn't allowed in TXXX,
but it's used in practice by Picard, and supporting it is harmless.

TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
similarly to TXXX, but their strings alternate between keys and values
(key-value-key-value).
---
 libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 21 deletions(-)

Comments

Soft Works Aug. 25, 2022, 12:16 a.m. UTC | #1
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> rcombs
> Sent: Thursday, August 25, 2022 1:52 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH 3/3] lavf/id3v2dec: support multiple
> values and TIPL frames
> 
> Fixes https://trac.ffmpeg.org/ticket/6949
> 
> Ordinary text frames in ID3v2 are allowed to have multiple
> (null-separated) values. This technically isn't allowed in TXXX,
> but it's used in practice by Picard, and supporting it is harmless.
> 
> TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
> similarly to TXXX, but alternate key-value-key-value.
> ---
>  libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-----------------
> --
>  1 file changed, 28 insertions(+), 21 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 191a305ffb..335a1436b2 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -321,8 +321,12 @@ static void read_ttag(AVFormatContext *s,
> AVIOContext *pb, int taglen,
>                        AVDictionary **metadata, const char *key)
>  {
>      uint8_t *dst;
> -    int encoding, dict_flags = AV_DICT_DONT_OVERWRITE |
> AV_DICT_DONT_STRDUP_VAL;
> +    uint8_t *dst_key = NULL;
> +    int encoding, dict_flags = AV_DICT_MULTIKEY |
> AV_DICT_DONT_STRDUP_VAL | AV_DICT_DEDUP;
>      unsigned genre;
> +    int count = 0;
> +    int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
> +                    strcmp(key, "IPL"));
> 
>      if (taglen < 1)
>          return;
> @@ -330,30 +334,33 @@ static void read_ttag(AVFormatContext *s,
> AVIOContext *pb, int taglen,
>      encoding = avio_r8(pb);
>      taglen--; /* account for encoding type byte */
> 
> -    if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
> -        av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n",
> key);
> -        return;
> -    }
> -
> -    if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
> &&
> -        (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d",
> &genre) == 1) &&
> -        genre <= ID3v1_GENRE_MAX) {
> -        av_freep(&dst);
> -        dst = av_strdup(ff_id3v1_genre_str[genre]);
> -    } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
> -        /* dst now contains the key, need to get value */
> -        key = dst;
> +    while (taglen > 1) {

int n = 0;

>          if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>              av_log(s, AV_LOG_ERROR, "Error reading frame %s,
> skipped\n", key);
> -            av_freep(&key);
>              return;
>          }
> -        dict_flags |= AV_DICT_DONT_STRDUP_KEY;
> -    } else if (!*dst)
> -        av_freep(&dst);
> 
> -    if (dst)
> -        av_dict_set(metadata, key, dst, dict_flags);
> +        count++;
> +
> +        if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
> &&
> +            (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d",
> &genre) == 1) &&

(sscanf(dst, "(%d)", &genre) == 1 || (sscanf(dst, "%d%n", &genre, &n) == 1 && n == strlen(dst))) &&

This makes the bare-number form match only when the whole string is numeric,
which avoids parsing genre strings that merely start with digits (like '2step')
as a genre ID.


Thanks for resubmitting,
softworkz
rcombs Aug. 25, 2022, 12:23 a.m. UTC | #2
> On Aug 24, 2022, at 19:16, Soft Works <softworkz@hotmail.com> wrote:
> 
> 
> 
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org <mailto:ffmpeg-devel-bounces@ffmpeg.org>> On Behalf Of
>> rcombs
>> Sent: Thursday, August 25, 2022 1:52 AM
>> To: ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org>
>> Subject: [FFmpeg-devel] [PATCH 3/3] lavf/id3v2dec: support multiple
>> values and TIPL frames
>> 
>> Fixes https://trac.ffmpeg.org/ticket/6949
>> 
>> Ordinary text frames in ID3v2 are allowed to have multiple
>> (null-separated) values. This technically isn't allowed in TXXX,
>> but it's used in practice by Picard, and supporting it is harmless.
>> 
>> TIPL/IPL (Involved People List) and TMCL (Musician Credits List) work
>> similarly to TXXX, but alternate key-value-key-value.
>> ---
>> libavformat/id3v2.c | 49 ++++++++++++++++++++++++++-----------------
>> --
>> 1 file changed, 28 insertions(+), 21 deletions(-)
>> 
>> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
>> index 191a305ffb..335a1436b2 100644
>> --- a/libavformat/id3v2.c
>> +++ b/libavformat/id3v2.c
>> @@ -321,8 +321,12 @@ static void read_ttag(AVFormatContext *s,
>> AVIOContext *pb, int taglen,
>>                       AVDictionary **metadata, const char *key)
>> {
>>     uint8_t *dst;
>> -    int encoding, dict_flags = AV_DICT_DONT_OVERWRITE |
>> AV_DICT_DONT_STRDUP_VAL;
>> +    uint8_t *dst_key = NULL;
>> +    int encoding, dict_flags = AV_DICT_MULTIKEY |
>> AV_DICT_DONT_STRDUP_VAL | AV_DICT_DEDUP;
>>     unsigned genre;
>> +    int count = 0;
>> +    int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
>> +                    strcmp(key, "IPL"));
>> 
>>     if (taglen < 1)
>>         return;
>> @@ -330,30 +334,33 @@ static void read_ttag(AVFormatContext *s,
>> AVIOContext *pb, int taglen,
>>     encoding = avio_r8(pb);
>>     taglen--; /* account for encoding type byte */
>> 
>> -    if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>> -        av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n",
>> key);
>> -        return;
>> -    }
>> -
>> -    if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
>> &&
>> -        (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d",
>> &genre) == 1) &&
>> -        genre <= ID3v1_GENRE_MAX) {
>> -        av_freep(&dst);
>> -        dst = av_strdup(ff_id3v1_genre_str[genre]);
>> -    } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
>> -        /* dst now contains the key, need to get value */
>> -        key = dst;
>> +    while (taglen > 1) {
> 
> int n = 0;
> 
>>         if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
>>             av_log(s, AV_LOG_ERROR, "Error reading frame %s,
>> skipped\n", key);
>> -            av_freep(&key);
>>             return;
>>         }
>> -        dict_flags |= AV_DICT_DONT_STRDUP_KEY;
>> -    } else if (!*dst)
>> -        av_freep(&dst);
>> 
>> -    if (dst)
>> -        av_dict_set(metadata, key, dst, dict_flags);
>> +        count++;
>> +
>> +        if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
>> &&
>> +            (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d",
>> &genre) == 1) &&
> 
> (sscanf(dst, "(%d)", &genre) == 1 || (sscanf(dst, "%d%n", &genre, &n) == 1 && n == strlen(dst))) &&
> 
> avoids parsing genre strings starting with numbers (like '2step')
> as genre id.

Sounds reasonable, but this isn't new code (it's just reindented); please send this as its own patch.

> 
> 
> Thanks for resubmitting,
> softworkz
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org <mailto:ffmpeg-devel@ffmpeg.org>
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org <mailto:ffmpeg-devel-request@ffmpeg.org> with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 191a305ffb..335a1436b2 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -321,8 +321,12 @@  static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
                       AVDictionary **metadata, const char *key)
 {
     uint8_t *dst;
-    int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL;
+    uint8_t *dst_key = NULL;
+    int encoding, dict_flags = AV_DICT_MULTIKEY | AV_DICT_DONT_STRDUP_VAL | AV_DICT_DEDUP;
     unsigned genre;
+    int count = 0;
+    int is_tipl = !(strcmp(key, "TIPL") && strcmp(key, "TMCL") &&
+                    strcmp(key, "IPL"));
 
     if (taglen < 1)
         return;
@@ -330,30 +334,33 @@  static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
     encoding = avio_r8(pb);
     taglen--; /* account for encoding type byte */
 
-    if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
-        av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
-        return;
-    }
-
-    if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
-        (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
-        genre <= ID3v1_GENRE_MAX) {
-        av_freep(&dst);
-        dst = av_strdup(ff_id3v1_genre_str[genre]);
-    } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
-        /* dst now contains the key, need to get value */
-        key = dst;
+    while (taglen > 1) {
         if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
             av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
-            av_freep(&key);
             return;
         }
-        dict_flags |= AV_DICT_DONT_STRDUP_KEY;
-    } else if (!*dst)
-        av_freep(&dst);
 
-    if (dst)
-        av_dict_set(metadata, key, dst, dict_flags);
+        count++;
+
+        if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))                         &&
+            (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
+            genre <= ID3v1_GENRE_MAX) {
+            av_freep(&dst);
+            dst = av_strdup(ff_id3v1_genre_str[genre]);
+        } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX")) ||
+                   (is_tipl && (count & 1))) {
+            /* dst now contains the key, need to get value */
+            av_free(dst_key);
+            key = dst_key = dst;
+            continue;
+        } else if (!*dst)
+            av_freep(&dst);
+
+        if (dst)
+            av_dict_set(metadata, key, dst, dict_flags);
+    }
+
+    av_free(dst_key);
 }
 
 static void read_uslt(AVFormatContext *s, AVIOContext *pb, int taglen,
@@ -1039,7 +1046,7 @@  static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
                     pbx = &pb_local.pub; // read from sync buffer
                 }
 #endif
-            if (tag[0] == 'T')
+            if (tag[0] == 'T' || !strcmp(tag, "IPL"))
                 /* parse text tag */
                 read_ttag(s, pbx, tlen, metadata, tag);
             else if (!memcmp(tag, "USLT", 4))