diff mbox

[FFmpeg-devel] id3v2: fix unsynchronization

Message ID 20180130124325.1310-1-nfxjfg@googlemail.com
State Accepted
Commit 48bc9fffd11c05e75b9125d8cd90e9263108bd83
Headers show

Commit Message

wm4 Jan. 30, 2018, 12:43 p.m. UTC
The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
sequences with 0xFF. This has to be done on every byte of the source
data, while the current code skipped a byte after a replacement. This
meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
0xFF. It feels a bit messy to do this correctly with the avio use. But
fortunately, this translation can be done in-place, so we can just do it
in memory.

Inspired by what taglib does.
---
Sample (which had corrupted cover art, displays fine with the fix):
https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
---
 libavformat/id3v2.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

Comments

wm4 Jan. 30, 2018, 12:47 p.m. UTC | #1
On Tue, 30 Jan 2018 13:43:25 +0100
wm4 <nfxjfg@googlemail.com> wrote:

> The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
> sequences with 0xFF. This has to be done on every byte of the source
> data, while the current code skipped a byte after a replacement. This
> meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
> 0xFF. It feels a bit messy to do this correctly with the avio use. But
> fortunately, this translation can be done in-place, so we can just do it
> in memory.
> 
> Inspired by what taglib does.
> ---
> Sample (which had corrupted cover art, displays fine with the fix):
> https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
> ---
>  libavformat/id3v2.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index b80178d67a..f7de26a1d8 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -976,19 +976,21 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
>                  }
>              }
>              if (unsync || tunsync) {
> -                int64_t end = avio_tell(pb) + tlen;
> -                uint8_t *b;
> -
> -                b = buffer;
> -                while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
> -                    *b++ = avio_r8(pb);
> -                    if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
> -                        b - buffer < tlen &&
> -                        !pb->eof_reached ) {
> -                        uint8_t val = avio_r8(pb);
> -                        *b++ = val ? val : avio_r8(pb);
> -                    }
> +                uint8_t *b = buffer;
> +                uint8_t *t = buffer;
> +                uint8_t *end = t + tlen;
> +
> +                if (avio_read(pb, buffer, tlen) != tlen) {
> +                    av_log(s, AV_LOG_ERROR, "Failed to read tag data\n");
> +                    goto seek;
>                  }
> +
> +                while (t != end) {
> +                    *b++ = *t++;
> +                    if (t != end && t[-1] == 0xff && !t[0])
> +                        t++;
> +                }
> +
>                  ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
>                                    NULL);
>                  tlen = b - buffer;

Also I just saw that 9ae80e6a9cefcab61e867256ba19ef78a4bfe0cb seems to
claim that the current code is correct (and my commit essentially
reverts it). Taglib decodes the tag correctly, though.
rshaffer@tunein.com Jan. 30, 2018, 10:16 p.m. UTC | #2
LGTM, for whatever my opinion is worth.

Also easier to follow than the old code.

-Richard

On Tue, Jan 30, 2018 at 4:47 AM, wm4 <nfxjfg@googlemail.com> wrote:
> On Tue, 30 Jan 2018 13:43:25 +0100
> wm4 <nfxjfg@googlemail.com> wrote:
>
>> The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
>> sequences with 0xFF. This has to be done on every byte of the source
>> data, while the current code skipped a byte after a replacement. This
>> meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
>> 0xFF. It feels a bit messy to do this correctly with the avio use. But
>> fortunately, this translation can be done in-place, so we can just do it
>> in memory.
>>
>> Inspired by what taglib does.
>> ---
>> Sample (which had corrupted cover art, displays fine with the fix):
>> https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
>> ---
>>  libavformat/id3v2.c | 26 ++++++++++++++------------
>>  1 file changed, 14 insertions(+), 12 deletions(-)
>>
>> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
>> index b80178d67a..f7de26a1d8 100644
>> --- a/libavformat/id3v2.c
>> +++ b/libavformat/id3v2.c
>> @@ -976,19 +976,21 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
>>                  }
>>              }
>>              if (unsync || tunsync) {
>> -                int64_t end = avio_tell(pb) + tlen;
>> -                uint8_t *b;
>> -
>> -                b = buffer;
>> -                while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
>> -                    *b++ = avio_r8(pb);
>> -                    if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
>> -                        b - buffer < tlen &&
>> -                        !pb->eof_reached ) {
>> -                        uint8_t val = avio_r8(pb);
>> -                        *b++ = val ? val : avio_r8(pb);
>> -                    }
>> +                uint8_t *b = buffer;
>> +                uint8_t *t = buffer;
>> +                uint8_t *end = t + tlen;
>> +
>> +                if (avio_read(pb, buffer, tlen) != tlen) {
>> +                    av_log(s, AV_LOG_ERROR, "Failed to read tag data\n");
>> +                    goto seek;
>>                  }
>> +
>> +                while (t != end) {
>> +                    *b++ = *t++;
>> +                    if (t != end && t[-1] == 0xff && !t[0])
>> +                        t++;
>> +                }
>> +
>>                  ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
>>                                    NULL);
>>                  tlen = b - buffer;
>
> Also I just saw that 9ae80e6a9cefcab61e867256ba19ef78a4bfe0cb seems to
> claim that the current code is correct (and my commit essentially
> reverts it). Taglib decodes the tag correctly, though.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Michael Niedermayer Jan. 31, 2018, 10:34 a.m. UTC | #3
On Tue, Jan 30, 2018 at 01:43:25PM +0100, wm4 wrote:
> The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
> sequences with 0xFF. This has to be done on every byte of the source
> data, while the current code skipped a byte after a replacement. This
> meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
> 0xFF. It feels a bit messy to do this correctly with the avio use. But
> fortunately, this translation can be done in-place, so we can just do it
> in memory.
> 
> Inspired by what taglib does.
> ---
> Sample (which had corrupted cover art, displays fine with the fix):
> https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
> ---
>  libavformat/id3v2.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)

if this works better than before then its a good idea.
Also your code looks better and more efficient than what was there
before

[...]
wm4 Jan. 31, 2018, 11:04 a.m. UTC | #4
On Wed, 31 Jan 2018 11:34:27 +0100
Michael Niedermayer <michael@niedermayer.cc> wrote:

> On Tue, Jan 30, 2018 at 01:43:25PM +0100, wm4 wrote:
> > The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
> > sequences with 0xFF. This has to be done on every byte of the source
> > data, while the current code skipped a byte after a replacement. This
> > meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
> > 0xFF. It feels a bit messy to do this correctly with the avio use. But
> > fortunately, this translation can be done in-place, so we can just do it
> > in memory.
> > 
> > Inspired by what taglib does.
> > ---
> > Sample (which had corrupted cover art, displays fine with the fix):
> > https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
> > ---
> >  libavformat/id3v2.c | 26 ++++++++++++++------------
> >  1 file changed, 14 insertions(+), 12 deletions(-)  
> 
> if this works better than before then its a good idea.
> Also your code looks better and more efficient than what was there
> before

It definitely makes the sample at hand work, however the sample Libav
attempted to fix seems to be lost.
wm4 Feb. 4, 2018, 2:24 p.m. UTC | #5
On Tue, 30 Jan 2018 13:43:25 +0100
wm4 <nfxjfg@googlemail.com> wrote:

> The ID3v2 "unsynchronization scheme" requires replacing any 0xFF 0x00
> sequences with 0xFF. This has to be done on every byte of the source
> data, while the current code skipped a byte after a replacement. This
> meant 0xFF 0x00 0xFF 00 was translated to 0xFF 0xFF 0x00 instead of 0xFF
> 0xFF. It feels a bit messy to do this correctly with the avio use. But
> fortunately, this translation can be done in-place, so we can just do it
> in memory.
> 
> Inspired by what taglib does.
> ---
> Sample (which had corrupted cover art, displays fine with the fix):
> https://0x0.st/sbQ9.bin (unfortunately a bit too large for FATE)
> ---
>  libavformat/id3v2.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index b80178d67a..f7de26a1d8 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -976,19 +976,21 @@ static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
>                  }
>              }
>              if (unsync || tunsync) {
> -                int64_t end = avio_tell(pb) + tlen;
> -                uint8_t *b;
> -
> -                b = buffer;
> -                while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
> -                    *b++ = avio_r8(pb);
> -                    if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
> -                        b - buffer < tlen &&
> -                        !pb->eof_reached ) {
> -                        uint8_t val = avio_r8(pb);
> -                        *b++ = val ? val : avio_r8(pb);
> -                    }
> +                uint8_t *b = buffer;
> +                uint8_t *t = buffer;
> +                uint8_t *end = t + tlen;
> +
> +                if (avio_read(pb, buffer, tlen) != tlen) {
> +                    av_log(s, AV_LOG_ERROR, "Failed to read tag data\n");
> +                    goto seek;
>                  }
> +
> +                while (t != end) {
> +                    *b++ = *t++;
> +                    if (t != end && t[-1] == 0xff && !t[0])
> +                        t++;
> +                }
> +
>                  ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
>                                    NULL);
>                  tlen = b - buffer;

Pushed, with a note about the old Libav commit added.
diff mbox

Patch

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index b80178d67a..f7de26a1d8 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -976,19 +976,21 @@  static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
                 }
             }
             if (unsync || tunsync) {
-                int64_t end = avio_tell(pb) + tlen;
-                uint8_t *b;
-
-                b = buffer;
-                while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
-                    *b++ = avio_r8(pb);
-                    if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
-                        b - buffer < tlen &&
-                        !pb->eof_reached ) {
-                        uint8_t val = avio_r8(pb);
-                        *b++ = val ? val : avio_r8(pb);
-                    }
+                uint8_t *b = buffer;
+                uint8_t *t = buffer;
+                uint8_t *end = t + tlen;
+
+                if (avio_read(pb, buffer, tlen) != tlen) {
+                    av_log(s, AV_LOG_ERROR, "Failed to read tag data\n");
+                    goto seek;
                 }
+
+                while (t != end) {
+                    *b++ = *t++;
+                    if (t != end && t[-1] == 0xff && !t[0])
+                        t++;
+                }
+
                 ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
                                   NULL);
                 tlen = b - buffer;