diff mbox

[FFmpeg-devel] avformat: parse iTunes gapless information

Message ID 20170127004712.17308-2-kode54@gmail.com
State New
Headers show

Commit Message

Christopher Snowhill Jan. 27, 2017, 12:47 a.m. UTC
Signed-off-by: Christopher Snowhill <kode54@gmail.com>
---
 libavformat/mp3dec.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

Comments

wm4 Jan. 27, 2017, 12:10 p.m. UTC | #1
On Thu, 26 Jan 2017 16:47:12 -0800
Christopher Snowhill <kode54@gmail.com> wrote:

> Signed-off-by: Christopher Snowhill <kode54@gmail.com>
> ---
>  libavformat/mp3dec.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 76 insertions(+), 1 deletion(-)
> 
> diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
> index 099ca57..b895e37 100644
> --- a/libavformat/mp3dec.c
> +++ b/libavformat/mp3dec.c
> @@ -295,6 +295,66 @@ static void mp3_parse_vbri_tag(AVFormatContext *s, AVStream *st, int64_t base)
>      }
>  }
>  
> +static void mp3_parse_itunes_tag(AVFormatContext *s, AVStream *st, MPADecodeHeader *c, int64_t base, int vbrtag_size, unsigned int *size, uint64_t *duration)
> +{
> +    uint32_t v;
> +    AVDictionaryEntry *de;
> +    MP3DecContext *mp3 = s->priv_data;
> +    size_t length;
> +    uint32_t zero, start_pad, end_pad;
> +    uint64_t last_eight_frames_offset;
> +    int64_t temp_duration, file_size;
> +    int i;
> +
> +    if (!s->metadata || !(de = av_dict_get(s->metadata, "iTunSMPB", NULL, 0)))
> +        return;
> +
> +    length = strlen(de->value);
> +
> +    /* Minimum length is one digit per field plus the whitespace, maximum length should depend on field type
> +     * There are four fields we need in the first six, the rest are currently zero padding */
> +    if (length < (12 + 11) || length > (10 * 8 + 2 * 16 + 11))
> +        return;
> +
> +    file_size = avio_size(s->pb);
> +    if (file_size < 0)
> +        file_size = 0;
> +
> +    if (sscanf(de->value, "%"PRIx32" %"PRIx32" %"PRIx32" %"PRIx64" %"PRIx32" %"PRIx64, &zero, &start_pad, &end_pad, &temp_duration, &zero, &last_eight_frames_offset) < 6 ||
> +        temp_duration < 0 ||
> +        start_pad > (576 * 2 * 32) ||
> +        end_pad > (576 * 2 * 64) ||
> +        (file_size && (last_eight_frames_offset >= (file_size - base - vbrtag_size)))) {
> +        *duration = 0;
> +        return;
> +    }
> +
> +    *duration = temp_duration;
> +
> +    mp3->start_pad = start_pad;
> +    mp3->end_pad = end_pad;
> +    if (end_pad >= 528 + 1)
> +        mp3->end_pad = end_pad - (528 + 1);
> +    st->start_skip_samples = mp3->start_pad + 528 + 1;
> +    av_log(s, AV_LOG_DEBUG, "pad %d %d\n", mp3->start_pad, mp3->end_pad);
> +    if (!s->pb->seekable)
> +        return;
> +
> +    *size = (unsigned int) last_eight_frames_offset;
> +    if (avio_seek(s->pb, base + vbrtag_size + last_eight_frames_offset, SEEK_SET) < 0)
> +        return;
> +
> +    for (i = 0; i < 8; i++) {
> +        v = avio_rb32(s->pb);
> +        if (ff_mpa_check_header(v) < 0)
> +            return;
> +        if (avpriv_mpegaudio_decode_header(c, v) != 0)
> +            break;
> +        *size += c->frame_size;
> +        avio_skip(s->pb, c->frame_size - 4);
> +    }
> +}
> +
>  /**
>   * Try to find Xing/Info/VBRI tags and compute duration from info therein
>   */
> @@ -303,8 +363,10 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
>      uint32_t v, spf;
>      MPADecodeHeader c;
>      int vbrtag_size = 0;
> +    unsigned int size = 0;
>      MP3DecContext *mp3 = s->priv_data;
>      int ret;
> +    uint64_t duration = 0;
>  
>      ffio_init_checksum(s->pb, ff_crcA001_update, 0);
>  
> @@ -327,16 +389,29 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
>      mp3_parse_vbri_tag(s, st, base);
>  
>      if (!mp3->frames && !mp3->header_filesize)
> +        vbrtag_size = 0;
> +
> +    mp3_parse_itunes_tag(s, st, &c, base, vbrtag_size, &size, &duration);
> +
> +    if (!mp3->frames && !size && !duration)
>          return -1;
>  
>      /* Skip the vbr tag frame */
>      avio_seek(s->pb, base + vbrtag_size, SEEK_SET);
>  
> -    if (mp3->frames)
> +    if (duration)
> +        st->duration = av_rescale_q(duration, (AVRational){1, c.sample_rate}, st->time_base);
> +    else if (mp3->frames)
>          st->duration = av_rescale_q(mp3->frames, (AVRational){spf, c.sample_rate},
>                                      st->time_base);
>      if (mp3->header_filesize && mp3->frames && !mp3->is_cbr)
>          st->codecpar->bit_rate = av_rescale(mp3->header_filesize, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> +    if (size) {
> +        if (duration)
> +            st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, duration);
> +        else if (mp3->frames)
> +            st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> +    }
>  
>      return 0;
>  }

If we add this, can we add a fate sample as well? The test should be
similar to the other gapless tests.
Christopher Snowhill Jan. 27, 2017, 9:20 p.m. UTC | #2
It needs this patch first, but I have some example MP3 files that
decode gaplessly with these patches applied. Encoded using the latest
iTunes as of this writing (12.5.5.5) on macOS Sierra. Attaching the
MP3 files, as well as source FLAC files, after this patch goes
through.
Christopher Snowhill Jan. 27, 2017, 9:26 p.m. UTC | #3
Combined with the ID3v2.2 COM frame fix I just submitted, these MP3 files will decode to a continuous signal.
On 1/27/2017 4:10:54 AM, wm4 <nfxjfg@googlemail.com> wrote:
On Thu, 26 Jan 2017 16:47:12 -0800
Christopher Snowhill wrote:

> Signed-off-by: Christopher Snowhill
> ---
> libavformat/mp3dec.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
> index 099ca57..b895e37 100644
> --- a/libavformat/mp3dec.c
> +++ b/libavformat/mp3dec.c
> @@ -295,6 +295,66 @@ static void mp3_parse_vbri_tag(AVFormatContext *s, AVStream *st, int64_t base)
> }
> }
>
> +static void mp3_parse_itunes_tag(AVFormatContext *s, AVStream *st, MPADecodeHeader *c, int64_t base, int vbrtag_size, unsigned int *size, uint64_t *duration)
> +{
> + uint32_t v;
> + AVDictionaryEntry *de;
> + MP3DecContext *mp3 = s->priv_data;
> + size_t length;
> + uint32_t zero, start_pad, end_pad;
> + uint64_t last_eight_frames_offset;
> + int64_t temp_duration, file_size;
> + int i;
> +
> + if (!s->metadata || !(de = av_dict_get(s->metadata, "iTunSMPB", NULL, 0)))
> + return;
> +
> + length = strlen(de->value);
> +
> + /* Minimum length is one digit per field plus the whitespace, maximum length should depend on field type
> + * There are four fields we need in the first six, the rest are currently zero padding */
> + if (length (10 * 8 + 2 * 16 + 11))
> + return;
> +
> + file_size = avio_size(s->pb);
> + if (file_size
> + file_size = 0;
> +
> + if (sscanf(de->value, "%"PRIx32" %"PRIx32" %"PRIx32" %"PRIx64" %"PRIx32" %"PRIx64, &zero, &start_pad, &end_pad, &temp_duration, &zero, &last_eight_frames_offset)
> + temp_duration
> + start_pad > (576 * 2 * 32) ||
> + end_pad > (576 * 2 * 64) ||
> + (file_size && (last_eight_frames_offset >= (file_size - base - vbrtag_size)))) {
> + *duration = 0;
> + return;
> + }
> +
> + *duration = temp_duration;
> +
> + mp3->start_pad = start_pad;
> + mp3->end_pad = end_pad;
> + if (end_pad >= 528 + 1)
> + mp3->end_pad = end_pad - (528 + 1);
> + st->start_skip_samples = mp3->start_pad + 528 + 1;
> + av_log(s, AV_LOG_DEBUG, "pad %d %d\n", mp3->start_pad, mp3->end_pad);
> + if (!s->pb->seekable)
> + return;
> +
> + *size = (unsigned int) last_eight_frames_offset;
> + if (avio_seek(s->pb, base + vbrtag_size + last_eight_frames_offset, SEEK_SET)
> + return;
> +
> + for (i = 0; i
> + v = avio_rb32(s->pb);
> + if (ff_mpa_check_header(v)
> + return;
> + if (avpriv_mpegaudio_decode_header(c, v) != 0)
> + break;
> + *size += c->frame_size;
> + avio_skip(s->pb, c->frame_size - 4);
> + }
> +}
> +
> /**
> * Try to find Xing/Info/VBRI tags and compute duration from info therein
> */
> @@ -303,8 +363,10 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
> uint32_t v, spf;
> MPADecodeHeader c;
> int vbrtag_size = 0;
> + unsigned int size = 0;
> MP3DecContext *mp3 = s->priv_data;
> int ret;
> + uint64_t duration = 0;
>
> ffio_init_checksum(s->pb, ff_crcA001_update, 0);
>
> @@ -327,16 +389,29 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
> mp3_parse_vbri_tag(s, st, base);
>
> if (!mp3->frames && !mp3->header_filesize)
> + vbrtag_size = 0;
> +
> + mp3_parse_itunes_tag(s, st, &c, base, vbrtag_size, &size, &duration);
> +
> + if (!mp3->frames && !size && !duration)
> return -1;
>
> /* Skip the vbr tag frame */
> avio_seek(s->pb, base + vbrtag_size, SEEK_SET);
>
> - if (mp3->frames)
> + if (duration)
> + st->duration = av_rescale_q(duration, (AVRational){1, c.sample_rate}, st->time_base);
> + else if (mp3->frames)
> st->duration = av_rescale_q(mp3->frames, (AVRational){spf, c.sample_rate},
> st->time_base);
> if (mp3->header_filesize && mp3->frames && !mp3->is_cbr)
> st->codecpar->bit_rate = av_rescale(mp3->header_filesize, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> + if (size) {
> + if (duration)
> + st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, duration);
> + else if (mp3->frames)
> + st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> + }
>
> return 0;
> }

If we add this, can we add a fate sample as well? The test should be
similar to the other gapless tests.
Christopher Snowhill Jan. 29, 2017, 12:09 a.m. UTC | #4
Disregard those two, they were mistaken transcodes. These two are supposed to be gapless, however. Same signal, just not transcoded and downsampled first.
On 1/27/2017 1:26:24 PM, Christopher Snowhill <kode54@gmail.com> wrote:
Combined with the ID3v2.2 COM frame fix I just submitted, these MP3 files will decode to a continuous signal.
On 1/27/2017 4:10:54 AM, wm4 <nfxjfg@googlemail.com> wrote:
On Thu, 26 Jan 2017 16:47:12 -0800
Christopher Snowhill wrote:

> Signed-off-by: Christopher Snowhill
> ---
> libavformat/mp3dec.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
> index 099ca57..b895e37 100644
> --- a/libavformat/mp3dec.c
> +++ b/libavformat/mp3dec.c
> @@ -295,6 +295,66 @@ static void mp3_parse_vbri_tag(AVFormatContext *s, AVStream *st, int64_t base)
> }
> }
>
> +static void mp3_parse_itunes_tag(AVFormatContext *s, AVStream *st, MPADecodeHeader *c, int64_t base, int vbrtag_size, unsigned int *size, uint64_t *duration)
> +{
> + uint32_t v;
> + AVDictionaryEntry *de;
> + MP3DecContext *mp3 = s->priv_data;
> + size_t length;
> + uint32_t zero, start_pad, end_pad;
> + uint64_t last_eight_frames_offset;
> + int64_t temp_duration, file_size;
> + int i;
> +
> + if (!s->metadata || !(de = av_dict_get(s->metadata, "iTunSMPB", NULL, 0)))
> + return;
> +
> + length = strlen(de->value);
> +
> + /* Minimum length is one digit per field plus the whitespace, maximum length should depend on field type
> + * There are four fields we need in the first six, the rest are currently zero padding */
> + if (length (10 * 8 + 2 * 16 + 11))
> + return;
> +
> + file_size = avio_size(s->pb);
> + if (file_size
> + file_size = 0;
> +
> + if (sscanf(de->value, "%"PRIx32" %"PRIx32" %"PRIx32" %"PRIx64" %"PRIx32" %"PRIx64, &zero, &start_pad, &end_pad, &temp_duration, &zero, &last_eight_frames_offset)
> + temp_duration
> + start_pad > (576 * 2 * 32) ||
> + end_pad > (576 * 2 * 64) ||
> + (file_size && (last_eight_frames_offset >= (file_size - base - vbrtag_size)))) {
> + *duration = 0;
> + return;
> + }
> +
> + *duration = temp_duration;
> +
> + mp3->start_pad = start_pad;
> + mp3->end_pad = end_pad;
> + if (end_pad >= 528 + 1)
> + mp3->end_pad = end_pad - (528 + 1);
> + st->start_skip_samples = mp3->start_pad + 528 + 1;
> + av_log(s, AV_LOG_DEBUG, "pad %d %d\n", mp3->start_pad, mp3->end_pad);
> + if (!s->pb->seekable)
> + return;
> +
> + *size = (unsigned int) last_eight_frames_offset;
> + if (avio_seek(s->pb, base + vbrtag_size + last_eight_frames_offset, SEEK_SET)
> + return;
> +
> + for (i = 0; i
> + v = avio_rb32(s->pb);
> + if (ff_mpa_check_header(v)
> + return;
> + if (avpriv_mpegaudio_decode_header(c, v) != 0)
> + break;
> + *size += c->frame_size;
> + avio_skip(s->pb, c->frame_size - 4);
> + }
> +}
> +
> /**
> * Try to find Xing/Info/VBRI tags and compute duration from info therein
> */
> @@ -303,8 +363,10 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
> uint32_t v, spf;
> MPADecodeHeader c;
> int vbrtag_size = 0;
> + unsigned int size = 0;
> MP3DecContext *mp3 = s->priv_data;
> int ret;
> + uint64_t duration = 0;
>
> ffio_init_checksum(s->pb, ff_crcA001_update, 0);
>
> @@ -327,16 +389,29 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
> mp3_parse_vbri_tag(s, st, base);
>
> if (!mp3->frames && !mp3->header_filesize)
> + vbrtag_size = 0;
> +
> + mp3_parse_itunes_tag(s, st, &c, base, vbrtag_size, &size, &duration);
> +
> + if (!mp3->frames && !size && !duration)
> return -1;
>
> /* Skip the vbr tag frame */
> avio_seek(s->pb, base + vbrtag_size, SEEK_SET);
>
> - if (mp3->frames)
> + if (duration)
> + st->duration = av_rescale_q(duration, (AVRational){1, c.sample_rate}, st->time_base);
> + else if (mp3->frames)
> st->duration = av_rescale_q(mp3->frames, (AVRational){spf, c.sample_rate},
> st->time_base);
> if (mp3->header_filesize && mp3->frames && !mp3->is_cbr)
> st->codecpar->bit_rate = av_rescale(mp3->header_filesize, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> + if (size) {
> + if (duration)
> + st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, duration);
> + else if (mp3->frames)
> + st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
> + }
>
> return 0;
> }

If we add this, can we add a fate sample as well? The test should be
similar to the other gapless tests.
Michael Niedermayer Jan. 29, 2017, 1:39 a.m. UTC | #5
On Sat, Jan 28, 2017 at 04:09:20PM -0800, Christopher Snowhill wrote:
> Disregard those two, they were mistaken transcodes. These two are supposed to be gapless, however. Same signal, just not transcoded and downsampled first.

where should they be uploaded to ?
also is there a corresponding patch with fate test to test ?

thx

[...]
Christopher Snowhill Oct. 2, 2017, 3:13 a.m. UTC | #6
I've finally assembled a fate test for this patch.

The test file goes here:

<fate-suite>/gapless/gapless-itunes.mp3

I have uploaded a copy here:

https://f.losno.co/gapless-itunes.mp3
Michael Niedermayer Oct. 2, 2017, 9:11 p.m. UTC | #7
On Sun, Oct 01, 2017 at 08:13:11PM -0700, Christopher Snowhill wrote:
> 
> I've finally assembled a fate test for this patch.
> 
> The test file goes here:
> 
> <fate-suite>/gapless/gapless-itunes.mp3
> 
> I have uploaded a copy here:
> 
> https://f.losno.co/gapless-itunes.mp3

uploaded to fate samples

thx

[...]
diff mbox

Patch

diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 099ca57..b895e37 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -295,6 +295,66 @@  static void mp3_parse_vbri_tag(AVFormatContext *s, AVStream *st, int64_t base)
     }
 }
 
+static void mp3_parse_itunes_tag(AVFormatContext *s, AVStream *st, MPADecodeHeader *c, int64_t base, int vbrtag_size, unsigned int *size, uint64_t *duration)
+{
+    uint32_t v;
+    AVDictionaryEntry *de;
+    MP3DecContext *mp3 = s->priv_data;
+    size_t length;
+    uint32_t zero, start_pad, end_pad;
+    uint64_t last_eight_frames_offset;
+    int64_t temp_duration, file_size;
+    int i;
+
+    if (!s->metadata || !(de = av_dict_get(s->metadata, "iTunSMPB", NULL, 0)))
+        return;
+
+    length = strlen(de->value);
+
+    /* Minimum length is one digit per field plus the whitespace, maximum length should depend on field type
+     * There are four fields we need in the first six, the rest are currently zero padding */
+    if (length < (12 + 11) || length > (10 * 8 + 2 * 16 + 11))
+        return;
+
+    file_size = avio_size(s->pb);
+    if (file_size < 0)
+        file_size = 0;
+
+    if (sscanf(de->value, "%"PRIx32" %"PRIx32" %"PRIx32" %"PRIx64" %"PRIx32" %"PRIx64, &zero, &start_pad, &end_pad, &temp_duration, &zero, &last_eight_frames_offset) < 6 ||
+        temp_duration < 0 ||
+        start_pad > (576 * 2 * 32) ||
+        end_pad > (576 * 2 * 64) ||
+        (file_size && (last_eight_frames_offset >= (file_size - base - vbrtag_size)))) {
+        *duration = 0;
+        return;
+    }
+
+    *duration = temp_duration;
+
+    mp3->start_pad = start_pad;
+    mp3->end_pad = end_pad;
+    if (end_pad >= 528 + 1)
+        mp3->end_pad = end_pad - (528 + 1);
+    st->start_skip_samples = mp3->start_pad + 528 + 1;
+    av_log(s, AV_LOG_DEBUG, "pad %d %d\n", mp3->start_pad, mp3->end_pad);
+    if (!s->pb->seekable)
+        return;
+
+    *size = (unsigned int) last_eight_frames_offset;
+    if (avio_seek(s->pb, base + vbrtag_size + last_eight_frames_offset, SEEK_SET) < 0)
+        return;
+
+    for (i = 0; i < 8; i++) {
+        v = avio_rb32(s->pb);
+        if (ff_mpa_check_header(v) < 0)
+            return;
+        if (avpriv_mpegaudio_decode_header(c, v) != 0)
+            break;
+        *size += c->frame_size;
+        avio_skip(s->pb, c->frame_size - 4);
+    }
+}
+
 /**
  * Try to find Xing/Info/VBRI tags and compute duration from info therein
  */
@@ -303,8 +363,10 @@  static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
     uint32_t v, spf;
     MPADecodeHeader c;
     int vbrtag_size = 0;
+    unsigned int size = 0;
     MP3DecContext *mp3 = s->priv_data;
     int ret;
+    uint64_t duration = 0;
 
     ffio_init_checksum(s->pb, ff_crcA001_update, 0);
 
@@ -327,16 +389,29 @@  static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
     mp3_parse_vbri_tag(s, st, base);
 
     if (!mp3->frames && !mp3->header_filesize)
+        vbrtag_size = 0;
+
+    mp3_parse_itunes_tag(s, st, &c, base, vbrtag_size, &size, &duration);
+
+    if (!mp3->frames && !size && !duration)
         return -1;
 
     /* Skip the vbr tag frame */
     avio_seek(s->pb, base + vbrtag_size, SEEK_SET);
 
-    if (mp3->frames)
+    if (duration)
+        st->duration = av_rescale_q(duration, (AVRational){1, c.sample_rate}, st->time_base);
+    else if (mp3->frames)
         st->duration = av_rescale_q(mp3->frames, (AVRational){spf, c.sample_rate},
                                     st->time_base);
     if (mp3->header_filesize && mp3->frames && !mp3->is_cbr)
         st->codecpar->bit_rate = av_rescale(mp3->header_filesize, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
+    if (size) {
+        if (duration)
+            st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, duration);
+        else if (mp3->frames)
+            st->codecpar->bit_rate = av_rescale(size, 8 * c.sample_rate, mp3->frames * (int64_t)spf);
+    }
 
     return 0;
 }