diff mbox

[FFmpeg-devel] aadec: alternate mp3 seek handling

Message ID 20180731191632.2327-1-ottoka@posteo.de
State New
Headers show

Commit Message

Karsten Otto July 31, 2018, 7:16 p.m. UTC
After seeking, determine the offset of the next frame in the decrypted
buffer by scanning the first few bytes for a valid mp3 header.
This significantly improves the listening experience for audio content
with untypical encoding.
---
This is a refinement of an earlier patch iteration, according to lessons
learned and discussions on the mailing list: Scan a limited range to find
the first shifted frame only, check for a very specific bit pattern, and
add extra checks and guards for better code maintainability.

Unfortunately, this variant violates the architectural layering between
demuxer and codec. But I did some more testing with untypical encodings,
where the current estimation mechanism fails, and the resulting audio on
seek was just too horribly annoying.

I believe the better listening experience outweighs the architectural
uglyness, so this should be in the official code base. But if you think
otherwise, just let me know, and I will keep this a private patch.

 libavformat/aadec.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

Comments

Karsten Otto Aug. 8, 2018, 8:41 a.m. UTC | #1
Ping. Any thoughts on this?

Cheers, Karsten

> Am 31.07.2018 um 21:16 schrieb Karsten Otto <ottoka@posteo.de>:
> 
> After seeking, determine the offset of the next frame in the decrypted
> buffer by scanning the first few bytes for a valid mp3 header.
> This significantly improves the listening experience for audio content
> with untypical encoding.
> ---
> This is a refinement of an earlier patch iteration, according to lessons
> learned and discussions on the mailing list: Scan a limited range to find
> the first shifted frame only, check for a very specific bit pattern, and
> add extra checks and guards for better code maintainability.
> 
> Unfortunately, this variant violates the architectural layering between
> demuxer and codec. But I did some more testing with untypical encodings,
> where the current estimation mechanism fails, and the resulting audio on
> seek was just too horribly annoying.
> 
> I believe the better listening experience outweighs the architectural
> uglyness, so this should be in the official code base. But if you think
> otherwise, just let me know, and I will keep this a private patch.
> 
> libavformat/aadec.c | 45 ++++++++++++++++++++++++++++-----------------
> 1 file changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
> index d83f283ffe..9b1495c218 100644
> --- a/libavformat/aadec.c
> +++ b/libavformat/aadec.c
> @@ -37,7 +37,7 @@
> #define TEA_BLOCK_SIZE 8
> #define CHAPTER_HEADER_SIZE 8
> #define TIMEPREC 1000
> -#define MP3_FRAME_SIZE 104
> +#define MP3_FRAME_SIZE 105
> 
> typedef struct AADemuxContext {
>     AVClass *class;
> @@ -51,7 +51,7 @@ typedef struct AADemuxContext {
>     int64_t current_chapter_size;
>     int64_t content_start;
>     int64_t content_end;
> -    int seek_offset;
> +    int did_seek;
> } AADemuxContext;
> 
> static int get_second_size(char *codec_name)
> @@ -179,7 +179,7 @@ static int aa_read_header(AVFormatContext *s)
>         st->codecpar->sample_rate = 22050;
>         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
> -        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
> +        // encoded audio frame is MP3_FRAME_SIZE bytes (-1 without padding)
>     } else if (!strcmp(codec_name, "acelp85")) {
>         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>         st->codecpar->block_align = 19;
> @@ -231,7 +231,7 @@ static int aa_read_header(AVFormatContext *s)
>     ff_update_cur_dts(s, st, 0);
>     avio_seek(pb, start, SEEK_SET);
>     c->current_chapter_size = 0;
> -    c->seek_offset = 0;
> +    c->did_seek = 0;
> 
>     return 0;
> }
> @@ -244,7 +244,7 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     int trailing_bytes;
>     int blocks;
>     uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
> -    int written = 0;
> +    int written = 0, offset = 0;
>     int ret;
>     AADemuxContext *c = s->priv_data;
>     uint64_t pos = avio_tell(s->pb);
> @@ -297,16 +297,33 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     if (c->current_chapter_size <= 0)
>         c->current_chapter_size = 0;
> 
> -    if (c->seek_offset > written)
> -        c->seek_offset = 0; // ignore wrong estimate
> +    if (c->did_seek) {
> +        c->did_seek = 0;
> +
> +        if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3 && written >= MP3_FRAME_SIZE + 3) {
> +            for (offset = 0; offset < MP3_FRAME_SIZE; ++offset) {
> +                // find mp3 header: sync, v2, layer3, no crc, 32kbps, 22kHz, mono
> +                if ((buf[offset + 0]       ) == 0xff &&
> +                    (buf[offset + 1]       ) == 0xf3 &&
> +                    (buf[offset + 2] & 0xfc) == 0x40 &&
> +                    (buf[offset + 3] & 0xf0) == 0xc0)
> +                        break;
> +            }
> +            if (offset == MP3_FRAME_SIZE) offset = 0; // not found, just use as is
> +        }
> +
> +        ff_update_cur_dts(s, s->streams[0],
> +            (pos + offset - c->content_start - CHAPTER_HEADER_SIZE * (c->chapter_idx - 1))
> +            * TIMEPREC);
> +    }
> 
> -    ret = av_new_packet(pkt, written - c->seek_offset);
> +    if (offset > written) offset = 0;
> +    ret = av_new_packet(pkt, written - offset);
>     if (ret < 0)
>         return ret;
> -    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
> +    memcpy(pkt->data, buf + offset, written - offset);
>     pkt->pos = pos;
> 
> -    c->seek_offset = 0;
>     return 0;
> }
> 
> @@ -349,13 +366,7 @@ static int aa_read_seek(AVFormatContext *s,
>     c->current_codec_second_size = c->codec_second_size;
>     c->current_chapter_size = chapter_size - chapter_pos;
>     c->chapter_idx = 1 + chapter_idx;
> -
> -    // for unaligned frames, estimate offset of first frame in block (assume no padding)
> -    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
> -        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
> -    }
> -
> -    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
> +    c->did_seek = 1;
> 
>     return 1;
> }
> -- 
> 2.14.3 (Apple Git-98)
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Karsten Otto Aug. 20, 2018, 8:29 p.m. UTC | #2
Ping. Any thoughts on this?

Cheers, Karsten

> Am 31.07.2018 um 21:16 schrieb Karsten Otto <ottoka@posteo.de>:
> 
> After seeking, determine the offset of the next frame in the decrypted
> buffer by scanning the first few bytes for a valid mp3 header.
> This significantly improves the listening experience for audio content
> with untypical encoding.
> ---
> This is a refinement of an earlier patch iteration, according to lessons
> learned and discussions on the mailing list: Scan a limited range to find
> the first shifted frame only, check for a very specific bit pattern, and
> add extra checks and guards for better code maintainability.
> 
> Unfortunately, this variant violates the architectural layering between
> demuxer and codec. But I did some more testing with untypical encodings,
> where the current estimation mechanism fails, and the resulting audio on
> seek was just too horribly annoying.
> 
> I believe the better listening experience outweighs the architectural
> uglyness, so this should be in the official code base. But if you think
> otherwise, just let me know, and I will keep this a private patch.
> 
> libavformat/aadec.c | 45 ++++++++++++++++++++++++++++-----------------
> 1 file changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
> index d83f283ffe..9b1495c218 100644
> --- a/libavformat/aadec.c
> +++ b/libavformat/aadec.c
> @@ -37,7 +37,7 @@
> #define TEA_BLOCK_SIZE 8
> #define CHAPTER_HEADER_SIZE 8
> #define TIMEPREC 1000
> -#define MP3_FRAME_SIZE 104
> +#define MP3_FRAME_SIZE 105
> 
> typedef struct AADemuxContext {
>     AVClass *class;
> @@ -51,7 +51,7 @@ typedef struct AADemuxContext {
>     int64_t current_chapter_size;
>     int64_t content_start;
>     int64_t content_end;
> -    int seek_offset;
> +    int did_seek;
> } AADemuxContext;
> 
> static int get_second_size(char *codec_name)
> @@ -179,7 +179,7 @@ static int aa_read_header(AVFormatContext *s)
>         st->codecpar->sample_rate = 22050;
>         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
> -        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
> +        // encoded audio frame is MP3_FRAME_SIZE bytes (-1 without padding)
>     } else if (!strcmp(codec_name, "acelp85")) {
>         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>         st->codecpar->block_align = 19;
> @@ -231,7 +231,7 @@ static int aa_read_header(AVFormatContext *s)
>     ff_update_cur_dts(s, st, 0);
>     avio_seek(pb, start, SEEK_SET);
>     c->current_chapter_size = 0;
> -    c->seek_offset = 0;
> +    c->did_seek = 0;
> 
>     return 0;
> }
> @@ -244,7 +244,7 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     int trailing_bytes;
>     int blocks;
>     uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
> -    int written = 0;
> +    int written = 0, offset = 0;
>     int ret;
>     AADemuxContext *c = s->priv_data;
>     uint64_t pos = avio_tell(s->pb);
> @@ -297,16 +297,33 @@ static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
>     if (c->current_chapter_size <= 0)
>         c->current_chapter_size = 0;
> 
> -    if (c->seek_offset > written)
> -        c->seek_offset = 0; // ignore wrong estimate
> +    if (c->did_seek) {
> +        c->did_seek = 0;
> +
> +        if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3 && written >= MP3_FRAME_SIZE + 3) {
> +            for (offset = 0; offset < MP3_FRAME_SIZE; ++offset) {
> +                // find mp3 header: sync, v2, layer3, no crc, 32kbps, 22kHz, mono
> +                if ((buf[offset + 0]       ) == 0xff &&
> +                    (buf[offset + 1]       ) == 0xf3 &&
> +                    (buf[offset + 2] & 0xfc) == 0x40 &&
> +                    (buf[offset + 3] & 0xf0) == 0xc0)
> +                        break;
> +            }
> +            if (offset == MP3_FRAME_SIZE) offset = 0; // not found, just use as is
> +        }
> +
> +        ff_update_cur_dts(s, s->streams[0],
> +            (pos + offset - c->content_start - CHAPTER_HEADER_SIZE * (c->chapter_idx - 1))
> +            * TIMEPREC);
> +    }
> 
> -    ret = av_new_packet(pkt, written - c->seek_offset);
> +    if (offset > written) offset = 0;
> +    ret = av_new_packet(pkt, written - offset);
>     if (ret < 0)
>         return ret;
> -    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
> +    memcpy(pkt->data, buf + offset, written - offset);
>     pkt->pos = pos;
> 
> -    c->seek_offset = 0;
>     return 0;
> }
> 
> @@ -349,13 +366,7 @@ static int aa_read_seek(AVFormatContext *s,
>     c->current_codec_second_size = c->codec_second_size;
>     c->current_chapter_size = chapter_size - chapter_pos;
>     c->chapter_idx = 1 + chapter_idx;
> -
> -    // for unaligned frames, estimate offset of first frame in block (assume no padding)
> -    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
> -        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
> -    }
> -
> -    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
> +    c->did_seek = 1;
> 
>     return 1;
> }
> -- 
> 2.14.3 (Apple Git-98)
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavformat/aadec.c b/libavformat/aadec.c
index d83f283ffe..9b1495c218 100644
--- a/libavformat/aadec.c
+++ b/libavformat/aadec.c
@@ -37,7 +37,7 @@ 
 #define TEA_BLOCK_SIZE 8
 #define CHAPTER_HEADER_SIZE 8
 #define TIMEPREC 1000
-#define MP3_FRAME_SIZE 104
+#define MP3_FRAME_SIZE 105
 
 typedef struct AADemuxContext {
     AVClass *class;
@@ -51,7 +51,7 @@  typedef struct AADemuxContext {
     int64_t current_chapter_size;
     int64_t content_start;
     int64_t content_end;
-    int seek_offset;
+    int did_seek;
 } AADemuxContext;
 
 static int get_second_size(char *codec_name)
@@ -179,7 +179,7 @@  static int aa_read_header(AVFormatContext *s)
         st->codecpar->sample_rate = 22050;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
-        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
+        // encoded audio frame is MP3_FRAME_SIZE bytes (-1 without padding)
     } else if (!strcmp(codec_name, "acelp85")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 19;
@@ -231,7 +231,7 @@  static int aa_read_header(AVFormatContext *s)
     ff_update_cur_dts(s, st, 0);
     avio_seek(pb, start, SEEK_SET);
     c->current_chapter_size = 0;
-    c->seek_offset = 0;
+    c->did_seek = 0;
 
     return 0;
 }
@@ -244,7 +244,7 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     int trailing_bytes;
     int blocks;
     uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
-    int written = 0;
+    int written = 0, offset = 0;
     int ret;
     AADemuxContext *c = s->priv_data;
     uint64_t pos = avio_tell(s->pb);
@@ -297,16 +297,33 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (c->current_chapter_size <= 0)
         c->current_chapter_size = 0;
 
-    if (c->seek_offset > written)
-        c->seek_offset = 0; // ignore wrong estimate
+    if (c->did_seek) {
+        c->did_seek = 0;
+
+        if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3 && written >= MP3_FRAME_SIZE + 3) {
+            for (offset = 0; offset < MP3_FRAME_SIZE; ++offset) {
+                // find mp3 header: sync, v2, layer3, no crc, 32kbps, 22kHz, mono
+                if ((buf[offset + 0]       ) == 0xff &&
+                    (buf[offset + 1]       ) == 0xf3 &&
+                    (buf[offset + 2] & 0xfc) == 0x40 &&
+                    (buf[offset + 3] & 0xf0) == 0xc0)
+                        break;
+            }
+            if (offset == MP3_FRAME_SIZE) offset = 0; // not found, just use as is
+        }
+
+        ff_update_cur_dts(s, s->streams[0],
+            (pos + offset - c->content_start - CHAPTER_HEADER_SIZE * (c->chapter_idx - 1))
+            * TIMEPREC);
+    }
 
-    ret = av_new_packet(pkt, written - c->seek_offset);
+    if (offset > written) offset = 0;
+    ret = av_new_packet(pkt, written - offset);
     if (ret < 0)
         return ret;
-    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
+    memcpy(pkt->data, buf + offset, written - offset);
     pkt->pos = pos;
 
-    c->seek_offset = 0;
     return 0;
 }
 
@@ -349,13 +366,7 @@  static int aa_read_seek(AVFormatContext *s,
     c->current_codec_second_size = c->codec_second_size;
     c->current_chapter_size = chapter_size - chapter_pos;
     c->chapter_idx = 1 + chapter_idx;
-
-    // for unaligned frames, estimate offset of first frame in block (assume no padding)
-    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
-        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
-    }
-
-    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
+    c->did_seek = 1;
 
     return 1;
 }