diff mbox

[FFmpeg-devel,v6,3/3] aadec: improve seeking in mp3 content

Message ID 20180713103507.24022-1-ottoka@posteo.de
State Superseded
Headers show

Commit Message

Karsten Otto July 13, 2018, 10:35 a.m. UTC
MP3 frames may not be aligned to aa chunk boundaries. When seeking,
calculate the expected frame offset in the target chunk. Adjust the
timestamp and truncate the next packet accordingly.

This solution works for the majority of tested audio material. For
some rare encodings with mp3 padding or embedded id3 tags, it will
mispredict the correct offset, and at worst skip an extra frame.
---
This revision includes some comments documenting how/why it works.

 libavformat/aadec.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

Comments

Michael Niedermayer July 14, 2018, 12:20 a.m. UTC | #1
On Fri, Jul 13, 2018 at 12:35:07PM +0200, Karsten Otto wrote:
> MP3 frames may not be aligned to aa chunk boundaries. When seeking,
> calculate the expected frame offset in the target chunk. Adjust the
> timestamp and truncate the next packet accordingly.
> 
> This solution works for the majority of tested audio material. For
> some rare encodings with mp3 padding or embedded id3 tags, it will
> mispredict the correct offset, and at worst skip an extra frame.
> ---

> This revision includes some comments documenting how/why it works.

this is not what i meant.

The input can be arbitrary, and generally would be manually created to
exploit the code.
iam not at all against documenting what it "// normally," does
but the question is what makes a check unneeded

I was more thinking of a single comment in place of the missing check
explaining why it is _guranteed_ to be unneeded.

It could also be done in form of a documented av_assert0();

Also if you need a lot of text to explain this, theres a problem.
People who work on this in the future must understand what condition
they need to maintain to make this stay true.

thanks

[...]
Karsten Otto July 14, 2018, 8:36 a.m. UTC | #2
> Am 14.07.2018 um 02:20 schrieb Michael Niedermayer <michael@niedermayer.cc>:
> 
> Signierter PGP-Teil
> On Fri, Jul 13, 2018 at 12:35:07PM +0200, Karsten Otto wrote:
>> MP3 frames may not be aligned to aa chunk boundaries. When seeking,
>> calculate the expected frame offset in the target chunk. Adjust the
>> timestamp and truncate the next packet accordingly.
>> 
>> This solution works for the majority of tested audio material. For
>> some rare encodings with mp3 padding or embedded id3 tags, it will
>> mispredict the correct offset, and at worst skip an extra frame.
>> ---
> 
>> This revision includes some comments documenting how/why it works.
> 
> this is not what i meant.
> 
> The input can be arbitrary, and generally would be manually created to
> exploit the code.
> iam not at all against documenting what it "// normally," does
> but the question is what makes a check unneeded
> 
> I was more thinking of a single comment in place of the missing check
> explaining why it is _guranteed_ to be unneeded.
> 
> It could also be done in form of a documented av_assert0();
> 
> Also if you need a lot of text to explain this, theres a problem.
> People who work on this in the future must understand what condition
> they need to maintain to make this stay true.
> 
Right, so you were concerned about maintainability:

With a thorough understanding of the code, I know I only need to handle
the last block correctly, then I won't need an extra check at packet creation.

But if I move the check down there, packet creation is obviously safe to
anybody, even without the context. And in case someone (intentionally)
messed up the file. Or needs to refactor the code in a hurry.

I will change the patch accordingly.

Cheers, Karsten
diff mbox

Patch

diff --git a/libavformat/aadec.c b/libavformat/aadec.c
index 17ad20686b..f7e92dbb4b 100644
--- a/libavformat/aadec.c
+++ b/libavformat/aadec.c
@@ -37,6 +37,7 @@ 
 #define TEA_BLOCK_SIZE 8
 #define CHAPTER_HEADER_SIZE 8
 #define TIMEPREC 1000
+#define MP3_FRAME_SIZE 104
 
 typedef struct AADemuxContext {
     AVClass *class;
@@ -50,6 +51,7 @@  typedef struct AADemuxContext {
     int64_t current_chapter_size;
     int64_t content_start;
     int64_t content_end;
+    int seek_offset;
 } AADemuxContext;
 
 static int get_second_size(char *codec_name)
@@ -177,6 +179,7 @@  static int aa_read_header(AVFormatContext *s)
         st->codecpar->sample_rate = 22050;
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
         avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
+        // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
     } else if (!strcmp(codec_name, "acelp85")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 19;
@@ -228,6 +231,7 @@  static int aa_read_header(AVFormatContext *s)
     ff_update_cur_dts(s, st, 0);
     avio_seek(pb, start, SEEK_SET);
     c->current_chapter_size = 0;
+    c->seek_offset = 0;
 
     return 0;
 }
@@ -266,6 +270,10 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     // is this the last block in this chapter?
     if (c->current_chapter_size / c->current_codec_second_size == 0) {
         c->current_codec_second_size = c->current_chapter_size % c->current_codec_second_size;
+        // normally, seek_offset max MP3_FRAME_SIZE-1 <= current_codec_second_size;
+        // in short last block, estimate may be off for file with padding
+        if (c->seek_offset > c->current_codec_second_size)
+            c->seek_offset = 0;
     }
 
     // decrypt c->current_codec_second_size bytes
@@ -293,12 +301,14 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (c->current_chapter_size <= 0)
         c->current_chapter_size = 0;
 
-    ret = av_new_packet(pkt, written);
+    // at this point, seek_offset <= written == current_codec_second_size
+    ret = av_new_packet(pkt, written - c->seek_offset);
     if (ret < 0)
         return ret;
-    memcpy(pkt->data, buf, written);
+    memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
     pkt->pos = pos;
 
+    c->seek_offset = 0;
     return 0;
 }
 
@@ -342,7 +352,12 @@  static int aa_read_seek(AVFormatContext *s,
     c->current_chapter_size = chapter_size - chapter_pos;
     c->chapter_idx = 1 + chapter_idx;
 
-    ff_update_cur_dts(s, s->streams[0], ch->start + chapter_pos * TIMEPREC);
+    // estimate extra offset of first unaligned frame in block (assume no padding)
+    if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
+        c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
+    }
+
+    ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
 
     return 1;
 }