diff mbox

[FFmpeg-devel,v3,2/3] aadec: add chapters and seeking

Message ID 20180621165826.1714-3-ottoka@posteo.de
State Superseded
Headers show

Commit Message

Karsten Otto June 21, 2018, 4:58 p.m. UTC
read_packet reads content in chunks. Thus seek must be clamped to valid
chunk positions in the file, which in turn are relative to chapter start
positions.

So in read_header, scan for chapter headers once by skipping through the
content. Set stream time_base to bitrate in bytes/s, for easy timestamp to
position conversion.

Then in read_seek, find the chapter containing the seek position, calculate
the nearest chunk position, and reinit the read state accordingly.
---
 libavformat/aadec.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 83 insertions(+), 4 deletions(-)

Comments

Michael Niedermayer July 2, 2018, 8:22 a.m. UTC | #1
On Thu, Jun 21, 2018 at 06:58:25PM +0200, Karsten Otto wrote:
> read_packet reads content in chunks. Thus seek must be clamped to valid
> chunk positions in the file, which in turn are relative to chapter start
> positions.
> 
> So in read_header, scan for chapter headers once by skipping through the
> content. Set stream time_base to bitrate in bytes/s, for easy timestamp to
> position conversion.

IIUC, this would linearly read through the whole file before playing
anything?
If that's the case, that's not ideal. Is that unavoidable?



> 
> Then in read_seek, find the chapter containing the seek position, calculate
> the nearest chunk position, and reinit the read_seek state accordingly.
> ---
>  libavformat/aadec.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 83 insertions(+), 4 deletions(-)
> 
> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
> index 4db71b1939..b009c9deca 100644
> --- a/libavformat/aadec.c
> +++ b/libavformat/aadec.c
> @@ -35,6 +35,8 @@
>  #define MAX_TOC_ENTRIES 16
>  #define MAX_DICTIONARY_ENTRIES 128
>  #define TEA_BLOCK_SIZE 8
> +#define CHAPTER_HEADER_SIZE 8
> +#define TIMEPREC 1000
>  
>  typedef struct AADemuxContext {
>      AVClass *class;
> @@ -46,6 +48,7 @@ typedef struct AADemuxContext {
>      struct AVTEA *tea_ctx;
>      uint8_t file_key[16];
>      int64_t current_chapter_size;
> +    int64_t content_start;
>      int64_t content_end;
>  } AADemuxContext;
>  
> @@ -70,7 +73,7 @@ static int aa_read_header(AVFormatContext *s)
>      uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
>      char key[128], val[128], codec_name[64] = {0};
>      uint8_t output[24], dst[8], src[8];
> -    int64_t largest_size = -1, current_size = -1;
> +    int64_t largest_size = -1, current_size = -1, chapter_pos;
>      struct toc_entry {
>          uint32_t offset;
>          uint32_t size;

> @@ -172,19 +175,23 @@ static int aa_read_header(AVFormatContext *s)
>      if (!strcmp(codec_name, "mp332")) {
>          st->codecpar->codec_id = AV_CODEC_ID_MP3;
>          st->codecpar->sample_rate = 22050;
> +        st->time_base = av_make_q(8, 32000 * TIMEPREC);
>          st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> -        st->start_time = 0;
>      } else if (!strcmp(codec_name, "acelp85")) {
>          st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>          st->codecpar->block_align = 19;
>          st->codecpar->channels = 1;
>          st->codecpar->sample_rate = 8500;
> +        st->codecpar->bit_rate = 8500;
> +        st->time_base = av_make_q(8, 8500 * TIMEPREC);
>          st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>      } else if (!strcmp(codec_name, "acelp16")) {
>          st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>          st->codecpar->block_align = 20;
>          st->codecpar->channels = 1;
>          st->codecpar->sample_rate = 16000;
> +        st->codecpar->bit_rate = 16000;
> +        st->time_base = av_make_q(8, 16000 * TIMEPREC);

See avpriv_set_pts_info() for setting the stream time base, instead of assigning st->time_base directly.


[...]
Karsten Otto July 2, 2018, 4:51 p.m. UTC | #2
> Am 02.07.2018 um 10:22 schrieb Michael Niedermayer <michael@niedermayer.cc>:
> 
> Signierter PGP-Teil
> On Thu, Jun 21, 2018 at 06:58:25PM +0200, Karsten Otto wrote:
>> read_packet reads content in chunks. Thus seek must be clamped to valid
>> chunk positions in the file, which in turn are relative to chapter start
>> positions.
>> 
>> So in read_header, scan for chapter headers once by skipping through the
>> content. Set stream time_base to bitrate in bytes/s, for easy timestamp to
>> position conversion.
> 
> IIUC this would be linearly reading through the whole file before playing
> anything?
> if thats the case, thats not ideal, is that unavoidable ?
> 
It is not quite that bad: a chapter header contains the byte size of the chapter,
so after reading it I can avio_skip() ahead to the next chapter header, and
repeat. So this is not a linear read, but mostly a sequence of seek operations.

I am aware this is not a perfect solution, but unfortunately at the current level of
knowledge about the aa format, this is indeed unavoidable for now. Possibly it
could be improved later if anyone gains a deeper understanding of the format.

> 
> 
>> 
>> Then in read_seek, find the chapter containing the seek position, calculate
>> the nearest chunk position, and reinit the read_seek state accordingly.
>> ---
>> libavformat/aadec.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 83 insertions(+), 4 deletions(-)
>> 
>> diff --git a/libavformat/aadec.c b/libavformat/aadec.c
>> index 4db71b1939..b009c9deca 100644
>> --- a/libavformat/aadec.c
>> +++ b/libavformat/aadec.c
>> @@ -35,6 +35,8 @@
>> #define MAX_TOC_ENTRIES 16
>> #define MAX_DICTIONARY_ENTRIES 128
>> #define TEA_BLOCK_SIZE 8
>> +#define CHAPTER_HEADER_SIZE 8
>> +#define TIMEPREC 1000
>> 
>> typedef struct AADemuxContext {
>>     AVClass *class;
>> @@ -46,6 +48,7 @@ typedef struct AADemuxContext {
>>     struct AVTEA *tea_ctx;
>>     uint8_t file_key[16];
>>     int64_t current_chapter_size;
>> +    int64_t content_start;
>>     int64_t content_end;
>> } AADemuxContext;
>> 
>> @@ -70,7 +73,7 @@ static int aa_read_header(AVFormatContext *s)
>>     uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
>>     char key[128], val[128], codec_name[64] = {0};
>>     uint8_t output[24], dst[8], src[8];
>> -    int64_t largest_size = -1, current_size = -1;
>> +    int64_t largest_size = -1, current_size = -1, chapter_pos;
>>     struct toc_entry {
>>         uint32_t offset;
>>         uint32_t size;
> 
>> @@ -172,19 +175,23 @@ static int aa_read_header(AVFormatContext *s)
>>     if (!strcmp(codec_name, "mp332")) {
>>         st->codecpar->codec_id = AV_CODEC_ID_MP3;
>>         st->codecpar->sample_rate = 22050;
>> +        st->time_base = av_make_q(8, 32000 * TIMEPREC);
>>         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>> -        st->start_time = 0;
>>     } else if (!strcmp(codec_name, "acelp85")) {
>>         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>>         st->codecpar->block_align = 19;
>>         st->codecpar->channels = 1;
>>         st->codecpar->sample_rate = 8500;
>> +        st->codecpar->bit_rate = 8500;
>> +        st->time_base = av_make_q(8, 8500 * TIMEPREC);
>>         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
>>     } else if (!strcmp(codec_name, "acelp16")) {
>>         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
>>         st->codecpar->block_align = 20;
>>         st->codecpar->channels = 1;
>>         st->codecpar->sample_rate = 16000;
>> +        st->codecpar->bit_rate = 16000;
>> +        st->time_base = av_make_q(8, 16000 * TIMEPREC);
> 
> see avpriv_set_pts_info()
> 
Thanks, I will check it out.

> 
> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> Elect your leaders based on what they did after the last election, not
> based on what they say before an election.
> 
> 
>
diff mbox

Patch

diff --git a/libavformat/aadec.c b/libavformat/aadec.c
index 4db71b1939..b009c9deca 100644
--- a/libavformat/aadec.c
+++ b/libavformat/aadec.c
@@ -35,6 +35,8 @@ 
 #define MAX_TOC_ENTRIES 16
 #define MAX_DICTIONARY_ENTRIES 128
 #define TEA_BLOCK_SIZE 8
+#define CHAPTER_HEADER_SIZE 8
+#define TIMEPREC 1000
 
 typedef struct AADemuxContext {
     AVClass *class;
@@ -46,6 +48,7 @@  typedef struct AADemuxContext {
     struct AVTEA *tea_ctx;
     uint8_t file_key[16];
     int64_t current_chapter_size;
+    int64_t content_start;
     int64_t content_end;
 } AADemuxContext;
 
@@ -70,7 +73,7 @@  static int aa_read_header(AVFormatContext *s)
     uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
     char key[128], val[128], codec_name[64] = {0};
     uint8_t output[24], dst[8], src[8];
-    int64_t largest_size = -1, current_size = -1;
+    int64_t largest_size = -1, current_size = -1, chapter_pos;
     struct toc_entry {
         uint32_t offset;
         uint32_t size;
@@ -172,19 +175,23 @@  static int aa_read_header(AVFormatContext *s)
     if (!strcmp(codec_name, "mp332")) {
         st->codecpar->codec_id = AV_CODEC_ID_MP3;
         st->codecpar->sample_rate = 22050;
+        st->time_base = av_make_q(8, 32000 * TIMEPREC);
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
-        st->start_time = 0;
     } else if (!strcmp(codec_name, "acelp85")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 19;
         st->codecpar->channels = 1;
         st->codecpar->sample_rate = 8500;
+        st->codecpar->bit_rate = 8500;
+        st->time_base = av_make_q(8, 8500 * TIMEPREC);
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
     } else if (!strcmp(codec_name, "acelp16")) {
         st->codecpar->codec_id = AV_CODEC_ID_SIPR;
         st->codecpar->block_align = 20;
         st->codecpar->channels = 1;
         st->codecpar->sample_rate = 16000;
+        st->codecpar->bit_rate = 16000;
+        st->time_base = av_make_q(8, 16000 * TIMEPREC);
         st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
     }
 
@@ -198,7 +205,30 @@  static int aa_read_header(AVFormatContext *s)
     }
     start = TOC[largest_idx].offset;
     avio_seek(pb, start, SEEK_SET);
+
+    // extract chapter positions. since all formats have constant bit rate, use it
+    // as time base in bytes/s, for easy stream position <-> timestamp conversion
+    st->start_time = 0;
+    c->content_start = start;
     c->content_end = start + largest_size;
+
+    while ((chapter_pos = avio_tell(pb)) >= 0 && chapter_pos < c->content_end) {
+        int chapter_idx, chapter_size;
+        chapter_idx = s->nb_chapters;
+        chapter_pos -= start + CHAPTER_HEADER_SIZE * chapter_idx;
+        chapter_size = avio_rb32(pb);
+        if (chapter_size == 0) break;
+        avio_skip(pb, 4);
+        avio_skip(pb, chapter_size);
+        if (!avpriv_new_chapter(s, chapter_idx, st->time_base,
+            chapter_pos * TIMEPREC, (chapter_pos + chapter_size) * TIMEPREC, NULL))
+                return AVERROR(ENOMEM);
+    }
+
+    st->duration = (largest_size - CHAPTER_HEADER_SIZE * s->nb_chapters) * TIMEPREC;
+
+    ff_update_cur_dts(s, st, 0);
+    avio_seek(pb, start, SEEK_SET);
     c->current_chapter_size = 0;
 
     return 0;
@@ -215,9 +245,10 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     int written = 0;
     int ret;
     AADemuxContext *c = s->priv_data;
+    uint64_t pos = avio_tell(s->pb);
 
     // are we at the end of the audio content?
-    if (avio_tell(s->pb) >= c->content_end) {
+    if (pos >= c->content_end) {
         return AVERROR_EOF;
     }
 
@@ -230,6 +261,7 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
         av_log(s, AV_LOG_DEBUG, "Chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);
         c->chapter_idx = c->chapter_idx + 1;
         avio_skip(s->pb, 4); // data start offset
+        pos += 8;
         c->current_codec_second_size = c->codec_second_size;
     }
 
@@ -267,10 +299,56 @@  static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (ret < 0)
         return ret;
     memcpy(pkt->data, buf, written);
+    pkt->pos = pos;
 
     return 0;
 }
 
+static int aa_read_seek(AVFormatContext *s,
+                        int stream_index, int64_t timestamp, int flags)
+{
+    AADemuxContext *c = s->priv_data;
+    AVChapter *ch;
+    int64_t chapter_pos, chapter_start, chapter_size;
+    int chapter_idx = 0;
+
+    // seek handler: first find the chapter containing the seek timestamp (negative timestamps are treated as 0)
+    if (timestamp < 0)
+        timestamp = 0;
+
+    while (chapter_idx < s->nb_chapters && timestamp >= s->chapters[chapter_idx]->end) {
+        ++chapter_idx;
+    }
+
+    if (chapter_idx >= s->nb_chapters) {
+        chapter_idx = s->nb_chapters - 1;
+        if (chapter_idx < 0) return -1; // there is no chapter, so there is nothing to seek to
+        timestamp = s->chapters[chapter_idx]->end; // timestamp lies past the last chapter: clamp it to that chapter's end
+    }
+
+    ch = s->chapters[chapter_idx];
+
+    // sync by clamping timestamp to a multiple of codec_second_size inside its chapter
+    // (rounded down for AVSEEK_FLAG_BACKWARD, up otherwise), capped at the chapter size
+    chapter_size = ch->end / TIMEPREC - ch->start / TIMEPREC;
+    chapter_pos = av_rescale_rnd((timestamp - ch->start) / TIMEPREC,
+        1, c->codec_second_size,
+        (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP)
+        * c->codec_second_size;
+    if (chapter_pos >= chapter_size)
+        chapter_pos = chapter_size;
+    chapter_start = c->content_start + (ch->start / TIMEPREC) + CHAPTER_HEADER_SIZE * (1 + chapter_idx); // add back the headers of this and all preceding chapters
+
+    // reinit read state (chunk size, bytes remaining in the chapter, chapter index) for the new position
+    avio_seek(s->pb, chapter_start + chapter_pos, SEEK_SET);
+    c->current_codec_second_size = c->codec_second_size;
+    c->current_chapter_size = chapter_size - chapter_pos;
+    c->chapter_idx = 1 + chapter_idx;
+
+    ff_update_cur_dts(s, s->streams[0], ch->start + chapter_pos * TIMEPREC); // chapter_pos is in bytes; chapter times are scaled by TIMEPREC
+
+    return 1;
+}
+
 static int aa_probe(AVProbeData *p)
 {
     uint8_t *buf = p->buf;
@@ -316,6 +394,7 @@  AVInputFormat ff_aa_demuxer = {
     .read_probe     = aa_probe,
     .read_header    = aa_read_header,
     .read_packet    = aa_read_packet,
+    .read_seek      = aa_read_seek,
     .read_close     = aa_read_close,
-    .flags          = AVFMT_GENERIC_INDEX,
+    .flags          = AVFMT_NO_BYTE_SEEK | AVFMT_NOGENSEARCH,
 };