diff mbox series

[FFmpeg-devel,07/15] avformat/mov:Heuristic search for moov

Message ID 2411572a0f49ceb147146eea04ac711dfdac3eb6.camel@haerdin.se
State New
Headers show
Series Spotify patchset | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Tomas Härdin Oct. 29, 2024, 2:48 p.m. UTC
Needs a sample. It might also misfire if an mdat happens to contain
moov atoms for some reason. But the concept is reasonable enough - mov files
tend to have the moov atom either in the header or in the footer. The
need for this patch may have been obviated by the introduction of
mfra parsing, which is very similar to the RandomIndexPack (RIP) in
MXF.

The heuristic for how far to seek back from the end could use some
work. stts, ctts, stss, stsc, stsz and stco add up to 48 bytes per
frame, so the present heuristic won't work for lower bitrate files. For
example a 25 Hz video file below 3.6 Mbit/s is on average under 18k per
frame which when divided by 384 comes to 46 bytes compared to 48 bytes'
worth of stbl stuff. There is some RLE possible with the headers, and
keyframe density makes a difference, but the margin is still thin.

I also feel this should leverage the existing atom parsing code.

Spotify comments
----------------
Input mp4 file with lots of mdat:s (non-fragmented?) causing lots of
reads and seeks making initial parse over mid-latency network access
very slow.

Possible other solutions:
    • Other or more safe way to quickly find moov?
    • TODO: wasn’t there some option to use “mfra”? Only for fragmented
files? caused other problems?

/Tomas
diff mbox series

Patch

From 34d4732f0beb12d58113958153db3f6a8006fd2c Mon Sep 17 00:00:00 2001
From: Ulrik <ulrikm@spotify.com>
Date: Thu, 9 Dec 2021 17:48:00 +0100
Subject: [PATCH 07/15] avformat/mov:Heuristic search for moov

After reading 5 mdat-boxes without having seen a moov, insert a quick
attempt at reading moov from the end of the file. Moov is scanned for with
the byte-sequence `moov`. When located, the area is inspected to avoid
spurious matches.

In particular, it's expected that the inner boxes of the
`moov` contain `mvhd` and `trak` boxes, and that reading root-boxes from
the found offset ends up evenly at end of file.
---
 libavformat/mov.c | 156 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 151 insertions(+), 5 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 8c3329b815..f58f8f3102 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -84,6 +84,7 @@  typedef struct MOVParseTableEntry {
 } MOVParseTableEntry;
 
 static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom);
+static int mov_heuristic_scan_moov(MOVContext *c, AVIOContext *pb);
 static int mov_read_mfra(MOVContext *c, AVIOContext *f);
 static void mov_free_stream_context(AVFormatContext *s, AVStream *st);
 static int64_t add_ctts_entry(MOVCtts** ctts_data, unsigned int* ctts_count, unsigned int* allocated_size,
@@ -1361,7 +1362,12 @@  static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     if (atom.size == 0) /* wrong one (MP4) */
         return 0;
-    c->found_mdat=1;
+    c->found_mdat+=1;
+    if (c->found_mdat == 5 && !c->found_moov) {
+        /* Some MP4 files consist of hundreds or even thousands of mdats with the moov at the end.
+         * Probe the end of the file for it instead of seeking past every mdat; result is ignored. */
+        mov_heuristic_scan_moov(c, pb);
+    }
     return 0; /* now go for moov */
 }
 
@@ -1571,6 +1577,141 @@  static int mov_read_moov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0; /* now go for mdat */
 }
 
+/* Read a box header into *atom and return the number of header bytes consumed (8 or 16). */
+static int mov_read_atom_header(AVIOContext *pb, MOVAtom *atom) {
+    atom->size = avio_rb32(pb);
+    atom->type = avio_rl32(pb);
+    if (atom->size != 1)
+        return 8;
+    /* Extended size: 4 (size) + 4 (type) + 8 (64-bit size) bytes were consumed, not 12 */
+    atom->size = avio_rb64(pb);
+    return 16;
+}
+
+/* Check whether a plausible moov box starts at `offset` and, if so, parse it.
+ * The candidate must be typed 'moov', fit inside the file, contain mvhd and trak children, and
+ * be followed by root-level boxes that end exactly at `filesize` (or by a box of size 0).
+ * Returns 0 on success, -1 if the candidate is rejected, or a negative seek/parse error. */
+static int mov_try_read_moov(MOVContext *c, AVIOContext *pb, int64_t offset, int64_t filesize) {
+    MOVAtom moov, child;
+    int64_t ret, payload_pos, moov_end, child_pos;
+    size_t i;
+    struct { uint32_t type; uint8_t seen; } boxes_seen[] = {
+        { MKTAG('m','v','h','d'), 0 },
+        { MKTAG('t','r','a','k'), 0 },
+    };
+    const size_t num_boxes = sizeof(boxes_seen)/sizeof(boxes_seen[0]);
+
+    av_log(c->fc, AV_LOG_TRACE, "Trying to read moov from hint at offset %"PRId64"\n", offset);
+    if ((ret = avio_seek(pb, offset, SEEK_SET)) < 0)
+        return ret;
+    mov_read_atom_header(pb, &moov);
+    /* Header length is taken from the stream position, so it is right for 64-bit sizes too */
+    payload_pos = avio_tell(pb);
+
+    /* Reject wrong types and sizes that do not cover the header or run past the end of file */
+    if (moov.type != MKTAG('m','o','o','v') || payload_pos < 0 ||
+        moov.size < payload_pos - offset || moov.size > filesize - offset) {
+        av_log(c->fc, AV_LOG_TRACE,
+            "Moov-hint at %"PRId64" failed type&size-check, %"PRId64" != %"PRId64"\n",
+            offset, moov.size, filesize - offset
+        );
+        return -1;
+    }
+    moov_end = offset + moov.size;
+
+    /* Iterate through inner boxes, looking for some must-have types */
+    while ((child_pos = avio_tell(pb)) < moov_end) {
+        mov_read_atom_header(pb, &child);
+        /* A child shorter than its own header (size 0 included) would stall or rewind this
+         * walk forever; one that overruns the moov means this is not a real box tree */
+        if (child.size < avio_tell(pb) - child_pos || child.size > moov_end - child_pos)
+            return -1;
+        for (i = 0; i < num_boxes; i++)
+            if (child.type == boxes_seen[i].type)
+                boxes_seen[i].seen = 1;
+        if ((ret = avio_seek(pb, child_pos + child.size, SEEK_SET)) < 0)
+            return ret;
+    }
+
+    /* Verify that expected children were seen */
+    for (i = 0; i < num_boxes; i++)
+        if (!boxes_seen[i].seen)
+            return -1;
+
+    /* Verify that the remaining root-level boxes end up evenly at the end */
+    while ((child_pos = avio_tell(pb)) < filesize) {
+        mov_read_atom_header(pb, &child);
+        if (child.size == 0) // Child is expected to continue to EOF
+            break;
+        if (child.size < avio_tell(pb) - child_pos || child.size > filesize - child_pos) {
+            av_log(c->fc, AV_LOG_TRACE, "Remaining boxes does not align with EOF\n");
+            return -1;
+        }
+        if ((ret = avio_seek(pb, child_pos + child.size, SEEK_SET)) < 0)
+            return ret;
+    }
+
+    av_log(c->fc, AV_LOG_TRACE, "Moov hint checks out at offset %"PRId64"\n", offset);
+    if ((ret = avio_seek(pb, payload_pos, SEEK_SET)) < 0)
+        return ret;
+    /* NOTE(review): the atom is passed on as a 'root' spanning filesize rather than as the moov
+     * payload, so parsing presumably continues into the trailing boxes - confirm intended */
+    moov.type = MKTAG('r', 'o', 'o', 't');
+    moov.size = filesize;
+    return mov_read_moov(c, pb, moov);
+}
+
+/* Best-effort scan of the file tail for a moov box; restores the stream position afterwards.
+ * Returns 0, or a negative error code if the original position could not be restored. */
+static int mov_heuristic_scan_moov(MOVContext *c, AVIOContext *pb) {
+    uint8_t needle[4] = {'m', 'o', 'o', 'v'}, buf[4096];
+    /* Bytes carried over between reads so that a tag straddling two reads is still found */
+    const size_t carry = sizeof(needle) - 1;
+    size_t buf_fill = 0, i;
+    int64_t ret, new_pos;
+
+    int64_t original_pos = avio_tell(pb);
+    int64_t filesize = avio_size(pb);
+
+    if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || original_pos < 0 || filesize < 0)
+        return 0;
+
+    /* Try to determine position from end, where moov-box would fit with good margin */
+    new_pos = filesize - ((4 * 1024 * 1024) + (filesize / 384));
+    if (new_pos <= original_pos)
+        return 0;
+
+    if (avio_seek(pb, new_pos, SEEK_SET) < 0)
+        goto quit;
+
+    /* Scan for 'moov' string */
+    while (1) {
+        ret = avio_read(pb, buf + buf_fill, sizeof(buf) - buf_fill);
+        if (ret <= 0)
+            goto quit;
+        buf_fill += ret;
+        new_pos += ret;
+        /* Written as i + len <= fill: cannot wrap around when fewer than 4 bytes are buffered,
+         * and it includes the last full window, which the carried tail can never complete */
+        for (i = 0; i + sizeof(needle) <= buf_fill; i++) {
+            if (memcmp(buf + i, needle, sizeof(needle)) == 0) {
+                /* Box starts at its size field, 4 bytes before the tag; on rejection resume after this read */
+                if (mov_try_read_moov(c, pb, new_pos - (int64_t)(buf_fill - i) - 4, filesize) == 0 ||
+                    avio_seek(pb, new_pos, SEEK_SET) < 0)
+                    goto quit;
+            }
+        }
+        if (buf_fill > carry) {
+            memmove(buf, buf + buf_fill - carry, carry);
+            buf_fill = carry;
+        }
+    }
+quit:
+    ret = avio_seek(pb, original_pos, SEEK_SET);
+    return ret < 0 ? (int)ret : 0;
+}
+
 static MOVFragmentStreamInfo * get_frag_stream_info(
     MOVFragmentIndex *frag_index,
     int index,
@@ -9355,15 +9496,20 @@  static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         } else {
             int64_t start_pos = avio_tell(pb);
             int64_t left;
+            uint8_t index_satisfied, at_end;
             int err = parse(c, pb, a);
             if (err < 0) {
                 c->atom_depth --;
                 return err;
             }
-            if (c->found_moov && c->found_mdat && a.size <= INT64_MAX - start_pos &&
-                ((!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete) ||
-                 start_pos + a.size == avio_size(pb))) {
-                if (!(pb->seekable & AVIO_SEEKABLE_NORMAL) || c->fc->flags & AVFMT_FLAG_IGNIDX || c->frag_index.complete)
+            /* More than 2 mdats read with an empty fragment index: assume the file is not fragmented */
+            index_satisfied = !(pb->seekable & AVIO_SEEKABLE_NORMAL)
+                || (c->fc->flags & AVFMT_FLAG_IGNIDX) || c->frag_index.complete
+                || (c->found_mdat > 2 && c->frag_index.nb_items == 0);
+            /* Check the range before adding: start_pos + a.size must not overflow int64_t */
+            at_end = a.size <= INT64_MAX - start_pos && start_pos + a.size == avio_size(pb);
+            if (c->found_moov && c->found_mdat && a.size <= INT64_MAX - start_pos && (index_satisfied || at_end)) {
+                if (!at_end)
                     c->next_root_atom = start_pos + a.size;
                 c->atom_depth --;
                 return 0;
-- 
2.39.2