diff mbox series

[FFmpeg-devel,2/2,v2] avformat/mov: improve HEIF parsing

Message ID 20240110012722.64652-1-jamrial@gmail.com
State New
Headers show
Series None | expand

Commit Message

James Almer Jan. 10, 2024, 1:27 a.m. UTC
Parse iprp and iinf boxes and children boxes to get the actual codec used
(AV1 for avif, HEVC for heic), and properly export extradata and other
properties in a generic way.
The reference files for the avif tests are updated as the extradata is now
exported.

Based on a patch by Swaraj Hota.

Co-authored-by: Swaraj Hota <swarajhota353@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/isom.h                            |  19 +-
 libavformat/mov.c                             | 292 +++++++++++++-----
 .../fate/mov-avif-demux-still-image-1-item    |   2 +-
 .../mov-avif-demux-still-image-multiple-items |   2 +-
 4 files changed, 228 insertions(+), 87 deletions(-)

Comments

James Almer Jan. 15, 2024, 4:50 p.m. UTC | #1
On 1/9/2024 10:27 PM, James Almer wrote:
> Parse iprp and iinf boxes and children boxes to get the actual codec used
> (AV1 for avif, HEVC for heic), and properly export extradata and other
> properties in a generic way.
> The reference files for the avif tests are updated as the extradata is now
> exported.
> 
> Based on a patch by Swaraj Hota.
> 
> Co-authored-by: Swaraj Hota <swarajhota353@gmail.com>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>   libavformat/isom.h                            |  19 +-
>   libavformat/mov.c                             | 292 +++++++++++++-----
>   .../fate/mov-avif-demux-still-image-1-item    |   2 +-
>   .../mov-avif-demux-still-image-multiple-items |   2 +-
>   4 files changed, 228 insertions(+), 87 deletions(-)

Will apply.
diff mbox series

Patch

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 90c4fb5530..f397b3697f 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -262,12 +262,22 @@  typedef struct MOVStreamContext {
     } cenc;
 } MOVStreamContext;
 
+typedef struct HEIFItem {
+    AVStream *st;
+    int item_id;
+    int extent_length;
+    int64_t extent_offset;
+    int64_t size;
+    int type;
+} HEIFItem;
+
 typedef struct MOVContext {
     const AVClass *class; ///< class for private options
     AVFormatContext *fc;
     int time_scale;
     int64_t duration;     ///< duration of the longest track
     int found_moov;       ///< 'moov' atom has been found
+    int found_iloc;       ///< 'iloc' atom has been found
     int found_mdat;       ///< 'mdat' atom has been found
     int found_hdlr_mdta;  ///< 'hdlr' atom with type 'mdta' has been found
     int trak_index;       ///< Index of the current 'trak'
@@ -321,14 +331,9 @@  typedef struct MOVContext {
     uint32_t max_stts_delta;
     int is_still_picture_avif;
     int primary_item_id;
-    struct {
-        int item_id;
-        int extent_length;
-        int64_t extent_offset;
-    } *heif_info;
+    int cur_item_id;
+    HEIFItem *heif_info;
     int heif_info_size;
-    int64_t hvcC_offset;
-    int hvcC_size;
     int interleaved_read;
 } MOVContext;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 12e82c66a9..0a8d752e03 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1233,8 +1233,6 @@  static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         c->isom = 1;
     av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
     av_dict_set(&c->fc->metadata, "major_brand", type, 0);
-    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
-                               !strncmp(type, "mif1", 4);
     minor_ver = avio_rb32(pb); /* minor version */
     av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
 
@@ -4641,10 +4639,6 @@  static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     MOVStreamContext *sc;
     int ret;
 
-    if (c->is_still_picture_avif) {
-        return AVERROR_INVALIDDATA;
-    }
-
     st = avformat_new_stream(c->fc, NULL);
     if (!st) return AVERROR(ENOMEM);
     st->id = -1;
@@ -4916,20 +4910,13 @@  static int mov_read_custom(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return ret;
 }
 
-static int heif_add_stream(MOVContext *c, int item_id)
+static int heif_add_stream(MOVContext *c, HEIFItem *item)
 {
     MOVStreamContext *sc;
     AVStream *st;
-    int item_index = -1;
+
     if (c->fc->nb_streams)
         return AVERROR_INVALIDDATA;
-    for (int i = 0; i < c->heif_info_size; i++)
-        if (c->heif_info[i].item_id == item_id) {
-            item_index = i;
-            break;
-        }
-    if (item_index < 0)
-        return AVERROR_INVALIDDATA;
     st = avformat_new_stream(c->fc, NULL);
     if (!st)
         return AVERROR(ENOMEM);
@@ -4938,22 +4925,10 @@  static int heif_add_stream(MOVContext *c, int item_id)
     if (!sc)
         return AVERROR(ENOMEM);
 
+    item->st = st;
     st->priv_data = sc;
     st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
-    st->codecpar->codec_id = AV_CODEC_ID_AV1;
-    if (c->hvcC_offset >= 0) {
-        int ret;
-        int64_t pos = avio_tell(c->fc->pb);
-        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
-        if (avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET) != c->hvcC_offset) {
-            av_log(c->fc, AV_LOG_ERROR, "Failed to seek to hvcC data.\n");
-            return AVERROR_UNKNOWN;
-        }
-        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
-        if (ret < 0)
-            return ret;
-        avio_seek(c->fc->pb, pos, SEEK_SET);
-    }
+    st->codecpar->codec_id = mov_codec_id(st, item->type);
     sc->ffindex = st->index;
     c->trak_index = st->index;
     st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
@@ -4987,17 +4962,12 @@  static int heif_add_stream(MOVContext *c, int item_id)
     sc->stts_data[0].count = 1;
     // Not used for still images. But needed by mov_build_index.
     sc->stts_data[0].duration = 0;
-    sc->sample_sizes[0] = c->heif_info[item_index].extent_length;
-    sc->chunk_offsets[0] = c->heif_info[item_index].extent_offset;
 
-    mov_build_index(c, st);
     return 0;
 }
 
 static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    c->hvcC_offset = -1;
-    c->hvcC_size = 0;
     while (atom.size > 8) {
         uint32_t tag;
         if (avio_feof(pb))
@@ -5005,23 +4975,9 @@  static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         tag = avio_rl32(pb);
         atom.size -= 4;
         if (tag == MKTAG('h','d','l','r')) {
-            int ret;
             avio_seek(pb, -8, SEEK_CUR);
             atom.size += 8;
-            if ((ret = mov_read_default(c, pb, atom)) < 0)
-                return ret;
-            if (c->is_still_picture_avif) {
-                int ret;
-                // Add a stream for the YUV planes (primary item).
-                if ((ret = heif_add_stream(c, c->primary_item_id)) < 0)
-                    return ret;
-                // For still AVIF images, the meta box contains all the
-                // necessary information that would generally be provided by the
-                // moov box. So simply mark that we have found the moov box so
-                // that parsing can continue.
-                c->found_moov = 1;
-            }
-            return ret;
+            return mov_read_default(c, pb, atom);
         }
     }
     return 0;
@@ -7813,18 +7769,10 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     uint64_t base_offset, extent_offset, extent_length;
     uint8_t value;
 
-    if (!c->is_still_picture_avif) {
-        // * For non-avif, we simply ignore the iloc box.
-        // * For animated avif, we don't care about the iloc box as all the
-        //   necessary information can be found in the moov box.
-        return 0;
-    }
-
-    if (c->heif_info) {
+    if (c->found_iloc) {
         av_log(c->fc, AV_LOG_INFO, "Duplicate iloc box found\n");
         return 0;
     }
-    av_assert0(!c->fc->nb_streams);
 
     version = avio_r8(pb);
     avio_rb24(pb);  // flags.
@@ -7841,10 +7789,12 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     }
     item_count = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
 
-    c->heif_info = av_malloc_array(item_count, sizeof(*c->heif_info));
-    if (!c->heif_info)
-        return AVERROR(ENOMEM);
-    c->heif_info_size = item_count;
+    if (!c->heif_info) {
+        c->heif_info = av_calloc(item_count, sizeof(*c->heif_info));
+        if (!c->heif_info)
+            return AVERROR(ENOMEM);
+        c->heif_info_size = item_count;
+    }
 
     for (int i = 0; i < item_count; i++) {
         int item_id = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
@@ -7872,29 +7822,198 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         }
     }
 
+    c->found_iloc = 1;
     return atom.size;
 }
 
+static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    char item_name[128];
+    int64_t size = atom.size;
+    uint32_t item_type;
+    int item_id;
+    int version, ret;
+
+    version = avio_r8(pb);
+    avio_rb24(pb);  // flags.
+    size -= 4;
+
+    if (version != 2) {
+        av_log(c->fc, AV_LOG_ERROR, "infe: version != 2 not supported.\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    item_id = avio_rb16(pb);
+    avio_rb16(pb); // item_protection_index
+    item_type = avio_rl32(pb);
+    size -= 8;
+    size -= avio_get_str(pb, INT_MAX, item_name, sizeof(item_name));
+
+    av_log(c->fc, AV_LOG_DEBUG, "infe: item_id %d, item_type %s.\n", item_id, av_fourcc2str(item_type));
+
+    // Skip all but the primary item until support is added
+    if (item_id != c->primary_item_id)
+        return 0;
+
+    if (size > 0)
+        avio_skip(pb, size);
+
+    c->heif_info[c->cur_item_id].item_id = item_id;
+    c->heif_info[c->cur_item_id].type    = item_type;
+
+    switch (item_type) {
+    case MKTAG('a','v','0','1'):
+    case MKTAG('h','v','c','1'):
+        ret = heif_add_stream(c, &c->heif_info[c->cur_item_id]);
+        if (ret < 0)
+            return ret;
+        break;
+    default:
+        av_log(c->fc, AV_LOG_DEBUG, "infe: ignoring item_type %s.\n", av_fourcc2str(item_type));
+        break;
+    }
+
+    c->cur_item_id++;
+
+    return 0;
+}
+
+static int mov_read_iinf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int entry_count;
+    int version, ret;
+
+    version = avio_r8(pb);
+    avio_rb24(pb);  // flags.
+    entry_count = version ? avio_rb32(pb) : avio_rb16(pb);
+
+    if (!c->heif_info) {
+        c->heif_info = av_calloc(entry_count, sizeof(*c->heif_info));
+        if (!c->heif_info)
+            return AVERROR(ENOMEM);
+        c->heif_info_size = entry_count;
+    }
+
+    c->cur_item_id = 0;
+
+    for (int i = 0; i < entry_count; i++) {
+        MOVAtom infe;
+
+        infe.size = avio_rb32(pb) - 8;
+        infe.type = avio_rl32(pb);
+        ret = mov_read_infe(c, pb, infe);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
 static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    int size = avio_rb32(pb);
-    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
+    typedef struct MOVAtoms {
+        FFIOContext b;
+        MOVAtom a;
+        uint8_t *data;
+    } MOVAtoms;
+    MOVAtoms *atoms = NULL;
+    MOVAtom a;
+    unsigned count;
+    int nb_atoms = 0;
+    int version, flags;
+    int ret;
+
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    if (a.size < 8 || a.type != MKTAG('i','p','c','o'))
         return AVERROR_INVALIDDATA;
-    size -= 8;
-    while (size > 0) {
-        int sub_size, sub_type;
-        sub_size = avio_rb32(pb);
-        sub_type = avio_rl32(pb);
-        sub_size -= 8;
-        size -= sub_size + 8;
-        if (sub_type == MKTAG('h','v','c','C')) {
-            c->hvcC_offset = avio_tell(pb);
-            c->hvcC_size = sub_size;
+    if (avio_feof(pb))
+        return AVERROR_INVALIDDATA;
+
+    a.size -= 8;
+    while (a.size >= 8) {
+        MOVAtoms *ref = av_dynarray2_add((void**)&atoms, &nb_atoms, sizeof(MOVAtoms), NULL);
+        if (!ref) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        ref->data = NULL;
+        ref->a.size = avio_rb32(pb);
+        ref->a.type = avio_rl32(pb);
+        if (ref->a.size > a.size || ref->a.size < 8)
             break;
+        ref->data = av_malloc(ref->a.size);
+        if (!ref->data) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
         }
-        avio_skip(pb, sub_size);
+        av_log(c->fc, AV_LOG_DEBUG, "ipco: index %d, box type %s.\n", nb_atoms, av_fourcc2str(ref->a.type));
+        avio_seek(pb, -8, SEEK_CUR);
+        if (avio_read(pb, ref->data, ref->a.size) != ref->a.size) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        ffio_init_context(&ref->b, ref->data, ref->a.size, 0, NULL, NULL, NULL, NULL);
+        a.size -= ref->a.size;
     }
-    return atom.size;
+
+    if (a.size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    if (a.size < 8 || a.type != MKTAG('i','p','m','a')) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    version = avio_r8(pb);
+    flags   = avio_rb24(pb);
+    count   = avio_rb32(pb);
+
+    for (int i = 0; i < count; i++) {
+        int item_id = version ? avio_rb32(pb) : avio_rb16(pb);
+        int assoc_count = avio_r8(pb);
+
+        for (int j = 0; j < assoc_count; j++) {
+            MOVAtoms *atom;
+            int index = avio_r8(pb) & 0x7f;
+            if (flags & 1) {
+                index <<= 8;
+                index |= avio_r8(pb);
+            }
+            if (index > nb_atoms || index <= 0) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            atom = &atoms[--index];
+
+            av_log(c->fc, AV_LOG_DEBUG, "ipma: property_index %d, item_id %d, item_type %s.\n",
+                   index + 1, item_id, av_fourcc2str(atom->a.type));
+
+            // Skip properties referencing items other than the primary item until support is added
+            if (item_id != c->primary_item_id)
+                continue;
+
+            ret = mov_read_default(c, &atom->b.pub,
+                                   (MOVAtom) { .size = atom->a.size,
+                                               .type = MKTAG('i','p','c','o') });
+            if (ret < 0)
+                goto fail;
+        }
+    }
+
+    ret = 0;
+fail:
+    for (int i = 0; i < nb_atoms; i++)
+        av_free(atoms[i].data);
+    av_free(atoms);
+
+    return ret;
 }
 
 static const MOVParseTableEntry mov_default_parse_table[] = {
@@ -8005,6 +8124,7 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('p','i','t','m'), mov_read_pitm },
 { MKTAG('e','v','c','C'), mov_read_glbl },
 { MKTAG('i','p','r','p'), mov_read_iprp },
+{ MKTAG('i','i','n','f'), mov_read_iinf },
 { 0, NULL }
 };
 
@@ -8672,13 +8792,29 @@  static int mov_read_header(AVFormatContext *s)
             av_log(s, AV_LOG_ERROR, "error reading header\n");
             return err;
         }
-    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->moov_retry++);
-    if (!mov->found_moov) {
+    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->found_iloc && !mov->moov_retry++);
+    if (!mov->found_moov && !mov->found_iloc) {
         av_log(s, AV_LOG_ERROR, "moov atom not found\n");
         return AVERROR_INVALIDDATA;
     }
     av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));
 
+    if (mov->found_iloc) {
+        for (i = 0; i < mov->heif_info_size; i++) {
+            MOVStreamContext *sc;
+            HEIFItem *item = &mov->heif_info[i];
+
+            if (!item->st)
+                continue;
+
+            sc = item->st->priv_data;
+            sc->sample_sizes[0] = item->extent_length;
+            sc->chunk_offsets[0] = item->extent_offset;
+
+            mov_build_index(mov, item->st);
+        }
+    }
+
     if (pb->seekable & AVIO_SEEKABLE_NORMAL) {
         if (mov->nb_chapter_tracks > 0 && !mov->ignore_chapters)
             mov_read_chapters(s);
diff --git a/tests/ref/fate/mov-avif-demux-still-image-1-item b/tests/ref/fate/mov-avif-demux-still-image-1-item
index b74a2d9dbc..1ead593caa 100644
--- a/tests/ref/fate/mov-avif-demux-still-image-1-item
+++ b/tests/ref/fate/mov-avif-demux-still-image-1-item
@@ -1,7 +1,7 @@ 
 #format: frame checksums
 #version: 2
 #hash: MD5
-#extradata 0,                              13, b52ae298d37128862ef1918cf916239c
+#extradata 0,                               4, b24b71499a8480fa4469bcbcba2140aa
 #tb 0: 1/1
 #media_type 0: video
 #codec_id 0: av1
diff --git a/tests/ref/fate/mov-avif-demux-still-image-multiple-items b/tests/ref/fate/mov-avif-demux-still-image-multiple-items
index b74a2d9dbc..1ead593caa 100644
--- a/tests/ref/fate/mov-avif-demux-still-image-multiple-items
+++ b/tests/ref/fate/mov-avif-demux-still-image-multiple-items
@@ -1,7 +1,7 @@ 
 #format: frame checksums
 #version: 2
 #hash: MD5
-#extradata 0,                              13, b52ae298d37128862ef1918cf916239c
+#extradata 0,                               4, b24b71499a8480fa4469bcbcba2140aa
 #tb 0: 1/1
 #media_type 0: video
 #codec_id 0: av1