[FFmpeg-devel] lavf/mov: initial support for reading HEIF images

Submitted by Rodger Combs on Aug. 19, 2017, 7:24 a.m.

Details

Message ID 20170819072456.11276-1-rodger.combs@gmail.com
State New
Headers show

Commit Message

Rodger Combs Aug. 19, 2017, 7:24 a.m.
There's a decent chance this doesn't work on pretty much anything other than
the samples I tested it with, but I haven't found very many samples, and I
don't want to implement the rest of the features blind with nothing but a spec;
most of it is pretty over-the-top anyway.

For some reason my phone running iOS 11 doesn't actually export HEIFs; it just
gives me JPEGs with a .HEIC extension.

HEIF is basically just an overengineered new mechanism for attaching still
pictures to MP4 files. Its atoms can exist along with a moov, or without one.

This can probably seek-thrash pretty badly in some cases, so if it becomes
common to use this to attach pictures to audio or video files, it'd probably
be worth adding an "ignore_attached_pics" option, a la "ignore_chapters". It'd
be possible to optimize some of the seeks out by adding some more internal
state (this is a naive implementation of a very implementer-unfriendly format),
but some are inherent to the design, like with "chap" chapters.

Short rant: this whole format is crazy. There are a bunch of variable-size
fields ("in case you ever need more than 2^16 pictures in a file!", and also
"in case you want to save space by using 3 bytes for the size instead of 4!"),
and everything has extra cruft at every opportunity in the name of future
extensibility. If somebody wants to explain why an 8-byte atom containing a
width and a height needs another 4 bytes for version and flags fields, I'm
all ears. There are also some count fields that serve no purpose within their
parent atom.

For some reason the format supports splitting a picture into multiple byte
ranges, which can be in separate mdats, or separate positions in the same
mdat, and there are 3 different ways to address them (I've only implemented
the one used in the samples I had). If anyone knows why anyone would ever
want to do that, please inform me.

This will definitely completely fall apart if any of the metadata atoms come
before the iloc, because the per-item streams they refer to are only created
when the iloc is parsed. Hopefully that never happens, because it would be a
pain to work around.

TL;DR this format is bad, but at least we're finally moving past JPEG.
---
 libavformat/isom.h |   2 +
 libavformat/mov.c  | 331 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 326 insertions(+), 7 deletions(-)

Comments

Carl Eugen Hoyos Aug. 19, 2017, 9:40 a.m.
2017-08-19 9:24 GMT+02:00 Rodger Combs <rodger.combs@gmail.com>:

>  AVInputFormat ff_mov_demuxer = {
> -    .name           = "mov,mp4,m4a,3gp,3g2,mj2",
> +    .name           = "mov,mp4,m4a,3gp,3g2,mj2,heif,heic",

Since it isn't necessary for the new feature:
Please don't change this line.

Carl Eugen

Patch hide | download patch | download mbox

diff --git a/libavformat/isom.h b/libavformat/isom.h
index ff009b0896..a3ba10bf76 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -223,7 +223,9 @@  typedef struct MOVContext {
     int found_moov;       ///< 'moov' atom has been found
     int found_mdat;       ///< 'mdat' atom has been found
     int found_hdlr_mdta;  ///< 'hdlr' atom with type 'mdta' has been found
+    int found_iloc;       ///< 'iloc' atom has been found
     int trak_index;       ///< Index of the current 'trak'
+    int cur_stream_index; ///< Stream currently being populated
     char **meta_keys;
     unsigned meta_keys_count;
     DVDemuxContext *dv_demux;
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 522ce60c2d..87dbc83d6f 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1678,9 +1678,9 @@  static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     AVStream *st;
     int ret;
 
-    if (c->fc->nb_streams < 1)
+    if (c->cur_stream_index < 0)
         return 0;
-    st = c->fc->streams[c->fc->nb_streams-1];
+    st = c->fc->streams[c->cur_stream_index];
 
     if ((uint64_t)atom.size > (1<<30))
         return AVERROR_INVALIDDATA;
@@ -3742,6 +3742,8 @@  static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     sc = av_mallocz(sizeof(MOVStreamContext));
     if (!sc) return AVERROR(ENOMEM);
 
+    c->cur_stream_index = st->index;
+
     st->priv_data = sc;
     st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
     sc->ffindex = st->index;
@@ -3976,6 +3978,10 @@  static int mov_read_custom(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
 static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
+    // Skip version and flags
+    avio_skip(pb, 4);
+    atom.size -= 4;
+
     while (atom.size > 8) {
         uint32_t tag = avio_rl32(pb);
         atom.size -= 4;
@@ -5470,6 +5476,310 @@  static int mov_read_dops(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+/**
+ * Read an unsigned integer stored in a variable number of bytes, most
+ * significant byte first.
+ *
+ * @param pb  I/O context to read from
+ * @param len number of bytes to consume; 0 reads nothing and yields 0
+ * @return the bytes read, folded into a 64-bit value
+ */
+static uint64_t read_length(AVIOContext *pb, unsigned len)
+{
+    uint64_t value = 0;
+
+    while (len--) {
+        value <<= 8;
+        value  |= avio_r8(pb);
+    }
+
+    return value;
+}
+
+/* Offset types an iloc item can declare for locating its extents. */
+enum HEIFOffsetType {
+    HEIF_OFFSET_FILE = 0, // the only type mov_read_iloc currently accepts
+    HEIF_OFFSET_MDAT = 1,
+    HEIF_OFFSET_ITEM = 2,
+};
+
+/**
+ * Append one extent of an item to a stream's attached picture.
+ *
+ * Seeks to the extent, appends its bytes to st->attached_pic and restores the
+ * previous read position so the caller can keep parsing the iloc payload.
+ *
+ * @param pb   I/O context
+ * @param st   stream whose attached_pic receives the data
+ * @param pos  absolute file position of the extent
+ * @param size number of bytes to append
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int mov_iloc_append_extent(AVIOContext *pb, AVStream *st,
+                                  int64_t pos, int size)
+{
+    int64_t resume_pos = avio_tell(pb);
+    int64_t seek_ret;
+    int ret;
+
+    //FIXME: this will thrash badly if there are lots of extents or lots of items
+    if ((seek_ret = avio_seek(pb, pos, SEEK_SET)) < 0)
+        return (int)seek_ret;
+
+    if ((ret = av_append_packet(pb, &st->attached_pic, size)) < 0)
+        return ret;
+
+    if ((seek_ret = avio_seek(pb, resume_pos, SEEK_SET)) < 0)
+        return (int)seek_ret;
+
+    return 0;
+}
+
+/**
+ * Parse an 'iloc' atom.
+ *
+ * Every item listed in the atom becomes a new video stream flagged as an
+ * attached picture; the stream id is the item id and the item's extents are
+ * concatenated into st->attached_pic. Only items addressed by file offset
+ * (HEIF_OFFSET_FILE) are supported.
+ *
+ * @param c    demuxer context; found_iloc is set once all items were read
+ * @param pb   I/O context, expected to be positioned at the atom payload
+ * @param atom atom being parsed (not used)
+ * @return 0 on success, AVERROR_PATCHWELCOME for unsupported versions, field
+ *         widths or offset types, AVERROR_INVALIDDATA for truncated or
+ *         out-of-range data, or another negative AVERROR code
+ */
+static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int version, offset_size, length_size, base_offset_size, index_size;
+    unsigned item_count, i, j;
+    int ret = 0;
+
+    version = avio_r8(pb);
+    avio_rb24(pb); // flags
+
+    // Two bytes pack four 4-bit field widths, high nibble first
+    offset_size        = avio_r8(pb);
+    length_size        = offset_size & 0xf;
+    offset_size      >>= 4;
+    base_offset_size   = avio_r8(pb);
+    index_size         = base_offset_size & 0xf;
+    base_offset_size >>= 4;
+
+    if (version > 2) {
+        avpriv_request_sample(c->fc, "iloc atom Version %d", version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (offset_size > 8 || length_size > 8 || base_offset_size > 8 || index_size > 8) {
+        avpriv_request_sample(c->fc, ">8-byte sizes in iloc");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    item_count = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
+
+    for (i = 0; i < item_count; i++) {
+        enum HEIFOffsetType offset_type = HEIF_OFFSET_FILE;
+        int64_t base_offset;
+        unsigned nb_extents;
+        int item_id;
+        MOVStreamContext *sc;
+        AVStream *st;
+
+        item_id = (version < 2) ? avio_rb16(pb) : (int)avio_rb32(pb);
+
+        if (version > 0) {
+            avio_r8(pb); // reserved
+            offset_type = avio_r8(pb) & 0xf;
+        }
+
+        if (offset_type != HEIF_OFFSET_FILE) {
+            avpriv_request_sample(c->fc, "iloc offset type %d", offset_type);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        avio_rb16(pb); // data reference index, not used
+        base_offset = read_length(pb, base_offset_size);
+        nb_extents  = avio_rb16(pb);
+
+        // Validate the item header before creating a stream for it, so a
+        // truncated file cannot produce up to item_count empty streams.
+        if (avio_feof(pb) || base_offset < 0)
+            return AVERROR_INVALIDDATA;
+
+        st = avformat_new_stream(c->fc, NULL);
+        if (!st)
+            return AVERROR(ENOMEM);
+        sc = av_mallocz(sizeof(*sc));
+        if (!sc)
+            return AVERROR(ENOMEM);
+        st->priv_data = sc;
+
+        st->id                   = item_id;
+        st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+        st->disposition         |= AV_DISPOSITION_ATTACHED_PIC;
+
+        for (j = 0; j < nb_extents; j++) {
+            int64_t ext_offset, ext_len;
+
+            if (version > 0)
+                read_length(pb, index_size); // extent index, unused for file offsets
+            ext_offset = read_length(pb, offset_size);
+            ext_len    = read_length(pb, length_size);
+
+            // av_append_packet() takes an int size, and the absolute position
+            // must not overflow a signed 64-bit offset.
+            if (avio_feof(pb) ||
+                ext_offset < 0 || ext_offset > INT64_MAX - base_offset ||
+                ext_len    < 0 || ext_len    > INT_MAX) {
+                ret = AVERROR_INVALIDDATA;
+                break;
+            }
+
+            ret = mov_iloc_append_extent(pb, st, base_offset + ext_offset,
+                                         (int)ext_len);
+            if (ret < 0)
+                break;
+        }
+
+        if (ret < 0) {
+            av_packet_unref(&st->attached_pic);
+            return ret;
+        }
+
+        st->attached_pic.stream_index = st->index;
+        st->attached_pic.flags       |= AV_PKT_FLAG_KEY;
+    }
+
+    c->found_iloc = 1;
+
+    return 0;
+}
+
+/**
+ * Parse an 'iinf' atom: consume its header fields and hand the remaining
+ * payload to mov_read_default() so the child atoms are dispatched.
+ *
+ * @param c    demuxer context
+ * @param pb   I/O context, expected to be positioned at the atom payload
+ * @param atom atom being parsed; size is treated as the payload byte count
+ * @return AVERROR_INVALIDDATA if the payload is too short to hold the header
+ *         fields, otherwise the result of mov_read_default()
+ */
+static int mov_read_iinf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int version, header_size;
+
+    // Version and flags take 4 bytes; without them the layout is unknown
+    if (atom.size < 4)
+        return AVERROR_INVALIDDATA;
+
+    version = avio_r8(pb);
+    avio_rb24(pb); // flags
+
+    // The entry count is 32 bits wide for version > 0 and 16 bits otherwise
+    header_size = 4 + (version > 0 ? 4 : 2);
+
+    // Never pass a negative size on to mov_read_default()
+    if (atom.size < header_size)
+        return AVERROR_INVALIDDATA;
+
+    // Completely useless entry count
+    if (version > 0)
+        avio_rb32(pb);
+    else
+        avio_rb16(pb);
+
+    atom.size -= header_size;
+
+    return mov_read_default(c, pb, atom);
+}
+
+/**
+ * Parse an 'infe' (item info entry) atom and set the codec of the stream
+ * whose id matches the entry's item id.
+ *
+ * @param c    demuxer context
+ * @param pb   I/O context, expected to be positioned at the atom payload
+ * @param atom atom being parsed (not used)
+ * @return 0 on success or when the entry is skipped, AVERROR_INVALIDDATA if no
+ *         stream has the entry's item id, AVERROR_PATCHWELCOME if the item is
+ *         marked as protected
+ */
+static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st = NULL;
+    uint32_t codec_tag;
+    int stream_id;
+    unsigned i;
+    int version = avio_r8(pb);
+    avio_rb24(pb); // flags
+
+    // Per ISO/IEC 14496-12, the 4-byte item type only exists from version 2
+    // on; older entries carry an item name string at that position. Reading
+    // it as a codec tag would be meaningless, so skip such entries.
+    if (version < 2) {
+        avpriv_request_sample(c->fc, "infe atom Version %d", version);
+        return 0;
+    }
+
+    stream_id = (version >= 3) ? avio_rb32(pb) : avio_rb16(pb);
+
+    for (i = 0; i < c->fc->nb_streams; i++) {
+        if (c->fc->streams[i]->id == stream_id) {
+            st = c->fc->streams[i];
+            break;
+        }
+    }
+
+    if (!st)
+        return AVERROR_INVALIDDATA;
+
+    // A non-zero protection index marks the item as protected
+    if (avio_rb16(pb) != 0) {
+        avpriv_request_sample(c->fc, "protected HEIF");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    codec_tag = avio_rl32(pb);
+    st->codecpar->codec_id = mov_codec_id(st, codec_tag);
+
+    //FIXME: there may be more information here; no samples I've seen use it
+    return 0;
+}
+
+/**
+ * Parse a single property atom stored at a saved file position, then restore
+ * the current read position.
+ *
+ * The atom is dispatched through mov_read_default() with a synthetic 'ipco'
+ * parent whose size is exactly the size of the property atom.
+ *
+ * @param c    demuxer context; c->cur_stream_index selects the target stream
+ * @param pb   I/O context
+ * @param pos  absolute file position of the property atom header
+ * @param size total size of the property atom, header included
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int mov_iprp_read_property(MOVContext *c, AVIOContext *pb,
+                                  int64_t pos, int64_t size)
+{
+    MOVAtom parent = { .size = size, .type = MKTAG('i','p','c','o') };
+    int64_t resume_pos = avio_tell(pb);
+    int64_t seek_ret;
+    int ret;
+
+    if ((seek_ret = avio_seek(pb, pos, SEEK_SET)) < 0)
+        return (int)seek_ret;
+
+    if ((ret = mov_read_default(c, pb, parent)) < 0)
+        return ret;
+
+    if ((seek_ret = avio_seek(pb, resume_pos, SEEK_SET)) < 0)
+        return (int)seek_ret;
+
+    return 0;
+}
+
+/**
+ * Parse an 'iprp' atom.
+ *
+ * The atom must contain an 'ipco' atom followed by an 'ipma' atom. The
+ * position and size of every property inside 'ipco' is recorded first; then,
+ * for each association in 'ipma', the referenced property is parsed with
+ * c->cur_stream_index pointing at the stream whose id equals the item id.
+ *
+ * @param c    demuxer context
+ * @param pb   I/O context, expected to be positioned at the atom payload
+ * @param atom atom being parsed; size is treated as the payload byte count
+ * @return 0 on success, AVERROR_INVALIDDATA on malformed or truncated data or
+ *         an unknown item id, or another negative AVERROR code
+ */
+static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    typedef struct AtomPos {
+        int64_t pos;
+        int64_t size;
+    } AtomPos;
+    int version, flags;
+    unsigned count, i, j, s;
+    AtomPos *atoms = NULL;
+    int nb_atoms = 0;
+    int ret = 0;
+    int64_t ret64;
+
+    MOVAtom a;
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    // The property container must fit inside the enclosing atom
+    if (a.size < 8 || a.size > atom.size || a.type != MKTAG('i','p','c','o'))
+        return AVERROR_INVALIDDATA;
+
+    a.size -= 8;
+
+    // Record where each property lives; they are parsed later, on demand
+    while (a.size >= 8) {
+        AtomPos *ref = av_dynarray2_add((void**)&atoms, &nb_atoms, sizeof(AtomPos), NULL);
+        if (!ref) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        ref->pos = avio_tell(pb);
+        ref->size = avio_rb32(pb);
+        if (ref->size > a.size || ref->size < 8)
+            break;
+        if ((ret64 = avio_seek(pb, ref->pos + ref->size, SEEK_SET)) < 0) {
+            ret = (int)ret64;
+            goto fail;
+        }
+        a.size -= ref->size;
+    }
+
+    // Leftover bytes mean a property had a bogus size
+    if (a.size) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    a.size = avio_rb32(pb);
+    a.type = avio_rl32(pb);
+
+    if (a.size < 8 || a.type != MKTAG('i','p','m','a')) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    version = avio_r8(pb);
+    flags   = avio_rb24(pb);
+
+    count = avio_rb32(pb);
+
+    for (i = 0; i < count; i++) {
+        int stream_id   = (version >= 1) ? avio_rb32(pb) : avio_rb16(pb);
+        unsigned acount = avio_r8(pb);
+
+        // Stop on truncated input instead of iterating over a bogus count
+        if (avio_feof(pb)) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        for (s = 0; s < c->fc->nb_streams; s++) {
+            if (c->fc->streams[s]->id == stream_id)
+                break;
+        }
+
+        if (s == c->fc->nb_streams) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        // Property handlers store their data in this stream
+        c->cur_stream_index = s;
+
+        for (j = 0; j < acount; j++) {
+            // The top bit of the first byte is a flag, not part of the index
+            int index = avio_r8(pb) & 0x7f;
+            if (flags & 1) {
+                index <<= 8;
+                index |= avio_r8(pb);
+            }
+
+            // Index 0 means "no property associated" (ISO/IEC 23008-12)
+            if (index == 0)
+                continue;
+
+            // Indices are 1-based
+            if (index > nb_atoms) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+
+            ret = mov_iprp_read_property(c, pb, atoms[index - 1].pos,
+                                         atoms[index - 1].size);
+            if (ret < 0)
+                goto fail;
+        }
+    }
+
+    ret = 0;
+
+fail:
+    av_free(atoms);
+    return ret;
+}
+
+/**
+ * Parse an 'ispe' atom: store the width and height it carries in the codec
+ * parameters of the stream selected by c->cur_stream_index.
+ *
+ * @param c    demuxer context
+ * @param pb   I/O context, expected to be positioned at the atom payload
+ * @param atom atom being parsed (not used)
+ * @return always 0; nothing is read when no stream is selected
+ */
+static int mov_read_ispe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *stream;
+
+    if (c->cur_stream_index < 0)
+        return 0;
+
+    stream = c->fc->streams[c->cur_stream_index];
+
+    avio_rb32(pb); // version (8 bits) followed by flags (24 bits)
+
+    stream->codecpar->width  = avio_rb32(pb);
+    stream->codecpar->height = avio_rb32(pb);
+
+    return 0;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@@ -5555,6 +5865,11 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('S','m','D','m'), mov_read_smdm },
 { MKTAG('C','o','L','L'), mov_read_coll },
 { MKTAG('v','p','c','C'), mov_read_vpcc },
+{ MKTAG('i','l','o','c'), mov_read_iloc },
+{ MKTAG('i','i','n','f'), mov_read_iinf },
+{ MKTAG('i','n','f','e'), mov_read_infe },
+{ MKTAG('i','p','r','p'), mov_read_iprp },
+{ MKTAG('i','s','p','e'), mov_read_ispe },
 { 0, NULL }
 };
 
@@ -5701,6 +6016,7 @@  static int mov_probe(AVProbeData *p)
         case MKTAG('p','n','o','t'): /* detect movs with preview pics like ew.mov and april.mov */
         case MKTAG('u','d','t','a'): /* Packet Video PVAuthor adds this and a lot of more junk */
         case MKTAG('f','t','y','p'):
+        case MKTAG('i','l','o','c'):
             if (AV_RB32(p->buf+offset) < 8 &&
                 (AV_RB32(p->buf+offset) != 1 ||
                  offset + 12 > (unsigned int)p->buf_size ||
@@ -6167,6 +6483,7 @@  static int mov_read_header(AVFormatContext *s)
 
     mov->fc = s;
     mov->trak_index = -1;
+    mov->cur_stream_index = -1;
     /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
     if (pb->seekable & AVIO_SEEKABLE_NORMAL)
         atom.size = avio_size(pb);
@@ -6182,8 +6499,8 @@  static int mov_read_header(AVFormatContext *s)
         mov_read_close(s);
         return err;
     }
-    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->moov_retry++);
-    if (!mov->found_moov) {
+    } while ((pb->seekable & AVIO_SEEKABLE_NORMAL) && !mov->found_moov && !mov->found_iloc && !mov->moov_retry++);
+    if (!mov->found_moov && !mov->found_iloc) {
         av_log(s, AV_LOG_ERROR, "moov atom not found\n");
         mov_read_close(s);
         return AVERROR_INVALIDDATA;
@@ -6767,18 +7084,18 @@  static const AVOption mov_options[] = {
 };
 
 static const AVClass mov_class = {
-    .class_name = "mov,mp4,m4a,3gp,3g2,mj2",
+    .class_name = "mov,mp4,m4a,3gp,3g2,mj2,heif,heic",
     .item_name  = av_default_item_name,
     .option     = mov_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVInputFormat ff_mov_demuxer = {
-    .name           = "mov,mp4,m4a,3gp,3g2,mj2",
+    .name           = "mov,mp4,m4a,3gp,3g2,mj2,heif,heic",
     .long_name      = NULL_IF_CONFIG_SMALL("QuickTime / MOV"),
     .priv_class     = &mov_class,
     .priv_data_size = sizeof(MOVContext),
-    .extensions     = "mov,mp4,m4a,3gp,3g2,mj2",
+    .extensions     = "mov,mp4,m4a,3gp,3g2,mj2,heif,heic",
     .read_probe     = mov_probe,
     .read_header    = mov_read_header,
     .read_packet    = mov_read_packet,