diff mbox series

[FFmpeg-devel,3/4,v4] avformat/mov: add support for tiled HEIF still images

Message ID 20240131174718.17829-3-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/4,v3] avformat/mov: ignore item boxes for animated heif | expand

Checks

Context Check Description
yinshiyou/make_fate_loongarch64 success Make fate finished
yinshiyou/make_loongarch64 warning New warnings during build
andriy/make_fate_x86 success Make fate finished
andriy/make_x86 warning New warnings during build

Commit Message

James Almer Jan. 31, 2024, 5:47 p.m. UTC
Export each tile as its own stream, and the tiling information as a Stream
Group of type TILE_GRID.
This also enables exporting other stream items like thumbnails, which may be
present in non tiled HEIF images too. For those, the primary stream will be
tagged with the default disposition.

Based on a patch by Swaraj Hota

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavformat/avformat.h |   6 +
 libavformat/dump.c     |   2 +
 libavformat/isom.h     |   9 +-
 libavformat/mov.c      | 369 +++++++++++++++++++++++++++++++++++++----
 4 files changed, 350 insertions(+), 36 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 6577f13ef1..cf7744ce2e 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -811,6 +811,12 @@  typedef struct AVIndexEntry {
  * The video stream contains still images.
  */
 #define AV_DISPOSITION_STILL_IMAGE          (1 << 20)
+/**
+ * The video stream is intended to be merged with another stream before
+ * presentation.
+ * Used for example to signal the stream contains a tile from a HEIF grid.
+ */
+#define AV_DISPOSITION_TILE                 (1 << 21)
 
 /**
  * @return The AV_DISPOSITION_* flag corresponding to disp or a negative error
diff --git a/libavformat/dump.c b/libavformat/dump.c
index c9b7369bcd..de0e1d8b39 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -640,6 +640,8 @@  static void dump_stream_format(const AVFormatContext *ic, int i,
         av_log(NULL, log_level, " (still image)");
     if (st->disposition & AV_DISPOSITION_NON_DIEGETIC)
         av_log(NULL, log_level, " (non-diegetic)");
+    if (st->disposition & AV_DISPOSITION_TILE)
+        av_log(NULL, log_level, " (tile)");
     av_log(NULL, log_level, "\n");
 
     dump_metadata(NULL, st->metadata, extra_indent, log_level);
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 21caaac256..8ce89059d3 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -267,10 +267,12 @@  typedef struct HEIFItem {
     int item_id;
     int64_t extent_length;
     int64_t extent_offset;
-    int64_t size;
+    int tile_rows;
+    int tile_cols;
     int width;
     int height;
     int type;
+    int is_idat_relative;
 } HEIFItem;
 
 typedef struct MOVContext {
@@ -336,6 +338,11 @@  typedef struct MOVContext {
     int cur_item_id;
     HEIFItem *heif_info;
     int heif_info_size;
+    int grid_item_id;
+    int thmb_item_id;
+    int16_t *tile_id_list;
+    int nb_tiles;
+    int64_t idat_offset;
     int interleaved_read;
 } MOVContext;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index af95e1f662..b390c265ea 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -185,6 +185,30 @@  static int mov_read_mac_string(MOVContext *c, AVIOContext *pb, int len,
     return p - dst;
 }
 
+/**
+ * Select the stream that the box currently being parsed applies to.
+ *
+ * The HEIF item table is searched for an entry that has a stream attached
+ * and whose stream id equals c->cur_item_id. If no entry matches, the last
+ * stream of the format context is returned instead.
+ *
+ * @param c demuxer context
+ * @return the selected stream, or NULL if the format context has no streams
+ */
+static AVStream *get_curr_st(MOVContext *c)
+{
+    if (c->fc->nb_streams < 1)
+        return NULL;
+
+    for (int idx = 0; idx < c->heif_info_size; idx++) {
+        AVStream *candidate = c->heif_info[idx].st;
+
+        if (candidate && candidate->id == c->cur_item_id)
+            return candidate;
+    }
+
+    /* No item stream matched: fall back to the last stream in the list. */
+    return c->fc->streams[c->fc->nb_streams - 1];
+}
+
 static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len)
 {
     AVStream *st;
@@ -1767,9 +1791,9 @@  static int mov_read_colr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     uint16_t color_primaries, color_trc, color_matrix;
     int ret;
 
-    if (c->fc->nb_streams < 1)
+    st = get_curr_st(c);
+    if (!st)
         return 0;
-    st = c->fc->streams[c->fc->nb_streams - 1];
 
     ret = ffio_read_size(pb, color_parameter_type, 4);
     if (ret < 0)
@@ -2117,9 +2141,9 @@  static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     AVStream *st;
     int ret;
 
-    if (c->fc->nb_streams < 1)
+    st = get_curr_st(c);
+    if (!st)
         return 0;
-    st = c->fc->streams[c->fc->nb_streams-1];
 
     if ((uint64_t)atom.size > (1<<30))
         return AVERROR_INVALIDDATA;
@@ -4951,12 +4975,10 @@  static int heif_add_stream(MOVContext *c, HEIFItem *item)
     st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codecpar->codec_id = mov_codec_id(st, item->type);
     sc->ffindex = st->index;
-    c->trak_index = st->index;
     st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
     st->time_base.num = st->time_base.den = 1;
     st->nb_frames = 1;
     sc->time_scale = 1;
-    sc = st->priv_data;
     sc->pb = c->fc->pb;
     sc->pb_is_copied = 1;
 
@@ -7784,11 +7806,55 @@  static int mov_read_pitm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return atom.size;
 }
 
+/**
+ * Handler registered for the 'idat' box: remember the reader's current
+ * position in c->idat_offset so that idat-relative item extents can be
+ * resolved later.
+ *
+ * NOTE(review): presumably pb is positioned at the start of the box payload
+ * when this handler runs -- confirm against the generic box parser.
+ *
+ * @return always 0
+ */
+static int mov_read_idat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    const int64_t cur_pos = avio_tell(pb);
+
+    c->idat_offset = cur_pos;
+
+    return 0;
+}
+
+/**
+ * Read the header of a HEIF 'grid' item.
+ *
+ * Seeks to the item's extent (shifted by the recorded idat position when the
+ * item is idat-relative), reads the version and flags bytes, the row and
+ * column counts (each stored minus one) and the output dimensions (32-bit
+ * fields when bit 0 of flags is set, 16-bit otherwise), then seeks back to
+ * the position the reader had on entry.
+ *
+ * @param s         demuxer context; s->pb must be seekable
+ * @param tile_grid receives the output width and height
+ * @param item      grid item; receives tile_rows and tile_cols
+ * @return 0 on success, AVERROR_PATCHWELCOME for non seekable input,
+ *         AVERROR_INVALIDDATA if an idat-relative item has no idat offset,
+ *         or the negative error code of a failed seek
+ */
+static int read_image_grid(AVFormatContext *s, AVStreamGroupTileGrid *tile_grid,
+                           HEIFItem *item)
+{
+    MOVContext *c = s->priv_data;
+    int64_t offset = 0, pos = avio_tell(s->pb);
+    int64_t seek_ret;
+    uint8_t flags;
+
+    if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL)) {
+        av_log(c->fc, AV_LOG_INFO, "grid box with non seekable input\n");
+        return AVERROR_PATCHWELCOME;
+    }
+    if (item->is_idat_relative) {
+        if (!c->idat_offset) {
+            av_log(c->fc, AV_LOG_ERROR, "missing idat box required by the image grid\n");
+            return AVERROR_INVALIDDATA;
+        }
+        offset = c->idat_offset;
+    }
+
+    /* Do not parse grid fields from an arbitrary position if the seek fails. */
+    seek_ret = avio_seek(s->pb, item->extent_offset + offset, SEEK_SET);
+    if (seek_ret < 0)
+        return (int)seek_ret;
+
+    avio_r8(s->pb);    /* version */
+    flags = avio_r8(s->pb);
+
+    item->tile_rows = avio_r8(s->pb) + 1;
+    item->tile_cols = avio_r8(s->pb) + 1;
+    /* actual width and height of output image */
+    tile_grid->width  = (flags & 1) ? avio_rb32(s->pb) : avio_rb16(s->pb);
+    tile_grid->height = (flags & 1) ? avio_rb32(s->pb) : avio_rb16(s->pb);
+
+    av_log(c->fc, AV_LOG_TRACE, "grid: grid_rows %d grid_cols %d output_width %d output_height %d\n",
+           item->tile_rows, item->tile_cols, tile_grid->width, tile_grid->height);
+
+    /* Restore the caller's position; report failure rather than continuing
+     * with the reader left inside the grid item. */
+    seek_ret = avio_seek(s->pb, pos, SEEK_SET);
+    if (seek_ret < 0)
+        return (int)seek_ret;
+
+    return 0;
+}
+
 static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     int version, offset_size, length_size, base_offset_size, index_size;
     int item_count, extent_count;
-    uint64_t base_offset, extent_offset, extent_length;
+    int64_t base_offset, extent_offset, extent_length;
     uint8_t value;
 
     if (c->found_moov) {
@@ -7835,6 +7901,7 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         avio_rb16(pb);  // data_reference_index.
         if (rb_size(pb, &base_offset, base_offset_size) < 0)
             return AVERROR_INVALIDDATA;
+        av_log(c->fc, AV_LOG_TRACE, "iloc: base_offset %"PRId64"\n", base_offset);
         extent_count = avio_rb16(pb);
         if (extent_count > 1) {
             // For still AVIF images, we only support one extent item.
@@ -7845,6 +7912,8 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             if (rb_size(pb, &extent_offset, offset_size) < 0 ||
                 rb_size(pb, &extent_length, length_size) < 0)
                 return AVERROR_INVALIDDATA;
+            if (offset_type == 1)
+                c->heif_info[i].is_idat_relative = 1;
             c->heif_info[i].extent_length = extent_length;
             c->heif_info[i].extent_offset = base_offset + extent_offset;
             av_log(c->fc, AV_LOG_TRACE, "iloc: item_idx %d, offset_type %d, "
@@ -7883,10 +7952,6 @@  static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     av_log(c->fc, AV_LOG_TRACE, "infe: item_id %d, item_type %s, item_name %s\n",
            item_id, av_fourcc2str(item_type), item_name);
 
-    // Skip all but the primary item until support is added
-    if (item_id != c->primary_item_id)
-        return 0;
-
     if (size > 0)
         avio_skip(pb, size);
 
@@ -7900,6 +7965,9 @@  static int mov_read_infe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         if (ret < 0)
             return ret;
         break;
+    case MKTAG('g','r','i','d'):
+        c->grid_item_id = item_id;
+        break;
     default:
         av_log(c->fc, AV_LOG_TRACE, "infe: ignoring item_type %s\n", av_fourcc2str(item_type));
         break;
@@ -7959,6 +8027,59 @@  static int mov_read_iref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return mov_read_default(c, pb, atom);
 }
 
+/**
+ * Handler registered for the 'dimg' box: read the list of item ids that the
+ * grid item is derived from.
+ *
+ * Reads the 16-bit from_item_id, which must equal c->grid_item_id, then a
+ * 16-bit entry count followed by that many 16-bit 'to' item ids. The ids are
+ * stored in c->tile_id_list and their count in c->nb_tiles.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if no grid item id is known,
+ *         AVERROR_PATCHWELCOME if the box describes an item other than the
+ *         grid, AVERROR(ENOMEM) on allocation failure
+ */
+static int mov_read_dimg(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int entries, i;
+    int from_item_id = avio_rb16(pb);
+
+    if (c->grid_item_id < 0) {
+        av_log(c->fc, AV_LOG_ERROR, "Missing grid information\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (from_item_id != c->grid_item_id) {
+        avpriv_request_sample(c->fc, "Derived item of type other than 'grid'");
+        return AVERROR_PATCHWELCOME;
+    }
+    entries = avio_rb16(pb);
+
+    /* Release a list left behind by an earlier 'dimg' box so it is not
+     * leaked, and keep nb_tiles at 0 until the new list is fully read so a
+     * failed allocation never leaves a count without a list. */
+    av_freep(&c->tile_id_list);
+    c->nb_tiles = 0;
+
+    c->tile_id_list = av_malloc_array(entries, sizeof(*c->tile_id_list));
+    if (!c->tile_id_list)
+        return AVERROR(ENOMEM);
+    /* 'to' item ids */
+    for (i = 0; i < entries; i++)
+        c->tile_id_list[i] = avio_rb16(pb);
+    c->nb_tiles = entries;
+
+    av_log(c->fc, AV_LOG_TRACE, "dimg: from_item_id %d, entries %d\n",
+           from_item_id, entries);
+
+    return 0;
+}
+
+/**
+ * Handler registered for the 'thmb' box: record which item is the thumbnail
+ * of the primary item or of the grid item.
+ *
+ * Reads the 16-bit from_item_id, a 16-bit entry count and one 16-bit 'to'
+ * item id. When the 'to' id equals c->primary_item_id or c->grid_item_id,
+ * from_item_id is stored in c->thmb_item_id; otherwise the box is ignored.
+ *
+ * @return 0 on success or when the box is ignored, AVERROR_PATCHWELCOME if
+ *         the box lists more than one entry
+ */
+static int mov_read_thmb(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int entries;
+    int to_item_id, from_item_id = avio_rb16(pb);
+
+    entries = avio_rb16(pb);
+    if (entries > 1) {
+        avpriv_request_sample(c->fc, "More than one thmb entry");
+        return AVERROR_PATCHWELCOME;
+    }
+    /* Nothing is referenced, so there is no 'to' item id to read. */
+    if (entries < 1)
+        return 0;
+
+    /* 'to' item ids */
+    to_item_id = avio_rb16(pb);
+
+    /* Ignore the reference only when it targets neither the primary item
+     * nor the grid item. */
+    if (to_item_id != c->primary_item_id &&
+        to_item_id != c->grid_item_id)
+        return 0;
+
+    c->thmb_item_id = from_item_id;
+
+    av_log(c->fc, AV_LOG_TRACE, "thmb: from_item_id %d, entries %d\n",
+           from_item_id, entries);
+
+    return 0;
+}
+
 static int mov_read_ispe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     uint32_t width, height;
@@ -7974,15 +8095,17 @@  static int mov_read_ispe(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     width  = avio_rb32(pb);
     height = avio_rb32(pb);
 
-    av_log(c->fc, AV_LOG_TRACE, "ispe: item_id %d, width %u, height %u\n",
+    av_log(c->fc, AV_LOG_TRACE, "ispe: cur_item_id %d, width %u, height %u\n",
            c->cur_item_id, width, height);
 
     for (int i = 0; i < c->heif_info_size; i++) {
-        if (c->heif_info[i].item_id == c->cur_item_id) {
-            c->heif_info[i].width  = width;
-            c->heif_info[i].height = height;
-            break;
-        }
+        HEIFItem *item = &c->heif_info[i];
+        if (item->item_id != c->cur_item_id)
+            continue;
+
+        item->width  = width;
+        item->height = height;
+        break;
     }
 
     return 0;
@@ -8079,10 +8202,6 @@  static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             av_log(c->fc, AV_LOG_TRACE, "ipma: property_index %d, item_id %d, item_type %s\n",
                    index + 1, item_id, av_fourcc2str(ref->type));
 
-            // Skip properties referencing items other than the primary item until support is added
-            if (item_id != c->primary_item_id)
-                continue;
-
             c->cur_item_id = item_id;
 
             ret = mov_read_default(c, &ref->b.pub,
@@ -8211,6 +8330,9 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
 { MKTAG('p','i','t','m'), mov_read_pitm },
 { MKTAG('e','v','c','C'), mov_read_glbl },
+{ MKTAG('d','i','m','g'), mov_read_dimg },
+{ MKTAG('t','h','m','b'), mov_read_thmb },
+{ MKTAG('i','d','a','t'), mov_read_idat },
 { MKTAG('i','r','e','f'), mov_read_iref },
 { MKTAG('i','s','p','e'), mov_read_ispe },
 { MKTAG('i','p','r','p'), mov_read_iprp },
@@ -8719,6 +8841,7 @@  static int mov_read_close(AVFormatContext *s)
     av_freep(&mov->aes_decrypt);
     av_freep(&mov->chapter_tracks);
     av_freep(&mov->heif_info);
+    av_freep(&mov->tile_id_list);
 
     return 0;
 }
@@ -8858,6 +8981,172 @@  fail:
     return ret;
 }
 
+/**
+ * Compute the top-left offset of every tile stream inside the coded canvas.
+ *
+ * Tiles are laid out in stream order, left to right and top to bottom. Each
+ * row is filled until its accumulated width reaches coded_width; the next
+ * row starts below it, advancing by the height of the row's leftmost tile.
+ * On success tile_grid->offsets holds one entry per stream of the group.
+ *
+ * @param s   logging context
+ * @param stg tile grid stream group; coded_width and coded_height must
+ *            already be set
+ * @return 0 on success, AVERROR_INVALIDDATA for a non-positive coded size,
+ *         AVERROR(ENOMEM) on allocation failure, AVERROR(EINVAL) if the
+ *         streams run out before the canvas is filled, AVERROR_PATCHWELCOME
+ *         if the tiles do not exactly cover the canvas
+ */
+static int mov_set_tile_grid_offsets(AVFormatContext *s, AVStreamGroup *stg)
+{
+    AVStreamGroupTileGrid *tile_grid = stg->params.tile_grid;
+    int ret, x = 0, y = 0, i = 0;
+
+    /* With a non-positive coded width the row loop below would never consume
+     * a tile, so the outer loop could spin without making progress. */
+    if (tile_grid->coded_width <= 0 || tile_grid->coded_height <= 0)
+        return AVERROR_INVALIDDATA;
+
+    tile_grid->offsets = av_calloc(stg->nb_streams, sizeof(*tile_grid->offsets));
+    if (!tile_grid->offsets)
+        return AVERROR(ENOMEM);
+
+    while (y < tile_grid->coded_height) {
+        int left_col = i;
+
+        av_log(s, AV_LOG_TRACE, "leftmost tile %d\n", i);
+        while (x < tile_grid->coded_width) {
+            if (i == stg->nb_streams) {
+                ret = AVERROR(EINVAL);
+                goto fail;
+            }
+
+            av_log(s, AV_LOG_TRACE, "tile %d, x = %d, y = %d\n", i, x, y);
+            tile_grid->offsets[i].x = x;
+            tile_grid->offsets[i].y = y;
+
+            x += stg->streams[i++]->codecpar->width;
+        }
+
+        /* The row must end exactly on the right edge of the canvas. */
+        if (x > tile_grid->coded_width) {
+            avpriv_request_sample(s, "Non uniform HEIF tiles");
+            ret = AVERROR_PATCHWELCOME;
+            goto fail;
+        }
+
+        x  = 0;
+        y += stg->streams[left_col]->codecpar->height;
+    }
+
+    /* The rows must end exactly on the bottom edge and use every tile. */
+    if (y > tile_grid->coded_height || i != stg->nb_streams) {
+        avpriv_request_sample(s, "Non uniform HEIF tiles");
+        ret = AVERROR_PATCHWELCOME;
+        goto fail;
+    }
+
+    return 0;
+fail:
+    av_freep(&tile_grid->offsets);
+
+    return ret;
+}
+
+/**
+ * Build the TILE_GRID stream group for a tiled HEIF image.
+ *
+ * Steps:
+ *  - create the stream group and read the grid header of the item whose id
+ *    is mov->grid_item_id;
+ *  - for every id in mov->tile_id_list, take the matching item's stream, set
+ *    its dimensions, add it to the group, fill its single sample size and
+ *    chunk offset, flag it with AV_DISPOSITION_TILE and build its index;
+ *  - derive the coded canvas size from the first row and first column of
+ *    tiles and compute each tile's offset;
+ *  - fill the sample size and chunk offset of the thumbnail item, if any,
+ *    and build its index.
+ *
+ * @param s demuxer context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int mov_parse_tiles(AVFormatContext *s)
+{
+    MOVContext *mov = s->priv_data;
+    AVStreamGroup *stg = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_TILE_GRID, NULL);
+    AVStreamGroupTileGrid *tile_grid;
+    HEIFItem *grid_item = NULL;
+    int coded_width = 0, coded_height = 0;
+    int size, err;
+
+    if (!stg)
+        return AVERROR(ENOMEM);
+
+    tile_grid = stg->params.tile_grid;
+
+    av_assert0(mov->grid_item_id >= 0);
+    for (int i = 0; i < mov->heif_info_size; i++) {
+        HEIFItem *item = &mov->heif_info[i];
+
+        if (item->item_id != mov->grid_item_id)
+            continue;
+        err = read_image_grid(s, tile_grid, item);
+        if (err < 0)
+            return err;
+        stg->id = item->item_id;
+        grid_item = item;
+        break;
+    }
+    /* The grid id may be known without a matching entry in the item table;
+     * there is then no grid header to work with. */
+    if (!grid_item) {
+        av_log(s, AV_LOG_ERROR, "HEIF grid item %d not found\n", mov->grid_item_id);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (int i = 0; i < mov->nb_tiles; i++) {
+        int tile_id = mov->tile_id_list[i];
+
+        for (int j = 0; j < mov->heif_info_size; j++) {
+            HEIFItem *item = &mov->heif_info[j];
+            AVStream *st = item->st;
+            MOVStreamContext *sc;
+            int64_t offset = 0;
+
+            if (item->item_id != tile_id)
+                continue;
+            if (!st) {
+                av_log(s, AV_LOG_ERROR, "HEIF tile %d doesn't reference a stream\n", tile_id);
+                return AVERROR_INVALIDDATA;
+            }
+            if (item->is_idat_relative) {
+                if (!mov->idat_offset) {
+                    av_log(s, AV_LOG_ERROR, "Missing idat box for HEIF tile %d\n", tile_id);
+                    return AVERROR_INVALIDDATA;
+                }
+                offset = mov->idat_offset;
+            }
+
+            st->codecpar->width  = item->width;
+            st->codecpar->height = item->height;
+
+            err = avformat_stream_group_add_stream(stg, st);
+            /* The same tile listed twice is treated as broken input. */
+            if (err == AVERROR(EEXIST))
+                return AVERROR_INVALIDDATA;
+            else if (err < 0)
+                return err;
+
+            sc = st->priv_data;
+            sc->sample_sizes[0]  = item->extent_length;
+            sc->chunk_offsets[0] = item->extent_offset + offset;
+
+            st->disposition |= AV_DISPOSITION_TILE;
+
+            mov_build_index(mov, st);
+
+            break;
+        }
+    }
+
+    /* The loops below index stg->streams up to rows * cols, so the group
+     * must hold exactly that many tiles. */
+    size = grid_item->tile_rows * grid_item->tile_cols;
+    if (stg->nb_streams != size) {
+        av_log(s, AV_LOG_ERROR, "HEIF grid expects %d tiles but %u were found\n",
+               size, stg->nb_streams);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Canvas width is the sum of the first row's tile widths, canvas height
+     * the sum of the first column's tile heights. */
+    for (int i = 0; i < grid_item->tile_cols; i++)
+        coded_width += stg->streams[i]->codecpar->width;
+    for (int i = 0; i < size; i += grid_item->tile_cols)
+        coded_height += stg->streams[i]->codecpar->height;
+
+    tile_grid->coded_width  = coded_width;
+    tile_grid->coded_height = coded_height;
+
+    /* Propagate the real error (e.g. ENOMEM) instead of relabelling it. */
+    err = mov_set_tile_grid_offsets(s, stg);
+    if (err < 0)
+        return err;
+
+    for (int i = 0; i < mov->heif_info_size; i++) {
+        HEIFItem *item = &mov->heif_info[i];
+        AVStream *st = item->st;
+        MOVStreamContext *sc;
+        int64_t offset = 0;
+
+        if (item->item_id != mov->thmb_item_id)
+            continue;
+
+        if (!st) {
+            av_log(s, AV_LOG_ERROR, "HEIF thumbnail doesn't reference a stream\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (item->is_idat_relative) {
+            if (!mov->idat_offset) {
+                av_log(s, AV_LOG_ERROR, "Missing idat box for HEIF thumbnail\n");
+                return AVERROR_INVALIDDATA;
+            }
+            offset = mov->idat_offset;
+        }
+
+        sc = st->priv_data;
+        sc->sample_sizes[0]  = item->extent_length;
+        sc->chunk_offsets[0] = item->extent_offset + offset;
+
+        mov_build_index(mov, st);
+    }
+
+    return 0;
+}
+
 static int mov_read_header(AVFormatContext *s)
 {
     MOVContext *mov = s->priv_data;
@@ -8874,6 +9163,9 @@  static int mov_read_header(AVFormatContext *s)
 
     mov->fc = s;
     mov->trak_index = -1;
+    mov->grid_item_id = -1;
+    mov->thmb_item_id = -1;
+    mov->primary_item_id = -1;
     /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
     if (pb->seekable & AVIO_SEEKABLE_NORMAL)
         atom.size = avio_size(pb);
@@ -8896,23 +9188,30 @@  static int mov_read_header(AVFormatContext *s)
     av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));
 
     if (mov->found_iloc) {
-        for (i = 0; i < mov->heif_info_size; i++) {
-            HEIFItem *item = &mov->heif_info[i];
-            MOVStreamContext *sc;
-            AVStream *st;
+        if (mov->nb_tiles) {
+            err = mov_parse_tiles(s);
+            if (err < 0)
+                return err;
+        } else
+            for (i = 0; i < mov->heif_info_size; i++) {
+                HEIFItem *item = &mov->heif_info[i];
+                AVStream *st = item->st;
+                MOVStreamContext *sc;
 
-            if (!item->st)
-                continue;
+                if (!st)
+                    continue;
 
-            st = item->st;
-            sc = st->priv_data;
-            st->codecpar->width  = item->width;
-            st->codecpar->height = item->height;
-            sc->sample_sizes[0]  = item->extent_length;
-            sc->chunk_offsets[0] = item->extent_offset;
+                sc = st->priv_data;
+                st->codecpar->width  = item->width;
+                st->codecpar->height = item->height;
+                sc->sample_sizes[0]  = item->extent_length;
+                sc->chunk_offsets[0] = item->extent_offset;
 
-            mov_build_index(mov, st);
-        }
+                if (item->item_id == mov->primary_item_id)
+                    st->disposition |= AV_DISPOSITION_DEFAULT;
+
+                mov_build_index(mov, st);
+            }
     }
 
     if (pb->seekable & AVIO_SEEKABLE_NORMAL) {