diff mbox series

[FFmpeg-devel] avformat/mov: Add support for demuxing still HEIC images

Message ID 20230926173742.2623244-1-vigneshv@google.com
State New
Headers show
Series [FFmpeg-devel] avformat/mov: Add support for demuxing still HEIC images | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Vignesh Venkat Sept. 26, 2023, 5:37 p.m. UTC
They are similar to AVIF images (both use the HEIF container).
The only additional work needed is to parse the hvcC box and put
it in the extradata.

With this patch applied, ffmpeg (when built with an HEVC decoder)
is able to decode the files in
https://github.com/nokiatech/heif/tree/gh-pages/content/images

Partially fixes trac ticket #6521.

Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
---
 libavformat/isom.h |  2 ++
 libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

Comments

Vignesh Venkat Oct. 3, 2023, 10:56 p.m. UTC | #1
On Tue, Sep 26, 2023 at 10:37 AM Vignesh Venkatasubramanian
<vigneshv@google.com> wrote:
>
> They are similar to AVIF images (both use the HEIF container).
> The only additional work needed is to parse the hvcC box and put
> it in the extradata.
>
> With this patch applied, ffmpeg (when built with an HEVC decoder)
> is able to decode the files in
> https://github.com/nokiatech/heif/tree/gh-pages/content/images
>
> Partially fixes trac ticket #6521.
>
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> ---
>  libavformat/isom.h |  2 ++
>  libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index 3d375d7a46..b30b9da65e 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -327,6 +327,8 @@ typedef struct MOVContext {
>          int64_t extent_offset;
>      } *avif_info;
>      int avif_info_size;
> +    int64_t hvcC_offset;
> +    int hvcC_size;
>      int interleaved_read;
>  } MOVContext;
>
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index 1996e0028c..cec9cb5fe1 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -1218,7 +1218,8 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>          c->isom = 1;
>      av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
>      av_dict_set(&c->fc->metadata, "major_brand", type, 0);
> -    c->is_still_picture_avif = !strncmp(type, "avif", 4);
> +    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
> +                               !strncmp(type, "mif1", 4);
>      minor_ver = avio_rb32(pb); /* minor version */
>      av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
>
> @@ -4911,6 +4912,16 @@ static int avif_add_stream(MOVContext *c, int item_id)
>      st->priv_data = sc;
>      st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
>      st->codecpar->codec_id = AV_CODEC_ID_AV1;
> +    if (c->hvcC_offset >= 0) {
> +        int ret;
> +        int64_t pos = avio_tell(c->fc->pb);
> +        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
> +        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
> +        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
> +        if (ret < 0)
> +            return ret;
> +        avio_seek(c->fc->pb, pos, SEEK_SET);
> +    }
>      sc->ffindex = st->index;
>      c->trak_index = st->index;
>      st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
> @@ -4953,6 +4964,8 @@ static int avif_add_stream(MOVContext *c, int item_id)
>
>  static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>  {
> +    c->hvcC_offset = -1;
> +    c->hvcC_size = 0;
>      while (atom.size > 8) {
>          uint32_t tag;
>          if (avio_feof(pb))
> @@ -7826,6 +7839,28 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>      return atom.size;
>  }
>
> +static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> +{
> +    int size = avio_rb32(pb);
> +    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
> +        return AVERROR_INVALIDDATA;
> +    size -= 8;
> +    while (size > 0) {
> +        int sub_size, sub_type;
> +        sub_size = avio_rb32(pb);
> +        sub_type = avio_rl32(pb);
> +        sub_size -= 8;
> +        size -= sub_size + 8;
> +        if (sub_type == MKTAG('h','v','c','C')) {
> +            c->hvcC_offset = avio_tell(pb);
> +            c->hvcC_size = sub_size;
> +            break;
> +        }
> +        avio_skip(pb, sub_size);
> +    }
> +    return atom.size;
> +}
> +
>  static const MOVParseTableEntry mov_default_parse_table[] = {
>  { MKTAG('A','C','L','R'), mov_read_aclr },
>  { MKTAG('A','P','R','G'), mov_read_avid },
> @@ -7933,6 +7968,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
>  { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
>  { MKTAG('p','i','t','m'), mov_read_pitm },
>  { MKTAG('e','v','c','C'), mov_read_glbl },
> +{ MKTAG('i','p','r','p'), mov_read_iprp },
>  { 0, NULL }
>  };
>
> --
> 2.42.0.515.g380fc7ccd1-goog
>

Any comments/objections on merging this?
Steven Liu Oct. 4, 2023, 12:29 a.m. UTC | #2
Vignesh Venkat via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>于2023年10月4日
周三06:57写道:

> On Tue, Sep 26, 2023 at 10:37 AM Vignesh Venkatasubramanian
> <vigneshv@google.com> wrote:
> >
> > They are similar to AVIF images (both use the HEIF container).
> > The only additional work needed is to parse the hvcC box and put
> > it in the extradata.
> >
> > With this patch applied, ffmpeg (when built with an HEVC decoder)
> > is able to decode the files in
> > https://github.com/nokiatech/heif/tree/gh-pages/content/images
> >
> > Partially fixes trac ticket #6521.
> >
> > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > ---
> >  libavformat/isom.h |  2 ++
> >  libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 39 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavformat/isom.h b/libavformat/isom.h
> > index 3d375d7a46..b30b9da65e 100644
> > --- a/libavformat/isom.h
> > +++ b/libavformat/isom.h
> > @@ -327,6 +327,8 @@ typedef struct MOVContext {
> >          int64_t extent_offset;
> >      } *avif_info;
> >      int avif_info_size;
> > +    int64_t hvcC_offset;
> > +    int hvcC_size;
> >      int interleaved_read;
> >  } MOVContext;
> >
> > diff --git a/libavformat/mov.c b/libavformat/mov.c
> > index 1996e0028c..cec9cb5fe1 100644
> > --- a/libavformat/mov.c
> > +++ b/libavformat/mov.c
> > @@ -1218,7 +1218,8 @@ static int mov_read_ftyp(MOVContext *c,
> AVIOContext *pb, MOVAtom atom)
> >          c->isom = 1;
> >      av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand:
> %.4s\n",(char *)&type);
> >      av_dict_set(&c->fc->metadata, "major_brand", type, 0);
> > -    c->is_still_picture_avif = !strncmp(type, "avif", 4);
> > +    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
> > +                               !strncmp(type, "mif1", 4);
> >      minor_ver = avio_rb32(pb); /* minor version */
> >      av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
> >
> > @@ -4911,6 +4912,16 @@ static int avif_add_stream(MOVContext *c, int
> item_id)
> >      st->priv_data = sc;
> >      st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> >      st->codecpar->codec_id = AV_CODEC_ID_AV1;
> > +    if (c->hvcC_offset >= 0) {
> > +        int ret;
> > +        int64_t pos = avio_tell(c->fc->pb);
> > +        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
> > +        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
> > +        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb,
> c->hvcC_size);
> > +        if (ret < 0)
> > +            return ret;
> > +        avio_seek(c->fc->pb, pos, SEEK_SET);
> > +    }
> >      sc->ffindex = st->index;
> >      c->trak_index = st->index;
> >      st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
> > @@ -4953,6 +4964,8 @@ static int avif_add_stream(MOVContext *c, int
> item_id)
> >
> >  static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> >  {
> > +    c->hvcC_offset = -1;
> > +    c->hvcC_size = 0;
> >      while (atom.size > 8) {
> >          uint32_t tag;
> >          if (avio_feof(pb))
> > @@ -7826,6 +7839,28 @@ static int mov_read_iloc(MOVContext *c,
> AVIOContext *pb, MOVAtom atom)
> >      return atom.size;
> >  }
> >
> > +static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> > +{
> > +    int size = avio_rb32(pb);
> > +    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
> > +        return AVERROR_INVALIDDATA;
> > +    size -= 8;
> > +    while (size > 0) {
> > +        int sub_size, sub_type;
> > +        sub_size = avio_rb32(pb);
> > +        sub_type = avio_rl32(pb);
> > +        sub_size -= 8;
> > +        size -= sub_size + 8;
> > +        if (sub_type == MKTAG('h','v','c','C')) {
> > +            c->hvcC_offset = avio_tell(pb);
> > +            c->hvcC_size = sub_size;
> > +            break;
> > +        }
> > +        avio_skip(pb, sub_size);
> > +    }
> > +    return atom.size;
> > +}
> > +
> >  static const MOVParseTableEntry mov_default_parse_table[] = {
> >  { MKTAG('A','C','L','R'), mov_read_aclr },
> >  { MKTAG('A','P','R','G'), mov_read_avid },
> > @@ -7933,6 +7968,7 @@ static const MOVParseTableEntry
> mov_default_parse_table[] = {
> >  { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
> >  { MKTAG('p','i','t','m'), mov_read_pitm },
> >  { MKTAG('e','v','c','C'), mov_read_glbl },
> > +{ MKTAG('i','p','r','p'), mov_read_iprp },
> >  { 0, NULL }
> >  };
> >
> > --
> > 2.42.0.515.g380fc7ccd1-goog
> >
>
> Any comments/objections on merging this?


Can this patch support tiled hevc coded or sequence heif?

>
>
> --
> Vignesh
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Vittorio Giovara Oct. 4, 2023, 1:31 a.m. UTC | #3
On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:

> > > 2.42.0.515.g380fc7ccd1-goog
> > >
> >
> > Any comments/objections on merging this?
>
>
> Can this patch support tiled hevc coded or sequence heif?=
>

I believe that will be possible only after AVStreamGroup is implemented.

Vignesh is there a sample available? Could we add a test?
Leo Izen Oct. 4, 2023, 2:35 a.m. UTC | #4
On 9/26/23 13:37, Vignesh Venkatasubramanian via ffmpeg-devel wrote:
> They are similar to AVIF images (both use the HEIF container).
> The only additional work needed is to parse the hvcC box and put
> it in the extradata.
> 
> With this patch applied, ffmpeg (when built with an HEVC decoder)
> is able to decode the files in
> https://github.com/nokiatech/heif/tree/gh-pages/content/images
> 
> Partially fixes trac ticket #6521.
> 
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> ---
>   libavformat/isom.h |  2 ++
>   libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
>   2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index 3d375d7a46..b30b9da65e 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -327,6 +327,8 @@ typedef struct MOVContext {
>           int64_t extent_offset;
>       } *avif_info;
>       int avif_info_size;
> +    int64_t hvcC_offset;
> +    int hvcC_size;
>       int interleaved_read;
>   } MOVContext;
>   
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index 1996e0028c..cec9cb5fe1 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -1218,7 +1218,8 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>           c->isom = 1;
>       av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
>       av_dict_set(&c->fc->metadata, "major_brand", type, 0);
> -    c->is_still_picture_avif = !strncmp(type, "avif", 4);
> +    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
> +                               !strncmp(type, "mif1", 4);

This appears to be an unrelated change. Is it?

>       minor_ver = avio_rb32(pb); /* minor version */
>       av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
>   
> @@ -4911,6 +4912,16 @@ static int avif_add_stream(MOVContext *c, int item_id)
>       st->priv_data = sc;
>       st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
>       st->codecpar->codec_id = AV_CODEC_ID_AV1;
> +    if (c->hvcC_offset >= 0) {
> +        int ret;
> +        int64_t pos = avio_tell(c->fc->pb);
> +        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
> +        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
> +        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
> +        if (ret < 0)
> +            return ret;
> +        avio_seek(c->fc->pb, pos, SEEK_SET);
> +    }

Will this fail on non-seekable input? If it does, do we care?

>       sc->ffindex = st->index;
>       c->trak_index = st->index;
>       st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
> @@ -4953,6 +4964,8 @@ static int avif_add_stream(MOVContext *c, int item_id)
>   
>   static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>   {
> +    c->hvcC_offset = -1;
> +    c->hvcC_size = 0;
>       while (atom.size > 8) {
>           uint32_t tag;
>           if (avio_feof(pb))
> @@ -7826,6 +7839,28 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>       return atom.size;
>   }
>   
> +static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> +{
> +    int size = avio_rb32(pb);
> +    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
> +        return AVERROR_INVALIDDATA;

Is there a reason you use ipco here instead of iprp? I'm not saying this 
is wrong, but I am confused while you're doing it this way.

> +    size -= 8;
> +    while (size > 0) {
> +        int sub_size, sub_type;
> +        sub_size = avio_rb32(pb);
> +        sub_type = avio_rl32(pb);
> +        sub_size -= 8;
> +        size -= sub_size + 8;
> +        if (sub_type == MKTAG('h','v','c','C')) {
> +            c->hvcC_offset = avio_tell(pb);
> +            c->hvcC_size = sub_size;
> +            break;
> +        }

Are these permitted to use extended-size tags? i.e. size = 1, followed 
by a big-endian 64-bit size, then followed by the tag?

> +        avio_skip(pb, sub_size);
> +    }
> +    return atom.size;
> +}
> +
>   static const MOVParseTableEntry mov_default_parse_table[] = {
>   { MKTAG('A','C','L','R'), mov_read_aclr },
>   { MKTAG('A','P','R','G'), mov_read_avid },
> @@ -7933,6 +7968,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
>   { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
>   { MKTAG('p','i','t','m'), mov_read_pitm },
>   { MKTAG('e','v','c','C'), mov_read_glbl },
> +{ MKTAG('i','p','r','p'), mov_read_iprp },
>   { 0, NULL }
>   };
>   

- Leo Izen
Vignesh Venkat Oct. 4, 2023, 4:01 a.m. UTC | #5
On Tue, Oct 3, 2023 at 6:32 PM Vittorio Giovara
<vittorio.giovara@gmail.com> wrote:
>
> On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:
>
> > > > 2.42.0.515.g380fc7ccd1-goog
> > > >
> > >
> > > Any comments/objections on merging this?
> >
> >
> > Can this patch support tiled hevc coded or sequence heif?=
> >
>
> I believe that will be possible only after AVStreamGroup is implemented.
>

Yes, this patch only supports still HEIC images that don't have alpha
and grids (tiles).

Tiles and Alpha support will be possible only after AVStreamGroup is
implemented. I will look into HEIC sequences in a follow-up.

> Vignesh is there a sample available? Could we add a test?

I tested it from the files in
https://github.com/nokiatech/heif/tree/gh-pages/content/images. I am
not sure about HEVC licensing and if we are allowed to copy some of
those files in the ffmpeg fate server. Would generating a random image
with ffmpeg and encoding it as HEIC be good enough?

> --
> Vittorio
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Vignesh Venkat Oct. 4, 2023, 4:19 a.m. UTC | #6
On Tue, Oct 3, 2023 at 7:35 PM Leo Izen <leo.izen@gmail.com> wrote:
>
> On 9/26/23 13:37, Vignesh Venkatasubramanian via ffmpeg-devel wrote:
> > They are similar to AVIF images (both use the HEIF container).
> > The only additional work needed is to parse the hvcC box and put
> > it in the extradata.
> >
> > With this patch applied, ffmpeg (when built with an HEVC decoder)
> > is able to decode the files in
> > https://github.com/nokiatech/heif/tree/gh-pages/content/images
> >
> > Partially fixes trac ticket #6521.
> >
> > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > ---
> >   libavformat/isom.h |  2 ++
> >   libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
> >   2 files changed, 39 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavformat/isom.h b/libavformat/isom.h
> > index 3d375d7a46..b30b9da65e 100644
> > --- a/libavformat/isom.h
> > +++ b/libavformat/isom.h
> > @@ -327,6 +327,8 @@ typedef struct MOVContext {
> >           int64_t extent_offset;
> >       } *avif_info;
> >       int avif_info_size;
> > +    int64_t hvcC_offset;
> > +    int hvcC_size;
> >       int interleaved_read;
> >   } MOVContext;
> >
> > diff --git a/libavformat/mov.c b/libavformat/mov.c
> > index 1996e0028c..cec9cb5fe1 100644
> > --- a/libavformat/mov.c
> > +++ b/libavformat/mov.c
> > @@ -1218,7 +1218,8 @@ static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> >           c->isom = 1;
> >       av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
> >       av_dict_set(&c->fc->metadata, "major_brand", type, 0);
> > -    c->is_still_picture_avif = !strncmp(type, "avif", 4);
> > +    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
> > +                               !strncmp(type, "mif1", 4);
>
> This appears to be an unrelated change. Is it?
>

No, "mif1" is the major brand reported by HEIC files. So this is
needed to detect HEIC files.

> >       minor_ver = avio_rb32(pb); /* minor version */
> >       av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
> >
> > @@ -4911,6 +4912,16 @@ static int avif_add_stream(MOVContext *c, int item_id)
> >       st->priv_data = sc;
> >       st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> >       st->codecpar->codec_id = AV_CODEC_ID_AV1;
> > +    if (c->hvcC_offset >= 0) {
> > +        int ret;
> > +        int64_t pos = avio_tell(c->fc->pb);
> > +        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
> > +        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
> > +        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
> > +        if (ret < 0)
> > +            return ret;
> > +        avio_seek(c->fc->pb, pos, SEEK_SET);
> > +    }
>
> Will this fail on non-seekable input? If it does, do we care?
>

I tried this with a pipe as an input (cat file.heic | ffmpeg -i - ...)
and it worked. I also see plenty of unconditional avio_seek calls in
this file. So I am not sure how it works.

Anyways, I added a check to fail here if the seek does not take us to
the desired position.

> >       sc->ffindex = st->index;
> >       c->trak_index = st->index;
> >       st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
> > @@ -4953,6 +4964,8 @@ static int avif_add_stream(MOVContext *c, int item_id)
> >
> >   static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> >   {
> > +    c->hvcC_offset = -1;
> > +    c->hvcC_size = 0;
> >       while (atom.size > 8) {
> >           uint32_t tag;
> >           if (avio_feof(pb))
> > @@ -7826,6 +7839,28 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> >       return atom.size;
> >   }
> >
> > +static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> > +{
> > +    int size = avio_rb32(pb);
> > +    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
> > +        return AVERROR_INVALIDDATA;
>
> Is there a reason you use ipco here instead of iprp? I'm not saying this
> is wrong, but I am confused while you're doing it this way.
>

The control flow will jump into this function only when an "iprp" box
has been seen (pb points to the start of the contents of the iprp
box). "ipco" is a sub-box of the "iprp" box that we care about. We
cannot register sub-boxes to mov_read_default without registering the
containing box, as the containing box will be skipped if it is
unknown. Some other sub-box reading is already implemented this way
(mov_read_meta for example).

> > +    size -= 8;
> > +    while (size > 0) {
> > +        int sub_size, sub_type;
> > +        sub_size = avio_rb32(pb);
> > +        sub_type = avio_rl32(pb);
> > +        sub_size -= 8;
> > +        size -= sub_size + 8;
> > +        if (sub_type == MKTAG('h','v','c','C')) {
> > +            c->hvcC_offset = avio_tell(pb);
> > +            c->hvcC_size = sub_size;
> > +            break;
> > +        }
>
> Are these permitted to use extended-size tags? i.e. size = 1, followed
> by a big-endian 64-bit size, then followed by the tag?
>

MP4 "type" fields are always 32-bits. So they cannot be of any other size.

Thank you for the review!

> > +        avio_skip(pb, sub_size);
> > +    }
> > +    return atom.size;
> > +}
> > +
> >   static const MOVParseTableEntry mov_default_parse_table[] = {
> >   { MKTAG('A','C','L','R'), mov_read_aclr },
> >   { MKTAG('A','P','R','G'), mov_read_avid },
> > @@ -7933,6 +7968,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
> >   { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
> >   { MKTAG('p','i','t','m'), mov_read_pitm },
> >   { MKTAG('e','v','c','C'), mov_read_glbl },
> > +{ MKTAG('i','p','r','p'), mov_read_iprp },
> >   { 0, NULL }
> >   };
> >
>
> - Leo Izen
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Vignesh Venkat Oct. 4, 2023, 4:28 a.m. UTC | #7
On Tue, Oct 3, 2023 at 5:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:
>
> Vignesh Venkat via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>于2023年10月4日
> 周三06:57写道:
>
> > On Tue, Sep 26, 2023 at 10:37 AM Vignesh Venkatasubramanian
> > <vigneshv@google.com> wrote:
> > >
> > > They are similar to AVIF images (both use the HEIF container).
> > > The only additional work needed is to parse the hvcC box and put
> > > it in the extradata.
> > >
> > > With this patch applied, ffmpeg (when built with an HEVC decoder)
> > > is able to decode the files in
> > > https://github.com/nokiatech/heif/tree/gh-pages/content/images
> > >
> > > Partially fixes trac ticket #6521.
> > >
> > > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > > ---
> > >  libavformat/isom.h |  2 ++
> > >  libavformat/mov.c  | 38 +++++++++++++++++++++++++++++++++++++-
> > >  2 files changed, 39 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/libavformat/isom.h b/libavformat/isom.h
> > > index 3d375d7a46..b30b9da65e 100644
> > > --- a/libavformat/isom.h
> > > +++ b/libavformat/isom.h
> > > @@ -327,6 +327,8 @@ typedef struct MOVContext {
> > >          int64_t extent_offset;
> > >      } *avif_info;
> > >      int avif_info_size;
> > > +    int64_t hvcC_offset;
> > > +    int hvcC_size;
> > >      int interleaved_read;
> > >  } MOVContext;
> > >
> > > diff --git a/libavformat/mov.c b/libavformat/mov.c
> > > index 1996e0028c..cec9cb5fe1 100644
> > > --- a/libavformat/mov.c
> > > +++ b/libavformat/mov.c
> > > @@ -1218,7 +1218,8 @@ static int mov_read_ftyp(MOVContext *c,
> > AVIOContext *pb, MOVAtom atom)
> > >          c->isom = 1;
> > >      av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand:
> > %.4s\n",(char *)&type);
> > >      av_dict_set(&c->fc->metadata, "major_brand", type, 0);
> > > -    c->is_still_picture_avif = !strncmp(type, "avif", 4);
> > > +    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
> > > +                               !strncmp(type, "mif1", 4);
> > >      minor_ver = avio_rb32(pb); /* minor version */
> > >      av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
> > >
> > > @@ -4911,6 +4912,16 @@ static int avif_add_stream(MOVContext *c, int
> > item_id)
> > >      st->priv_data = sc;
> > >      st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
> > >      st->codecpar->codec_id = AV_CODEC_ID_AV1;
> > > +    if (c->hvcC_offset >= 0) {
> > > +        int ret;
> > > +        int64_t pos = avio_tell(c->fc->pb);
> > > +        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
> > > +        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
> > > +        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb,
> > c->hvcC_size);
> > > +        if (ret < 0)
> > > +            return ret;
> > > +        avio_seek(c->fc->pb, pos, SEEK_SET);
> > > +    }
> > >      sc->ffindex = st->index;
> > >      c->trak_index = st->index;
> > >      st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
> > > @@ -4953,6 +4964,8 @@ static int avif_add_stream(MOVContext *c, int
> > item_id)
> > >
> > >  static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> > >  {
> > > +    c->hvcC_offset = -1;
> > > +    c->hvcC_size = 0;
> > >      while (atom.size > 8) {
> > >          uint32_t tag;
> > >          if (avio_feof(pb))
> > > @@ -7826,6 +7839,28 @@ static int mov_read_iloc(MOVContext *c,
> > AVIOContext *pb, MOVAtom atom)
> > >      return atom.size;
> > >  }
> > >
> > > +static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> > > +{
> > > +    int size = avio_rb32(pb);
> > > +    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
> > > +        return AVERROR_INVALIDDATA;
> > > +    size -= 8;
> > > +    while (size > 0) {
> > > +        int sub_size, sub_type;
> > > +        sub_size = avio_rb32(pb);
> > > +        sub_type = avio_rl32(pb);
> > > +        sub_size -= 8;
> > > +        size -= sub_size + 8;
> > > +        if (sub_type == MKTAG('h','v','c','C')) {
> > > +            c->hvcC_offset = avio_tell(pb);
> > > +            c->hvcC_size = sub_size;
> > > +            break;
> > > +        }
> > > +        avio_skip(pb, sub_size);
> > > +    }
> > > +    return atom.size;
> > > +}
> > > +
> > >  static const MOVParseTableEntry mov_default_parse_table[] = {
> > >  { MKTAG('A','C','L','R'), mov_read_aclr },
> > >  { MKTAG('A','P','R','G'), mov_read_avid },
> > > @@ -7933,6 +7968,7 @@ static const MOVParseTableEntry
> > mov_default_parse_table[] = {
> > >  { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
> > >  { MKTAG('p','i','t','m'), mov_read_pitm },
> > >  { MKTAG('e','v','c','C'), mov_read_glbl },
> > > +{ MKTAG('i','p','r','p'), mov_read_iprp },
> > >  { 0, NULL }
> > >  };
> > >
> > > --
> > > 2.42.0.515.g380fc7ccd1-goog
> > >
> >
> > Any comments/objections on merging this?
>
>
> Can this patch support tiled hevc coded or sequence heif?
>

Actually, sequence HEIF is very similar to an HEVC video in a MP4
container and all the necessary data is in the 'moov' box. So it
already works with the current tip-of-tree build of ffmpeg. I tried
all the files in
https://github.com/nokiatech/heif/tree/gh-pages/content/image_sequences
and was able to decode all of them.

So once this patch is applied, the only unsupported items would be
Alpha and Grid (Tiles) for both AVIF and HEIC.

> >
> >
> > --
> > Vignesh
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Vittorio Giovara Oct. 4, 2023, 4:40 a.m. UTC | #8
On Wed, Oct 4, 2023 at 12:02 AM Vignesh Venkat via ffmpeg-devel <
ffmpeg-devel@ffmpeg.org> wrote:

> On Tue, Oct 3, 2023 at 6:32 PM Vittorio Giovara
> <vittorio.giovara@gmail.com> wrote:
> >
> > On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com>
> wrote:
> >
> > > > > 2.42.0.515.g380fc7ccd1-goog
> > > > >
> > > >
> > > > Any comments/objections on merging this?
> > >
> > >
> > > Can this patch support tiled hevc coded or sequence heif?=
> > >
> >
> > I believe that will be possible only after AVStreamGroup is implemented.
> >
>
> Yes, this patch only supports still HEIC images that don't have alpha
> and grids (tiles).
>
> Tiles and Alpha support will be possible only after AVStreamGroup is
> implemented. I will look into HEIC sequences in a follow-up.
>
> > Vignesh is there a sample available? Could we add a test?
>
> I tested it from the files in
> https://github.com/nokiatech/heif/tree/gh-pages/content/images. I am
> not sure about HEVC licensing and if we are allowed to copy some of
> those files in the ffmpeg fate server. Would generating a random image
> with ffmpeg and encoding it as HEIC be good enough?
>

I would prefer a real world example and FATE has a bunch of conformance
samples already.
Adding the ones from https://github.com/nokiatech/heif_conformance
shouldn't be a problem.
Ideally the sample test should be added to this same patch.
Vignesh Venkat Oct. 4, 2023, 4:36 p.m. UTC | #9
On Tue, Oct 3, 2023 at 9:40 PM Vittorio Giovara
<vittorio.giovara@gmail.com> wrote:
>
>
>
> On Wed, Oct 4, 2023 at 12:02 AM Vignesh Venkat via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> wrote:
>>
>> On Tue, Oct 3, 2023 at 6:32 PM Vittorio Giovara
>> <vittorio.giovara@gmail.com> wrote:
>> >
>> > On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:
>> >
>> > > > > 2.42.0.515.g380fc7ccd1-goog
>> > > > >
>> > > >
>> > > > Any comments/objections on merging this?
>> > >
>> > >
>> > > Can this patch support tiled hevc coded or sequence heif?=
>> > >
>> >
>> > I believe that will be possible only after AVStreamGroup is implemented.
>> >
>>
>> Yes, this patch only supports still HEIC images that don't have alpha
>> and grids (tiles).
>>
>> Tiles and Alpha support will be possible only after AVStreamGroup is
>> implemented. I will look into HEIC sequences in a follow-up.
>>
>> > Vignesh is there a sample available? Could we add a test?
>>
>> I tested it from the files in
>> https://github.com/nokiatech/heif/tree/gh-pages/content/images. I am
>> not sure about HEVC licensing and if we are allowed to copy some of
>> those files in the ffmpeg fate server. Would generating a random image
>> with ffmpeg and encoding it as HEIC be good enough?
>
>
> I would prefer a real world example and FATE has a bunch of conformance samples already.
> Adding the ones from https://github.com/nokiatech/heif_conformance shouldn't be a problem.
> Ideally the sample test should be added to this same patch.

Great, i have added two samples. Can you please upload C002.heic and
C003.heic from the heif_conformance repository to the fate server
under the "heif-conformance" sub-directory. I have also attached those
two files in this email for reference. I will update the patch with
the fate tests.

> --
> Vittorio
Andreas Rheinhardt Oct. 5, 2023, 5:59 p.m. UTC | #10
Vignesh Venkat via ffmpeg-devel:
> On Tue, Oct 3, 2023 at 9:40 PM Vittorio Giovara
> <vittorio.giovara@gmail.com> wrote:
>>
>>
>>
>> On Wed, Oct 4, 2023 at 12:02 AM Vignesh Venkat via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> wrote:
>>>
>>> On Tue, Oct 3, 2023 at 6:32 PM Vittorio Giovara
>>> <vittorio.giovara@gmail.com> wrote:
>>>>
>>>> On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:
>>>>
>>>>>>> 2.42.0.515.g380fc7ccd1-goog
>>>>>>>
>>>>>>
>>>>>> Any comments/objections on merging this?
>>>>>
>>>>>
>>>>> Can this patch support tiled hevc coded or sequence heif?=
>>>>>
>>>>
>>>> I believe that will be possible only after AVStreamGroup is implemented.
>>>>
>>>
>>> Yes, this patch only supports still HEIC images that don't have alpha
>>> and grids (tiles).
>>>
>>> Tiles and Alpha support will be possible only after AVStreamGroup is
>>> implemented. I will look into HEIC sequences in a follow-up.
>>>
>>>> Vignesh is there a sample available? Could we add a test?
>>>
>>> I tested it from the files in
>>> https://github.com/nokiatech/heif/tree/gh-pages/content/images. I am
>>> not sure about HEVC licensing and if we are allowed to copy some of
>>> those files in the ffmpeg fate server. Would generating a random image
>>> with ffmpeg and encoding it as HEIC be good enough?
>>
>>
>> I would prefer a real world example and FATE has a bunch of conformance samples already.
>> Adding the ones from https://github.com/nokiatech/heif_conformance shouldn't be a problem.
>> Ideally the sample test should be added to this same patch.
> 
> Great, i have added two samples. Can you please upload C002.heic and
> C003.heic from the heif_conformance repository to the fate server
> under the "heif-conformance" sub-directory. I have also attached those
> two files in this email for reference. I will update the patch with
> the fate tests.
> 

Why are you intend to add so big files when the linked repo contains
smaller files? All five multilayer files are below 20KB each;
multilayer005.heic is even only 4.5KB. MIAF002.heic (8.6KB) and
MIAF003.heic (13.5KB) are also quite small. C025.heic (19.4KB) and
C053.heic (14.2KB) are also quite small and there are also other samples
in the 50KB-60KB range (C017.heic, C018.heic, C019.heic, C020.heic,
C041.heic). Besides taking up less diskspace, smaller files will likely
be faster to decode (which is particularly advantageous for people like
me who run FATE a lot).

- Andreas
Vignesh Venkat Oct. 5, 2023, 10:32 p.m. UTC | #11
On Thu, Oct 5, 2023 at 10:58 AM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
>
> Vignesh Venkat via ffmpeg-devel:
> > On Tue, Oct 3, 2023 at 9:40 PM Vittorio Giovara
> > <vittorio.giovara@gmail.com> wrote:
> >>
> >>
> >>
> >> On Wed, Oct 4, 2023 at 12:02 AM Vignesh Venkat via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> wrote:
> >>>
> >>> On Tue, Oct 3, 2023 at 6:32 PM Vittorio Giovara
> >>> <vittorio.giovara@gmail.com> wrote:
> >>>>
> >>>> On Tue, Oct 3, 2023 at 8:30 PM Steven Liu <lingjiujianke@gmail.com> wrote:
> >>>>
> >>>>>>> 2.42.0.515.g380fc7ccd1-goog
> >>>>>>>
> >>>>>>
> >>>>>> Any comments/objections on merging this?
> >>>>>
> >>>>>
> >>>>> Can this patch support tiled hevc coded or sequence heif?=
> >>>>>
> >>>>
> >>>> I believe that will be possible only after AVStreamGroup is implemented.
> >>>>
> >>>
> >>> Yes, this patch only supports still HEIC images that don't have alpha
> >>> and grids (tiles).
> >>>
> >>> Tiles and Alpha support will be possible only after AVStreamGroup is
> >>> implemented. I will look into HEIC sequences in a follow-up.
> >>>
> >>>> Vignesh is there a sample available? Could we add a test?
> >>>
> >>> I tested it from the files in
> >>> https://github.com/nokiatech/heif/tree/gh-pages/content/images. I am
> >>> not sure about HEVC licensing and if we are allowed to copy some of
> >>> those files in the ffmpeg fate server. Would generating a random image
> >>> with ffmpeg and encoding it as HEIC be good enough?
> >>
> >>
> >> I would prefer a real world example and FATE has a bunch of conformance samples already.
> >> Adding the ones from https://github.com/nokiatech/heif_conformance shouldn't be a problem.
> >> Ideally the sample test should be added to this same patch.
> >
> > Great, i have added two samples. Can you please upload C002.heic and
> > C003.heic from the heif_conformance repository to the fate server
> > under the "heif-conformance" sub-directory. I have also attached those
> > two files in this email for reference. I will update the patch with
> > the fate tests.
> >
>
> Why are you intend to add so big files when the linked repo contains
> smaller files? All five multilayer files are below 20KB each;
> multilayer005.heic is even only 4.5KB. MIAF002.heic (8.6KB) and
> MIAF003.heic (13.5KB) are also quite small. C025.heic (19.4KB) and
> C053.heic (14.2KB) are also quite small and there are also other samples
> in the 50KB-60KB range (C017.heic, C018.heic, C019.heic, C020.heic,
> C041.heic). Besides taking up less diskspace, smaller files will likely
> be faster to decode (which is particularly advantageous for people like
> me who run FATE a lot).
>

There is an excel sheet in the samples github repository that explains
the features of each of those files. I picked the files with the
simplest features (single item and multi-item) since those are the
only two files that I know for certain the current ffmpeg parser can
parse as-is. The multi-layer files that you mention may work, but i am
not sure if we are handling those layers the right way. So adding
something that I am not sure if it has the right behavior or not to a
fate test didn't seem right.

Also, the fate test only demuxes the files (using -c copy) and does
not do any decoding since it tests the parser and not the hevc
decoder. So decoding time is not a concern. I do understand the
concern about disk-space though. I can probably make a random file
that is smaller, but I also thought it was better to use files from an
existing conformance repository.

Please let me know if that is reasonable.

> - Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 3d375d7a46..b30b9da65e 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -327,6 +327,8 @@  typedef struct MOVContext {
         int64_t extent_offset;
     } *avif_info;
     int avif_info_size;
+    int64_t hvcC_offset;
+    int hvcC_size;
     int interleaved_read;
 } MOVContext;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 1996e0028c..cec9cb5fe1 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1218,7 +1218,8 @@  static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         c->isom = 1;
     av_log(c->fc, AV_LOG_DEBUG, "ISO: File Type Major Brand: %.4s\n",(char *)&type);
     av_dict_set(&c->fc->metadata, "major_brand", type, 0);
-    c->is_still_picture_avif = !strncmp(type, "avif", 4);
+    c->is_still_picture_avif = !strncmp(type, "avif", 4) ||
+                               !strncmp(type, "mif1", 4);
     minor_ver = avio_rb32(pb); /* minor version */
     av_dict_set_int(&c->fc->metadata, "minor_version", minor_ver, 0);
 
@@ -4911,6 +4912,16 @@  static int avif_add_stream(MOVContext *c, int item_id)
     st->priv_data = sc;
     st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codecpar->codec_id = AV_CODEC_ID_AV1;
+    if (c->hvcC_offset >= 0) {
+        int ret;
+        int64_t pos = avio_tell(c->fc->pb);
+        st->codecpar->codec_id = AV_CODEC_ID_HEVC;
+        avio_seek(c->fc->pb, c->hvcC_offset, SEEK_SET);
+        ret = ff_get_extradata(c->fc, st->codecpar, c->fc->pb, c->hvcC_size);
+        if (ret < 0)
+            return ret;
+        avio_seek(c->fc->pb, pos, SEEK_SET);
+    }
     sc->ffindex = st->index;
     c->trak_index = st->index;
     st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
@@ -4953,6 +4964,8 @@  static int avif_add_stream(MOVContext *c, int item_id)
 
 static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
+    c->hvcC_offset = -1;
+    c->hvcC_size = 0;
     while (atom.size > 8) {
         uint32_t tag;
         if (avio_feof(pb))
@@ -7826,6 +7839,28 @@  static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return atom.size;
 }
 
+static int mov_read_iprp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int size = avio_rb32(pb);
+    if (avio_rl32(pb) != MKTAG('i','p','c','o'))
+        return AVERROR_INVALIDDATA;
+    size -= 8;
+    while (size > 0) {
+        int sub_size, sub_type;
+        sub_size = avio_rb32(pb);
+        sub_type = avio_rl32(pb);
+        sub_size -= 8;
+        size -= sub_size + 8;
+        if (sub_type == MKTAG('h','v','c','C')) {
+            c->hvcC_offset = avio_tell(pb);
+            c->hvcC_size = sub_size;
+            break;
+        }
+        avio_skip(pb, sub_size);
+    }
+    return atom.size;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@@ -7933,6 +7968,7 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('p','c','m','C'), mov_read_pcmc }, /* PCM configuration box */
 { MKTAG('p','i','t','m'), mov_read_pitm },
 { MKTAG('e','v','c','C'), mov_read_glbl },
+{ MKTAG('i','p','r','p'), mov_read_iprp },
 { 0, NULL }
 };