diff mbox series

[FFmpeg-devel,3/3] avformat/movenc: Add support for AVIF muxing

Message ID 20220222214318.3053157-1-vigneshv@google.com
State New
Headers show
Series None | expand

Commit Message

Vignesh Venkat Feb. 22, 2022, 9:43 p.m. UTC
Add an AVIF muxer by re-using the existing mov/mp4 muxer.

AVIF Specification: https://aomediacodec.github.io/av1-avif

Sample usage for still image:
ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif

Sample usage for animated AVIF image:
ffmpeg -i video.mp4 animated.avif

We can re-use any of the AV1 encoding options that will make
sense for image encoding (like bitrate, tiles, encoding speed,
etc).

The files generated by this muxer have been verified to be valid
AVIF files by the following:
1) Displays on Chrome (both still and animated images).
2) Displays on Firefox (only still images, firefox does not support
   animated AVIF yet).
3) Verified to be valid by Compliance Warden:
   https://github.com/gpac/ComplianceWarden

Fixes the encoder/muxer part of Trac Ticket #7621

Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
---
 configure                |   1 +
 libavformat/allformats.c |   1 +
 libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
 libavformat/movenc.h     |   5 +
 4 files changed, 282 insertions(+), 25 deletions(-)

Comments

Vignesh Venkat Feb. 24, 2022, 5:34 p.m. UTC | #1
On Tue, Feb 22, 2022 at 1:43 PM Vignesh Venkatasubramanian
<vigneshv@google.com> wrote:
>
> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
>
> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
>
> Sample usage for still image:
> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
>
> Sample usage for animated AVIF image:
> ffmpeg -i video.mp4 animated.avif
>
> We can re-use any of the AV1 encoding options that will make
> sense for image encoding (like bitrate, tiles, encoding speed,
> etc).
>
> The files generated by this muxer has been verified to be valid
> AVIF files by the following:
> 1) Displays on Chrome (both still and animated images).
> 2) Displays on Firefox (only still images, firefox does not support
>    animated AVIF yet).
> 3) Verfied to be valid by Compliance Warden:
>    https://github.com/gpac/ComplianceWarden
>
> Fixes the encoder/muxer part of Trac Ticket #7621
>
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> ---
>  configure                |   1 +
>  libavformat/allformats.c |   1 +
>  libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
>  libavformat/movenc.h     |   5 +
>  4 files changed, 282 insertions(+), 25 deletions(-)
>
> diff --git a/configure b/configure
> index 1535dc3c5b..87b380fe3a 100755
> --- a/configure
> +++ b/configure
> @@ -3393,6 +3393,7 @@ asf_stream_muxer_select="asf_muxer"
>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
>  avi_demuxer_select="riffdec exif"
>  avi_muxer_select="riffenc"
> +avif_muxer_select="mov_muxer"
>  caf_demuxer_select="iso_media"
>  caf_muxer_select="iso_media"
>  dash_muxer_select="mp4_muxer"
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index d066a7745b..400c17afbd 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
>  extern const AVInputFormat  ff_av1_demuxer;
>  extern const AVInputFormat  ff_avi_demuxer;
>  extern const AVOutputFormat ff_avi_muxer;
> +extern const AVOutputFormat ff_avif_muxer;
>  extern const AVInputFormat  ff_avisynth_demuxer;
>  extern const AVOutputFormat ff_avm2_muxer;
>  extern const AVInputFormat  ff_avr_demuxer;
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 1a746a67fd..53258f0d11 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
>
>      avio_wb32(pb, 0);
>      ffio_wfourcc(pb, "av1C");
> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
>      return update_size(pb, pos);
>  }
>
> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>          }
>      }
>
> -    /* We should only ever be called by MOV or MP4. */
> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> +    /* We should only ever be called for MOV, MP4 and AVIF. */
> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> +               track->mode == MODE_AVIF);
>
>      avio_wb32(pb, 0); /* size */
>      ffio_wfourcc(pb, "colr");
> -    if (track->mode == MODE_MP4)
> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
>          ffio_wfourcc(pb, "nclx");
>      else
>          ffio_wfourcc(pb, "nclc");
> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>      avio_wb16(pb, track->par->color_primaries);
>      avio_wb16(pb, track->par->color_trc);
>      avio_wb16(pb, track->par->color_space);
> -    if (track->mode == MODE_MP4) {
> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
>          avio_w8(pb, full_range << 7);
>      }
> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>                    || (track->par->width == 1440 && track->par->height == 1080)
>                    || (track->par->width == 1920 && track->par->height == 1080);
>
> -    if (track->mode == MODE_MOV &&
> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
>          av_strlcpy(compressor_name, encoder->value, 32);
>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> @@ -2123,6 +2124,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>      avio_wb32(pb, 0); /* size */
>      if (mov->encryption_scheme != MOV_ENC_NONE) {
>          ffio_wfourcc(pb, "encv");
> +    } else if (track->mode == MODE_AVIF) {
> +        ffio_wfourcc(pb, "av01");
>      } else {
>          avio_wl32(pb, track->tag); // store it byteswapped
>      }
> @@ -2239,7 +2242,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>          else
>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
>      }
> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> @@ -2792,7 +2795,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>
>      if (track) {
>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        if (track->mode == MODE_AVIF) {
> +            hdlr_type = "pict";
> +            descr = "ffmpeg";
> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>              hdlr_type = "vide";
>              descr     = "VideoHandler";
>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> @@ -2859,6 +2865,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>      return update_size(pb, pos);
>  }
>
> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "pitm");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb16(pb, item_id); /* item_id */
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iloc");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> +    avio_wb16(pb, 1); /* item_count */
> +
> +    avio_wb16(pb, 1); /* item_id */
> +    avio_wb16(pb, 0); /* data_reference_index */
> +    avio_wb16(pb, 1); /* extent_count */
> +    mov->avif_extent_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* extent_offset (written later) */
> +    // For animated AVIF, we simply write the first packet's size.
> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> +
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t infe_pos;
> +    int64_t iinf_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iinf");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb16(pb, 1); /* entry_count */
> +
> +    infe_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "infe");
> +    avio_w8(pb, 0x2); /* Version */
> +    avio_wb24(pb, 0); /* flags */
> +    avio_wb16(pb, 1); /* item_id */
> +    avio_wb16(pb, 0); /* item_protection_index */
> +    avio_write(pb, "av01", 4); /* item_type */
> +    avio_write(pb, "Color\0", 6); /* item_name */
> +    update_size(pb, infe_pos);
> +
> +    return update_size(pb, iinf_pos);
> +}
> +
> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ispe");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> +    return update_size(pb, pos);
> +}
> +
> +
> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> +    int i;
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "pixi");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_w8(pb, num_channels); /* num_channels */
> +    for (i = 0; i < num_channels; ++i) {
> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> +    }
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ipco");
> +    mov_write_ispe_tag(pb, mov, s);
> +    mov_write_pixi_tag(pb, mov, s);
> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ipma");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb32(pb, 1); /* entry_count */
> +    avio_wb16(pb, 1); /* item_ID */
> +    avio_w8(pb, 4); /* association_count */
> +
> +    // ispe association.
> +    avio_w8(pb, 1); /* essential and property_index */
> +    // pixi association.
> +    avio_w8(pb, 2); /* essential and property_index */
> +    // av1C association.
> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> +    // colr association.
> +    avio_w8(pb, 4); /* essential and property_index */
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iprp");
> +    mov_write_ipco_tag(pb, mov, s);
> +    mov_write_ipma_tag(pb, mov, s);
> +    return update_size(pb, pos);
> +}
> +
>  static int mov_write_hmhd_tag(AVIOContext *pb)
>  {
>      /* This atom must be present, but leaving the values at zero
> @@ -3056,7 +3187,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>              display_matrix = NULL;
>      }
>
> -    if (track->flags & MOV_TRACK_ENABLED)
> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
>          flags |= MOV_TKHD_FLAG_ENABLED;
>
>      if (track->mode == MODE_ISM)
> @@ -3104,7 +3235,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
>          int64_t track_width_1616;
> -        if (track->mode == MODE_MOV) {
> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
>              track_width_1616 = track->par->width * 0x10000ULL;
>          } else {
>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> @@ -3439,7 +3570,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>              mov_write_tapt_tag(pb, track);
>          }
>      }
> -    mov_write_track_udta_tag(pb, mov, st);
> +    if (track->mode != MODE_AVIF)
> +        mov_write_track_udta_tag(pb, mov, st);
>      track->entry = entry_backup;
>      track->chunkCount = chunk_backup;
>      return update_size(pb, pos);
> @@ -3914,8 +4046,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
>          mov_write_mdta_hdlr_tag(pb, mov, s);
>          mov_write_mdta_keys_tag(pb, mov, s);
>          mov_write_mdta_ilst_tag(pb, mov, s);
> -    }
> -    else {
> +    } else if (mov->mode == MODE_AVIF) {
> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> +        // We always write the primary item id as 1 since only one track is
> +        // supported for AVIF.
> +        mov_write_pitm_tag(pb, 1);
> +        mov_write_iloc_tag(pb, mov, s);
> +        mov_write_iinf_tag(pb, mov, s);
> +        mov_write_iprp_tag(pb, mov, s);
> +    } else {
>          /* iTunes metadata tag */
>          mov_write_itunes_hdlr_tag(pb, mov, s);
>          mov_write_ilst_tag(pb, mov, s);
> @@ -4245,10 +4384,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>      }
>
>      mov_write_mvhd_tag(pb, mov);
> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
>          mov_write_iods_tag(pb, mov);
>      for (i = 0; i < mov->nb_streams; i++) {
> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> +            mov->mode == MODE_AVIF) {
>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
>              if (ret < 0)
>                  return ret;
> @@ -4259,7 +4399,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>
>      if (mov->mode == MODE_PSP)
>          mov_write_uuidusmt_tag(pb, s);
> -    else
> +    else if (mov->mode != MODE_AVIF)
>          mov_write_udta_tag(pb, mov, s);
>
>      return update_size(pb, pos);
> @@ -5002,6 +5142,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
>      else if (mov->mode == MODE_3GP) {
>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
>          minor =     has_h264 ?   0x100 :   0x200;
> +    } else if (mov->mode == MODE_AVIF) {
> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> +        minor = 0;
>      } else if (mov->mode & MODE_3G2) {
>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
>          minor =     has_h264 ? 0x20000 : 0x10000;
> @@ -5065,6 +5208,30 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
>      // compatible brand a second time.
>      if (mov->mode == MODE_ISM) {
>          ffio_wfourcc(pb, "piff");
> +    } else if (mov->mode == MODE_AVIF) {
> +        const AVPixFmtDescriptor *pix_fmt_desc =
> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> +        const int depth = pix_fmt_desc->comp[0].depth;
> +        if (mov->is_animated_avif) {
> +            // For animated AVIF, major brand is "avis". Add "avif" as a
> +            // compatible brand.
> +            ffio_wfourcc(pb, "avif");
> +            ffio_wfourcc(pb, "msf1");
> +        }
> +        ffio_wfourcc(pb, "mif1");
> +        ffio_wfourcc(pb, "miaf");
> +        if (depth == 8 || depth == 10) {
> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> +            // computing that is based on chroma subsampling type. 420 chroma
> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> +                // 444 chroma subsampling.
> +                ffio_wfourcc(pb, "MA1A");
> +            } else {
> +                // 420 chroma subsampling.
> +                ffio_wfourcc(pb, "MA1B");
> +            }
> +        }
>      } else if (mov->mode != MODE_MOV) {
>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
>          // brand, if not already the major brand. This is compatible with users that
> @@ -5669,7 +5836,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>      if (ret < 0)
>          return ret;
>
> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
>          int ret;
>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
>              if (mov->frag_interleave && mov->fragments > 0) {
> @@ -5802,7 +5969,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>              }
>          }
>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> +        if (trk->mode == MODE_AVIF) {
> +            avio_write(pb, pkt->data, pkt->size);
> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
>                                           &size, &offset);
>              if (ret < 0)
> @@ -6230,6 +6399,10 @@ fail:
>              }
>          }
>
> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> +            mov->avif_extent_length = pkt->size;
> +        }
> +
>          return mov_write_single_packet(s, pkt);
>      }
>  }
> @@ -6569,11 +6742,15 @@ static int mov_init(AVFormatContext *s)
>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
>  #undef IS_MODE
>
>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
>
> +    if (mov->mode == MODE_AVIF)
> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> +
>      /* Set the FRAGMENT flag if any of the fragmentation methods are
>       * enabled. */
>      if (mov->max_fragment_duration || mov->max_fragment_size ||
> @@ -6797,12 +6974,13 @@ static int mov_init(AVFormatContext *s)
>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
>              }
> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> -                track->par->codec_id == AV_CODEC_ID_AV1) {
> -                if (track->mode != MODE_MP4) {
> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> -                    return AVERROR(EINVAL);
> -                }
> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> +               return AVERROR(EINVAL);
> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> +                return AVERROR(EINVAL);
>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
>                  /* altref frames handling is not defined in the spec as of version v1.0,
>                   * so just forbid muxing VP8 streams altogether until a new version does */
> @@ -7003,7 +7181,7 @@ static int mov_write_header(AVFormatContext *s)
>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
>              !mov->max_fragment_duration && !mov->max_fragment_size)
>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> -    } else {
> +    } else if (mov->mode != MODE_AVIF) {
>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
>              mov->reserved_header_pos = avio_tell(pb);
>          mov_write_mdat_tag(pb, mov);
> @@ -7291,6 +7469,54 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
>      return ret;
>  }
>
> +static int avif_write_trailer(AVFormatContext *s)
> +{
> +    AVIOContext *pb = s->pb;
> +    MOVMuxContext *mov = s->priv_data;
> +    int64_t pos_backup, mdat_pos;
> +    uint8_t *buf;
> +    int buf_size, moov_size;
> +    int i;
> +
> +    if (mov->moov_written) return 0;
> +
> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> +    mov_write_identification(pb, s);
> +    mov_write_meta_tag(pb, mov, s);
> +
> +    moov_size = get_moov_size(s);
> +    for (i = 0; i < mov->nb_streams; i++)
> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> +
> +    if (mov->is_animated_avif) {
> +        int ret;
> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> +            return ret;
> +    }
> +
> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> +    avio_wb32(pb, buf_size + 8);
> +    ffio_wfourcc(pb, "mdat");
> +    mdat_pos = avio_tell(pb);
> +
> +    avio_write(pb, buf, buf_size);
> +    ffio_free_dyn_buf(&mov->mdat_buf);
> +
> +    // write extent offset.
> +    pos_backup = avio_tell(pb);
> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
> +    avio_seek(pb, pos_backup, SEEK_SET);
> +
> +    mov->moov_written = 1;
> +    mov->mdat_size = 0;
> +    for (i = 0; i < mov->nb_streams; i++) {
> +        mov->tracks[i].entry = 0;
> +        mov->tracks[i].end_reliable = 0;
> +    }
> +    return 0;
> +}
> +
>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
>  static const AVCodecTag codec_3gp_tags[] = {
>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> @@ -7373,6 +7599,12 @@ static const AVCodecTag codec_f4v_tags[] = {
>      { AV_CODEC_ID_NONE, 0 },
>  };
>
> +static const AVCodecTag codec_avif_tags[] = {
> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> +    { AV_CODEC_ID_NONE, 0 },
> +};
> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> +
>  #if CONFIG_MOV_MUXER
>  const AVOutputFormat ff_mov_muxer = {
>      .name              = "mov",
> @@ -7535,3 +7767,21 @@ const AVOutputFormat ff_f4v_muxer = {
>      .priv_class        = &mov_isobmff_muxer_class,
>  };
>  #endif
> +#if CONFIG_AVIF_MUXER
> +const AVOutputFormat ff_avif_muxer = {
> +    .name              = "avif",
> +    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
> +    .mime_type         = "image/avif",
> +    .extensions        = "avif",
> +    .priv_data_size    = sizeof(MOVMuxContext),
> +    .video_codec       = AV_CODEC_ID_AV1,
> +    .init              = mov_init,
> +    .write_header      = mov_write_header,
> +    .write_packet      = mov_write_packet,
> +    .write_trailer     = avif_write_trailer,
> +    .deinit            = mov_free,
> +    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
> +    .codec_tag         = codec_avif_tags_list,
> +    .priv_class        = &mov_isobmff_muxer_class,
> +};
> +#endif
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index 2ac84ed070..55b8469f68 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -43,6 +43,7 @@
>  #define MODE_IPOD 0x20
>  #define MODE_ISM  0x40
>  #define MODE_F4V  0x80
> +#define MODE_AVIF 0x100
>
>  typedef struct MOVIentry {
>      uint64_t     pos;
> @@ -242,6 +243,10 @@ typedef struct MOVMuxContext {
>      MOVPrftBox write_prft;
>      int empty_hdlr_name;
>      int movie_timescale;
> +
> +    int64_t avif_extent_pos;
> +    int avif_extent_length;
> +    int is_animated_avif;
>  } MOVMuxContext;
>
>  #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
> --
> 2.35.1.473.g83b2b277ed-goog
>

If there are no further comments, can this series of patches be merged
please? :)
Vignesh Venkat March 1, 2022, 4:49 p.m. UTC | #2
On Thu, Feb 24, 2022 at 9:34 AM Vignesh Venkatasubramanian
<vigneshv@google.com> wrote:
>
> On Tue, Feb 22, 2022 at 1:43 PM Vignesh Venkatasubramanian
> <vigneshv@google.com> wrote:
> >
> > Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> >
> > AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> >
> > Sample usage for still image:
> > ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> >
> > Sample usage for animated AVIF image:
> > ffmpeg -i video.mp4 animated.avif
> >
> > We can re-use any of the AV1 encoding options that will make
> > sense for image encoding (like bitrate, tiles, encoding speed,
> > etc).
> >
> > The files generated by this muxer has been verified to be valid
> > AVIF files by the following:
> > 1) Displays on Chrome (both still and animated images).
> > 2) Displays on Firefox (only still images, firefox does not support
> >    animated AVIF yet).
> > 3) Verfied to be valid by Compliance Warden:
> >    https://github.com/gpac/ComplianceWarden
> >
> > Fixes the encoder/muxer part of Trac Ticket #7621
> >
> > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > ---
> >  configure                |   1 +
> >  libavformat/allformats.c |   1 +
> >  libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
> >  libavformat/movenc.h     |   5 +
> >  4 files changed, 282 insertions(+), 25 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 1535dc3c5b..87b380fe3a 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3393,6 +3393,7 @@ asf_stream_muxer_select="asf_muxer"
> >  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
> >  avi_demuxer_select="riffdec exif"
> >  avi_muxer_select="riffenc"
> > +avif_muxer_select="mov_muxer"
> >  caf_demuxer_select="iso_media"
> >  caf_muxer_select="iso_media"
> >  dash_muxer_select="mp4_muxer"
> > diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> > index d066a7745b..400c17afbd 100644
> > --- a/libavformat/allformats.c
> > +++ b/libavformat/allformats.c
> > @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
> >  extern const AVInputFormat  ff_av1_demuxer;
> >  extern const AVInputFormat  ff_avi_demuxer;
> >  extern const AVOutputFormat ff_avi_muxer;
> > +extern const AVOutputFormat ff_avif_muxer;
> >  extern const AVInputFormat  ff_avisynth_demuxer;
> >  extern const AVOutputFormat ff_avm2_muxer;
> >  extern const AVInputFormat  ff_avr_demuxer;
> > diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > index 1a746a67fd..53258f0d11 100644
> > --- a/libavformat/movenc.c
> > +++ b/libavformat/movenc.c
> > @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
> >
> >      avio_wb32(pb, 0);
> >      ffio_wfourcc(pb, "av1C");
> > -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> > +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
> >      return update_size(pb, pos);
> >  }
> >
> > @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >          }
> >      }
> >
> > -    /* We should only ever be called by MOV or MP4. */
> > -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> > +    /* We should only ever be called for MOV, MP4 and AVIF. */
> > +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> > +               track->mode == MODE_AVIF);
> >
> >      avio_wb32(pb, 0); /* size */
> >      ffio_wfourcc(pb, "colr");
> > -    if (track->mode == MODE_MP4)
> > +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
> >          ffio_wfourcc(pb, "nclx");
> >      else
> >          ffio_wfourcc(pb, "nclc");
> > @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >      avio_wb16(pb, track->par->color_primaries);
> >      avio_wb16(pb, track->par->color_trc);
> >      avio_wb16(pb, track->par->color_space);
> > -    if (track->mode == MODE_MP4) {
> > +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
> >          avio_w8(pb, full_range << 7);
> >      }
> > @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> >                    || (track->par->width == 1440 && track->par->height == 1080)
> >                    || (track->par->width == 1920 && track->par->height == 1080);
> >
> > -    if (track->mode == MODE_MOV &&
> > +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
> >          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
> >          av_strlcpy(compressor_name, encoder->value, 32);
> >      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> > @@ -2123,6 +2124,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >      avio_wb32(pb, 0); /* size */
> >      if (mov->encryption_scheme != MOV_ENC_NONE) {
> >          ffio_wfourcc(pb, "encv");
> > +    } else if (track->mode == MODE_AVIF) {
> > +        ffio_wfourcc(pb, "av01");
> >      } else {
> >          avio_wl32(pb, track->tag); // store it byteswapped
> >      }
> > @@ -2239,7 +2242,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >          else
> >              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
> >      }
> > -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> > +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> >                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> >                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> > @@ -2792,7 +2795,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >
> >      if (track) {
> >          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> > -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > +        if (track->mode == MODE_AVIF) {
> > +            hdlr_type = "pict";
> > +            descr = "ffmpeg";
> > +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> >              hdlr_type = "vide";
> >              descr     = "VideoHandler";
> >          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> > @@ -2859,6 +2865,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >      return update_size(pb, pos);
> >  }
> >
> > +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "pitm");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb16(pb, item_id); /* item_id */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iloc");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> > +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> > +    avio_wb16(pb, 1); /* item_count */
> > +
> > +    avio_wb16(pb, 1); /* item_id */
> > +    avio_wb16(pb, 0); /* data_reference_index */
> > +    avio_wb16(pb, 1); /* extent_count */
> > +    mov->avif_extent_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* extent_offset (written later) */
> > +    // For animated AVIF, we simply write the first packet's size.
> > +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> > +
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t infe_pos;
> > +    int64_t iinf_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iinf");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb16(pb, 1); /* entry_count */
> > +
> > +    infe_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "infe");
> > +    avio_w8(pb, 0x2); /* Version */
> > +    avio_wb24(pb, 0); /* flags */
> > +    avio_wb16(pb, 1); /* item_id */
> > +    avio_wb16(pb, 0); /* item_protection_index */
> > +    avio_write(pb, "av01", 4); /* item_type */
> > +    avio_write(pb, "Color\0", 6); /* item_name */
> > +    update_size(pb, infe_pos);
> > +
> > +    return update_size(pb, iinf_pos);
> > +}
> > +
> > +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ispe");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> > +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +
> > +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
> > +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > +    int i;
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "pixi");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_w8(pb, num_channels); /* num_channels */
> > +    for (i = 0; i < num_channels; ++i) {
> > +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> > +    }
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ipco");
> > +    mov_write_ispe_tag(pb, mov, s);
> > +    mov_write_pixi_tag(pb, mov, s);
> > +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> > +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ipma");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb32(pb, 1); /* entry_count */
> > +    avio_wb16(pb, 1); /* item_ID */
> > +    avio_w8(pb, 4); /* association_count */
> > +
> > +    // ispe association.
> > +    avio_w8(pb, 1); /* essential and property_index */
> > +    // pixi association.
> > +    avio_w8(pb, 2); /* essential and property_index */
> > +    // av1C association.
> > +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> > +    // colr association.
> > +    avio_w8(pb, 4); /* essential and property_index */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iprp");
> > +    mov_write_ipco_tag(pb, mov, s);
> > +    mov_write_ipma_tag(pb, mov, s);
> > +    return update_size(pb, pos);
> > +}
> > +
> >  static int mov_write_hmhd_tag(AVIOContext *pb)
> >  {
> >      /* This atom must be present, but leaving the values at zero
> > @@ -3056,7 +3187,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >              display_matrix = NULL;
> >      }
> >
> > -    if (track->flags & MOV_TRACK_ENABLED)
> > +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
> >          flags |= MOV_TKHD_FLAG_ENABLED;
> >
> >      if (track->mode == MODE_ISM)
> > @@ -3104,7 +3235,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
> >                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> >          int64_t track_width_1616;
> > -        if (track->mode == MODE_MOV) {
> > +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
> >              track_width_1616 = track->par->width * 0x10000ULL;
> >          } else {
> >              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> > @@ -3439,7 +3570,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> >              mov_write_tapt_tag(pb, track);
> >          }
> >      }
> > -    mov_write_track_udta_tag(pb, mov, st);
> > +    if (track->mode != MODE_AVIF)
> > +        mov_write_track_udta_tag(pb, mov, st);
> >      track->entry = entry_backup;
> >      track->chunkCount = chunk_backup;
> >      return update_size(pb, pos);
> > @@ -3914,8 +4046,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> >          mov_write_mdta_hdlr_tag(pb, mov, s);
> >          mov_write_mdta_keys_tag(pb, mov, s);
> >          mov_write_mdta_ilst_tag(pb, mov, s);
> > -    }
> > -    else {
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> > +        // We always write the primary item id as 1 since only one track is
> > +        // supported for AVIF.
> > +        mov_write_pitm_tag(pb, 1);
> > +        mov_write_iloc_tag(pb, mov, s);
> > +        mov_write_iinf_tag(pb, mov, s);
> > +        mov_write_iprp_tag(pb, mov, s);
> > +    } else {
> >          /* iTunes metadata tag */
> >          mov_write_itunes_hdlr_tag(pb, mov, s);
> >          mov_write_ilst_tag(pb, mov, s);
> > @@ -4245,10 +4384,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >      }
> >
> >      mov_write_mvhd_tag(pb, mov);
> > -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> > +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
> >          mov_write_iods_tag(pb, mov);
> >      for (i = 0; i < mov->nb_streams; i++) {
> > -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> > +            mov->mode == MODE_AVIF) {
> >              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> >              if (ret < 0)
> >                  return ret;
> > @@ -4259,7 +4399,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >
> >      if (mov->mode == MODE_PSP)
> >          mov_write_uuidusmt_tag(pb, s);
> > -    else
> > +    else if (mov->mode != MODE_AVIF)
> >          mov_write_udta_tag(pb, mov, s);
> >
> >      return update_size(pb, pos);
> > @@ -5002,6 +5142,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
> >      else if (mov->mode == MODE_3GP) {
> >          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
> >          minor =     has_h264 ?   0x100 :   0x200;
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> > +        minor = 0;
> >      } else if (mov->mode & MODE_3G2) {
> >          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
> >          minor =     has_h264 ? 0x20000 : 0x10000;
> > @@ -5065,6 +5208,30 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> >      // compatible brand a second time.
> >      if (mov->mode == MODE_ISM) {
> >          ffio_wfourcc(pb, "piff");
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        const AVPixFmtDescriptor *pix_fmt_desc =
> > +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > +        const int depth = pix_fmt_desc->comp[0].depth;
> > +        if (mov->is_animated_avif) {
> > +            // For animated AVIF, major brand is "avis". Add "avif" as a
> > +            // compatible brand.
> > +            ffio_wfourcc(pb, "avif");
> > +            ffio_wfourcc(pb, "msf1");
> > +        }
> > +        ffio_wfourcc(pb, "mif1");
> > +        ffio_wfourcc(pb, "miaf");
> > +        if (depth == 8 || depth == 10) {
> > +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> > +            // computing that is based on chroma subsampling type. 420 chroma
> > +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> > +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> > +                // 444 chroma subsampling.
> > +                ffio_wfourcc(pb, "MA1A");
> > +            } else {
> > +                // 420 chroma subsampling.
> > +                ffio_wfourcc(pb, "MA1B");
> > +            }
> > +        }
> >      } else if (mov->mode != MODE_MOV) {
> >          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
> >          // brand, if not already the major brand. This is compatible with users that
> > @@ -5669,7 +5836,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >      if (ret < 0)
> >          return ret;
> >
> > -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
> >          int ret;
> >          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
> >              if (mov->frag_interleave && mov->fragments > 0) {
> > @@ -5802,7 +5969,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >              }
> >          }
> >      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> > -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > +        if (trk->mode == MODE_AVIF) {
> > +            avio_write(pb, pkt->data, pkt->size);
> > +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> >              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
> >                                           &size, &offset);
> >              if (ret < 0)
> > @@ -6230,6 +6399,10 @@ fail:
> >              }
> >          }
> >
> > +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> > +            mov->avif_extent_length = pkt->size;
> > +        }
> > +
> >          return mov_write_single_packet(s, pkt);
> >      }
> >  }
> > @@ -6569,11 +6742,15 @@ static int mov_init(AVFormatContext *s)
> >      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
> >      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
> >      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> > +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
> >  #undef IS_MODE
> >
> >      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
> >          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
> >
> > +    if (mov->mode == MODE_AVIF)
> > +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> > +
> >      /* Set the FRAGMENT flag if any of the fragmentation methods are
> >       * enabled. */
> >      if (mov->max_fragment_duration || mov->max_fragment_size ||
> > @@ -6797,12 +6974,13 @@ static int mov_init(AVFormatContext *s)
> >                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
> >                          pix_fmt == AV_PIX_FMT_MONOBLACK;
> >              }
> > -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> > -                track->par->codec_id == AV_CODEC_ID_AV1) {
> > -                if (track->mode != MODE_MP4) {
> > -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > -                    return AVERROR(EINVAL);
> > -                }
> > +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> > +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > +               return AVERROR(EINVAL);
> > +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> > +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> > +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> > +                return AVERROR(EINVAL);
> >              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
> >                  /* altref frames handling is not defined in the spec as of version v1.0,
> >                   * so just forbid muxing VP8 streams altogether until a new version does */
> > @@ -7003,7 +7181,7 @@ static int mov_write_header(AVFormatContext *s)
> >                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
> >              !mov->max_fragment_duration && !mov->max_fragment_size)
> >              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> > -    } else {
> > +    } else if (mov->mode != MODE_AVIF) {
> >          if (mov->flags & FF_MOV_FLAG_FASTSTART)
> >              mov->reserved_header_pos = avio_tell(pb);
> >          mov_write_mdat_tag(pb, mov);
> > @@ -7291,6 +7469,54 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
> >      return ret;
> >  }
> >
> > +static int avif_write_trailer(AVFormatContext *s)
> > +{
> > +    AVIOContext *pb = s->pb;
> > +    MOVMuxContext *mov = s->priv_data;
> > +    int64_t pos_backup, mdat_pos;
> > +    uint8_t *buf;
> > +    int buf_size, moov_size;
> > +    int i;
> > +
> > +    if (mov->moov_written) return 0;
> > +
> > +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> > +    mov_write_identification(pb, s);
> > +    mov_write_meta_tag(pb, mov, s);
> > +
> > +    moov_size = get_moov_size(s);
> > +    for (i = 0; i < mov->nb_streams; i++)
> > +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> > +
> > +    if (mov->is_animated_avif) {
> > +        int ret;
> > +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> > +            return ret;
> > +    }
> > +
> > +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> > +    avio_wb32(pb, buf_size + 8);
> > +    ffio_wfourcc(pb, "mdat");
> > +    mdat_pos = avio_tell(pb);
> > +
> > +    avio_write(pb, buf, buf_size);
> > +    ffio_free_dyn_buf(&mov->mdat_buf);
> > +
> > +    // write extent offset.
> > +    pos_backup = avio_tell(pb);
> > +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> > +    avio_wb32(pb, mdat_pos); /* rewrite offset */
> > +    avio_seek(pb, pos_backup, SEEK_SET);
> > +
> > +    mov->moov_written = 1;
> > +    mov->mdat_size = 0;
> > +    for (i = 0; i < mov->nb_streams; i++) {
> > +        mov->tracks[i].entry = 0;
> > +        mov->tracks[i].end_reliable = 0;
> > +    }
> > +    return 0;
> > +}
> > +
> >  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
> >  static const AVCodecTag codec_3gp_tags[] = {
> >      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> > @@ -7373,6 +7599,12 @@ static const AVCodecTag codec_f4v_tags[] = {
> >      { AV_CODEC_ID_NONE, 0 },
> >  };
> >
> > +static const AVCodecTag codec_avif_tags[] = {
> > +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> > +    { AV_CODEC_ID_NONE, 0 },
> > +};
> > +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> > +
> >  #if CONFIG_MOV_MUXER
> >  const AVOutputFormat ff_mov_muxer = {
> >      .name              = "mov",
> > @@ -7535,3 +7767,21 @@ const AVOutputFormat ff_f4v_muxer = {
> >      .priv_class        = &mov_isobmff_muxer_class,
> >  };
> >  #endif
> > +#if CONFIG_AVIF_MUXER
> > +const AVOutputFormat ff_avif_muxer = {
> > +    .name              = "avif",
> > +    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
> > +    .mime_type         = "image/avif",
> > +    .extensions        = "avif",
> > +    .priv_data_size    = sizeof(MOVMuxContext),
> > +    .video_codec       = AV_CODEC_ID_AV1,
> > +    .init              = mov_init,
> > +    .write_header      = mov_write_header,
> > +    .write_packet      = mov_write_packet,
> > +    .write_trailer     = avif_write_trailer,
> > +    .deinit            = mov_free,
> > +    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
> > +    .codec_tag         = codec_avif_tags_list,
> > +    .priv_class        = &mov_isobmff_muxer_class,
> > +};
> > +#endif
> > diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> > index 2ac84ed070..55b8469f68 100644
> > --- a/libavformat/movenc.h
> > +++ b/libavformat/movenc.h
> > @@ -43,6 +43,7 @@
> >  #define MODE_IPOD 0x20
> >  #define MODE_ISM  0x40
> >  #define MODE_F4V  0x80
> > +#define MODE_AVIF 0x100
> >
> >  typedef struct MOVIentry {
> >      uint64_t     pos;
> > @@ -242,6 +243,10 @@ typedef struct MOVMuxContext {
> >      MOVPrftBox write_prft;
> >      int empty_hdlr_name;
> >      int movie_timescale;
> > +
> > +    int64_t avif_extent_pos;
> > +    int avif_extent_length;
> > +    int is_animated_avif;
> >  } MOVMuxContext;
> >
> >  #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
> > --
> > 2.35.1.473.g83b2b277ed-goog
> >
>
> If there are no further comments, can this series of patches be merged
> please? :)
>

Ping on this please!

> --
> Vignesh
James Almer March 3, 2022, 3:36 p.m. UTC | #3
On 2/22/2022 6:43 PM, Vignesh Venkatasubramanian wrote:
> Add an AVIF muxer by re-using the existing mov/mp4 muxer.
> 
> AVIF Specification: https://aomediacodec.github.io/av1-avif
> 
> Sample usage for still image:
> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> 
> Sample usage for animated AVIF image:
> ffmpeg -i video.mp4 animated.avif
> 
> We can re-use any of the AV1 encoding options that will make
> sense for image encoding (like bitrate, tiles, encoding speed,
> etc).
> 
> The files generated by this muxer have been verified to be valid
> AVIF files by the following:
> 1) Displays on Chrome (both still and animated images).
> 2) Displays on Firefox (only still images, firefox does not support
>     animated AVIF yet).
> 3) Verified to be valid by Compliance Warden:
>     https://github.com/gpac/ComplianceWarden
> 
> Fixes the encoder/muxer part of Trac Ticket #7621
> 
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> ---
>   configure                |   1 +
>   libavformat/allformats.c |   1 +
>   libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
>   libavformat/movenc.h     |   5 +
>   4 files changed, 282 insertions(+), 25 deletions(-)

With a single frame i get no errors in that compliance tool, but when i 
encode an animated AVIF i get the following:

[heif][Rule #12] Error: CodingConstraintsBox ('ccst') shall be present once
[heif][Rule #28] Error: Wrong arity for boxes { ccst } in parents { avc1 
avc2 avc3 avc4 hev1 hev2 hvc1 hvc2 av01 }: expected in range [1-1], found 0
[heif][Rule #31] Error: 'msf1' brand: this file shall conform to HEIF 
(section 7.2)
[heif][Rule #31] Error: 'msf1' brand: 'iso8' shall be present among the 
compatible brands array
[heif][Rule #32] Error: 'mif1' brand: this file shall conform to HEIF 
section 6, check the other errors for details
[heif][Rule #33] Error: 'msf1' brand: this file shall conform to HEIF 
section 7, check the other errors for details

All but one of these should be solved by writing a ccst box after the 
av1C box in the sample entry. The missing one should be solved by 
writing the iso8 compatible brand.

The ccst box looks like it would need some bitstream information, so 
either you parse the packets to get it, or just hardcode sane defaults, 
considering it's used as a hint and it's not required for demuxing.
Vignesh Venkat March 3, 2022, 7:20 p.m. UTC | #4
On Thu, Mar 3, 2022 at 7:36 AM James Almer <jamrial@gmail.com> wrote:
>
> On 2/22/2022 6:43 PM, Vignesh Venkatasubramanian wrote:
> > Add an AVIF muxer by re-using the existing mov/mp4 muxer.
> >
> > AVIF Specification: https://aomediacodec.github.io/av1-avif
> >
> > Sample usage for still image:
> > ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> >
> > Sample usage for animated AVIF image:
> > ffmpeg -i video.mp4 animated.avif
> >
> > We can re-use any of the AV1 encoding options that will make
> > sense for image encoding (like bitrate, tiles, encoding speed,
> > etc).
> >
> > The files generated by this muxer have been verified to be valid
> > AVIF files by the following:
> > 1) Displays on Chrome (both still and animated images).
> > 2) Displays on Firefox (only still images, firefox does not support
> >     animated AVIF yet).
> > 3) Verified to be valid by Compliance Warden:
> >     https://github.com/gpac/ComplianceWarden
> >
> > Fixes the encoder/muxer part of Trac Ticket #7621
> >
> > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > ---
> >   configure                |   1 +
> >   libavformat/allformats.c |   1 +
> >   libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
> >   libavformat/movenc.h     |   5 +
> >   4 files changed, 282 insertions(+), 25 deletions(-)
>
> With a single frame i get no errors in that compliance tool, but when i
> encode an animated AVIF i get the following:
>
> [heif][Rule #12] Error: CodingConstraintsBox ('ccst') shall be present once
> [heif][Rule #28] Error: Wrong arity for boxes { ccst } in parents { avc1
> avc2 avc3 avc4 hev1 hev2 hvc1 hvc2 av01 }: expected in range [1-1], found 0
> [heif][Rule #31] Error: 'msf1' brand: this file shall conform to HEIF
> (section 7.2)
> [heif][Rule #31] Error: 'msf1' brand: 'iso8' shall be present among the
> compatible brands array
> [heif][Rule #32] Error: 'mif1' brand: this file shall conform to HEIF
> section 6, check the other errors for details
> [heif][Rule #33] Error: 'msf1' brand: this file shall conform to HEIF
> section 7, check the other errors for details
>
> All but one of these should be solved by writing a ccst box after the
> av1C box in the sample entry. The missing one should be solved by
> writing the iso8 compatible brand.
>
> The ccst box looks like it would need some bitstream information, so
> either you parse the packets to get it, or just hardcode sane defaults,
> considering it's used as a hint and it's not required for demuxing.

I recently fixed these errors in libavif [1][2] (the reference AVIF
encoder). I was hoping to have a follow-up patch since i already
uploaded the existing patches. Since you have pointed this out now, i
have included the fix in this patch itself. The new patch will now
write the iso8 compatible brand and the ccst box with sane default
values. The file produced is now identical to the file produced by
libavif in terms of box structure.

Also, notice that the compliance tool still shows the following warnings
for animated avif:

[avif][Rule #3] Warning: [ItemId=1] still_picture flag set to 0
[avif][Rule #4] Warning: [ItemId=1] reduced_still_picture_header flag set to 0

I believe these are incorrect since it does not make sense to set
these flags to 1 for animated avif sequences. These warnings also show
up with files produced by libavif. So it is okay to ignore them. I
will file an issue with the compliance tool separately.

Please take another look, thanks!

[1] https://github.com/AOMediaCodec/libavif/pull/855
[2] https://github.com/AOMediaCodec/libavif/pull/856
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer March 3, 2022, 7:46 p.m. UTC | #5
On 3/3/2022 4:20 PM, Vignesh Venkatasubramanian wrote:
> On Thu, Mar 3, 2022 at 7:36 AM James Almer <jamrial@gmail.com> wrote:
>>
>> On 2/22/2022 6:43 PM, Vignesh Venkatasubramanian wrote:
>>> Add an AVIF muxer by re-using the existing mov/mp4 muxer.
>>>
>>> AVIF Specification: https://aomediacodec.github.io/av1-avif
>>>
>>> Sample usage for still image:
>>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
>>>
>>> Sample usage for animated AVIF image:
>>> ffmpeg -i video.mp4 animated.avif
>>>
>>> We can re-use any of the AV1 encoding options that will make
>>> sense for image encoding (like bitrate, tiles, encoding speed,
>>> etc).
>>>
>>> The files generated by this muxer have been verified to be valid
>>> AVIF files by the following:
>>> 1) Displays on Chrome (both still and animated images).
>>> 2) Displays on Firefox (only still images, firefox does not support
>>>      animated AVIF yet).
>>> 3) Verified to be valid by Compliance Warden:
>>>      https://github.com/gpac/ComplianceWarden
>>>
>>> Fixes the encoder/muxer part of Trac Ticket #7621
>>>
>>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
>>> ---
>>>    configure                |   1 +
>>>    libavformat/allformats.c |   1 +
>>>    libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
>>>    libavformat/movenc.h     |   5 +
>>>    4 files changed, 282 insertions(+), 25 deletions(-)
>>
>> With a single frame i get no errors in that compliance tool, but when i
>> encode an animated AVIF i get the following:
>>
>> [heif][Rule #12] Error: CodingConstraintsBox ('ccst') shall be present once
>> [heif][Rule #28] Error: Wrong arity for boxes { ccst } in parents { avc1
>> avc2 avc3 avc4 hev1 hev2 hvc1 hvc2 av01 }: expected in range [1-1], found 0
>> [heif][Rule #31] Error: 'msf1' brand: this file shall conform to HEIF
>> (section 7.2)
>> [heif][Rule #31] Error: 'msf1' brand: 'iso8' shall be present among the
>> compatible brands array
>> [heif][Rule #32] Error: 'mif1' brand: this file shall conform to HEIF
>> section 6, check the other errors for details
>> [heif][Rule #33] Error: 'msf1' brand: this file shall conform to HEIF
>> section 7, check the other errors for details
>>
>> All but one of these should be solved by writing a ccst box after the
>> av1C box in the sample entry. The missing one should be solved by
>> writing the iso8 compatible brand.
>>
>> The ccst box looks like it would need some bitstream information, so
>> either you parse the packets to get it, or just hardcode sane defaults,
>> considering it's used as a hint and it's not required for demuxing.
> 
> I recently fixed these errors in libavif [1][2] (the reference AVIF
> encoder). I was hoping to have a follow-up patch since i already
> uploaded the existing patches. Since you have pointed this out now, i
> have included the fix in this patch itself. The new patch will now
> write the iso8 compatible brand and the ccst box with sane default
> values. The file produced is now identical to the file produced by
> libavif in terms of box structure.
> 
> Also, notice that the compliance tool still shows the following warnings
> for animated avif:
> 
> [avif][Rule #3] Warning: [ItemId=1] still_picture flag set to 0
> [avif][Rule #4] Warning: [ItemId=1] reduced_still_picture_header flag set to 0
> 
> I believe these are incorrect since it does not make sense to set
> these flags to 1 for animated avif sequences. These warnings also show
> up with files produced by libavif. So it is okay to ignore them. I
> will file an issue with the compliance tool separately.

The compliance tool uses the 1.0.0 revision of the spec, and what you 
mentioned was removed in the current unfinished draft: 
https://github.com/AOMediaCodec/av1-avif/pull/112

I assume the tool will be updated once a new revision is released, so 
yes, we can ignore them.

> 
> Please take another look, thanks!

I noticed that using -still-picture 1 and passing more than one frame to 
the libaom encoder will succeed, despite you setting enccfg.g_limit to 
1, and encode every frame as key frames.
I'd expect the encoder would error out if you try to feed it more 
frames. Is it a libaom bug?

> 
> [1] https://github.com/AOMediaCodec/libavif/pull/855
> [2] https://github.com/AOMediaCodec/libavif/pull/856
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> 
> 
>
Vignesh Venkat March 3, 2022, 7:57 p.m. UTC | #6
On Thu, Mar 3, 2022 at 11:46 AM James Almer <jamrial@gmail.com> wrote:
>
>
>
> On 3/3/2022 4:20 PM, Vignesh Venkatasubramanian wrote:
> > On Thu, Mar 3, 2022 at 7:36 AM James Almer <jamrial@gmail.com> wrote:
> >>
> >> On 2/22/2022 6:43 PM, Vignesh Venkatasubramanian wrote:
> >>> Add an AVIF muxer by re-using the existing mov/mp4 muxer.
> >>>
> >>> AVIF Specification: https://aomediacodec.github.io/av1-avif
> >>>
> >>> Sample usage for still image:
> >>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> >>>
> >>> Sample usage for animated AVIF image:
> >>> ffmpeg -i video.mp4 animated.avif
> >>>
> >>> We can re-use any of the AV1 encoding options that will make
> >>> sense for image encoding (like bitrate, tiles, encoding speed,
> >>> etc).
> >>>
> >>> The files generated by this muxer have been verified to be valid
> >>> AVIF files by the following:
> >>> 1) Displays on Chrome (both still and animated images).
> >>> 2) Displays on Firefox (only still images, firefox does not support
> >>>      animated AVIF yet).
> >>> 3) Verified to be valid by Compliance Warden:
> >>>      https://github.com/gpac/ComplianceWarden
> >>>
> >>> Fixes the encoder/muxer part of Trac Ticket #7621
> >>>
> >>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> >>> ---
> >>>    configure                |   1 +
> >>>    libavformat/allformats.c |   1 +
> >>>    libavformat/movenc.c     | 300 +++++++++++++++++++++++++++++++++++----
> >>>    libavformat/movenc.h     |   5 +
> >>>    4 files changed, 282 insertions(+), 25 deletions(-)
> >>
> >> With a single frame i get no errors in that compliance tool, but when i
> >> encode an animated AVIF i get the following:
> >>
> >> [heif][Rule #12] Error: CodingConstraintsBox ('ccst') shall be present once
> >> [heif][Rule #28] Error: Wrong arity for boxes { ccst } in parents { avc1
> >> avc2 avc3 avc4 hev1 hev2 hvc1 hvc2 av01 }: expected in range [1-1], found 0
> >> [heif][Rule #31] Error: 'msf1' brand: this file shall conform to HEIF
> >> (section 7.2)
> >> [heif][Rule #31] Error: 'msf1' brand: 'iso8' shall be present among the
> >> compatible brands array
> >> [heif][Rule #32] Error: 'mif1' brand: this file shall conform to HEIF
> >> section 6, check the other errors for details
> >> [heif][Rule #33] Error: 'msf1' brand: this file shall conform to HEIF
> >> section 7, check the other errors for details
> >>
> >> All but one of these should be solved by writing a ccst box after the
> >> av1C box in the sample entry. The missing one should be solved by
> >> writing the iso8 compatible brand.
> >>
> >> The ccst box looks like it would need some bitstream information, so
> >> either you parse the packets to get it, or just hardcode sane defaults,
> >> considering it's used as a hint and it's not required for demuxing.
> >
> > I recently fixed these errors in libavif [1][2] (the reference AVIF
> > encoder). I was hoping to have a follow-up patch since i already
> > uploaded the existing patches. Since you have pointed this out now, i
> > have included the fix in this patch itself. The new patch will now
> > write the iso8 compatible brand and the ccst box with sane default
> > values. The file produced is now identical to the file produced by
> > libavif in terms of box structure.
> >
> > Also, notice that the compliance tool still shows the following warnings
> > for animated avif:
> >
> > [avif][Rule #3] Warning: [ItemId=1] still_picture flag set to 0
> > [avif][Rule #4] Warning: [ItemId=1] reduced_still_picture_header flag set to 0
> >
> > I believe these are incorrect since it does not make sense to set
> > these flags to 1 for animated avif sequences. These warnings also show
> > up with files produced by libavif. So it is okay to ignore them. I
> > will file an issue with the compliance tool separately.
>
> The compliance tool uses the 1.0.0 revision of the spec, and what you
> mentioned was removed in the current unfinished draft:
> https://github.com/AOMediaCodec/av1-avif/pull/112
>
> I assume the tool will be updated once a new revision is released, so
> yes, we can ignore them.
>
> >
> > Please take another look, thanks!
>
> I noticed that using -still-picture 1 and passing more than one frame to
> the libaom encoder will succeed, despite you setting enccfg.g_limit to
> 1, and encode every frame as key frames.
> I'd expect the encoder would error out if you try to feed it more
> frames. Is it a libaom bug?
>

Hmm yeah it seems like libaom only uses the value to set the
still-picture sequence header values in 1-pass mode. I think in a way
it may be useful for us to be able to use AVIF output with the image2
muxer. For example, something like:

ffmpeg -i video.mp4 -still-picture 1 -c:v libaom-av1 -an -f image2
image-%02d.avif

This command does not work as of now, but I have some follow-up
patches to make the image2 muxer work with AVIF images.

> >
> > [1] https://github.com/AOMediaCodec/libavif/pull/855
> > [2] https://github.com/AOMediaCodec/libavif/pull/856
> >> _______________________________________________
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> >
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/configure b/configure
index 1535dc3c5b..87b380fe3a 100755
--- a/configure
+++ b/configure
@@ -3393,6 +3393,7 @@  asf_stream_muxer_select="asf_muxer"
 av1_demuxer_select="av1_frame_merge_bsf av1_parser"
 avi_demuxer_select="riffdec exif"
 avi_muxer_select="riffenc"
+avif_muxer_select="mov_muxer"
 caf_demuxer_select="iso_media"
 caf_muxer_select="iso_media"
 dash_muxer_select="mp4_muxer"
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index d066a7745b..400c17afbd 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -81,6 +81,7 @@  extern const AVOutputFormat ff_au_muxer;
 extern const AVInputFormat  ff_av1_demuxer;
 extern const AVInputFormat  ff_avi_demuxer;
 extern const AVOutputFormat ff_avi_muxer;
+extern const AVOutputFormat ff_avif_muxer;
 extern const AVInputFormat  ff_avisynth_demuxer;
 extern const AVOutputFormat ff_avm2_muxer;
 extern const AVInputFormat  ff_avr_demuxer;
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 1a746a67fd..53258f0d11 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1303,7 +1303,7 @@  static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
 
     avio_wb32(pb, 0);
     ffio_wfourcc(pb, "av1C");
-    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
+    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
     return update_size(pb, pos);
 }
 
@@ -2004,12 +2004,13 @@  static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
         }
     }
 
-    /* We should only ever be called by MOV or MP4. */
-    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
+    /* We should only ever be called for MOV, MP4 and AVIF. */
+    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
+               track->mode == MODE_AVIF);
 
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "colr");
-    if (track->mode == MODE_MP4)
+    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
         ffio_wfourcc(pb, "nclx");
     else
         ffio_wfourcc(pb, "nclc");
@@ -2019,7 +2020,7 @@  static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
     avio_wb16(pb, track->par->color_primaries);
     avio_wb16(pb, track->par->color_trc);
     avio_wb16(pb, track->par->color_space);
-    if (track->mode == MODE_MP4) {
+    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
         int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
         avio_w8(pb, full_range << 7);
     }
@@ -2085,7 +2086,7 @@  static void find_compressor(char * compressor_name, int len, MOVTrack *track)
                   || (track->par->width == 1440 && track->par->height == 1080)
                   || (track->par->width == 1920 && track->par->height == 1080);
 
-    if (track->mode == MODE_MOV &&
+    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
         (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
         av_strlcpy(compressor_name, encoder->value, 32);
     } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
@@ -2123,6 +2124,8 @@  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
     avio_wb32(pb, 0); /* size */
     if (mov->encryption_scheme != MOV_ENC_NONE) {
         ffio_wfourcc(pb, "encv");
+    } else if (track->mode == MODE_AVIF) {
+        ffio_wfourcc(pb, "av01");
     } else {
         avio_wl32(pb, track->tag); // store it byteswapped
     }
@@ -2239,7 +2242,7 @@  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         else
             av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
     }
-    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
+    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
         int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
                              track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
                              track->par->color_space != AVCOL_SPC_UNSPECIFIED;
@@ -2792,7 +2795,10 @@  static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
 
     if (track) {
         hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
-        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (track->mode == MODE_AVIF) {
+            hdlr_type = "pict";
+            descr = "ffmpeg";
+        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
             hdlr_type = "vide";
             descr     = "VideoHandler";
         } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -2859,6 +2865,131 @@  static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
     return update_size(pb, pos);
 }
 
+/* Write a 'pitm' box whose payload is the 16-bit id of the primary item. */
+static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
+{
+    const int64_t box_start = avio_tell(pb);
+
+    avio_wb32(pb, 0);        /* size */
+    ffio_wfourcc(pb, "pitm");
+    avio_wb32(pb, 0);        /* Version & flags */
+    avio_wb16(pb, item_id);  /* item_id */
+
+    return update_size(pb, box_start);
+}
+
+/*
+ * Write the 'iloc' box with a single entry: item 1, made of one extent.
+ * The file position of the extent_offset field is stored in
+ * mov->avif_extent_pos and a zero placeholder is written there, so that the
+ * real offset can be written later.
+ */
+static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    const int64_t box_start = avio_tell(pb);
+
+    avio_wb32(pb, 0);    /* size */
+    ffio_wfourcc(pb, "iloc");
+    avio_wb32(pb, 0);    /* Version & flags */
+    avio_w8(pb, 0x44);   /* offset_size(4) and length_size(4), both 4 */
+    avio_w8(pb, 0);      /* base_offset_size(4) and reserved(4) */
+    avio_wb16(pb, 1);    /* item_count */
+
+    /* The only item. */
+    avio_wb16(pb, 1);    /* item_id */
+    avio_wb16(pb, 0);    /* data_reference_index */
+    avio_wb16(pb, 1);    /* extent_count */
+
+    /* Remember where extent_offset lives before emitting its placeholder. */
+    mov->avif_extent_pos = avio_tell(pb);
+    avio_wb32(pb, 0);    /* extent_offset (written later) */
+
+    /* For animated AVIF, this is simply the size of the first packet. */
+    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
+
+    return update_size(pb, box_start);
+}
+
+/*
+ * Write the 'iinf' box containing exactly one nested 'infe' entry that
+ * describes item 1: type "av01", name "Color".
+ */
+static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    const int64_t iinf_start = avio_tell(pb);
+    int64_t infe_start;
+
+    /* Outer 'iinf' box header. */
+    avio_wb32(pb, 0);              /* size */
+    ffio_wfourcc(pb, "iinf");
+    avio_wb32(pb, 0);              /* Version & flags */
+    avio_wb16(pb, 1);              /* entry_count */
+
+    /* Nested 'infe' box for the single item. */
+    infe_start = avio_tell(pb);
+    avio_wb32(pb, 0);              /* size */
+    ffio_wfourcc(pb, "infe");
+    avio_w8(pb, 0x2);              /* Version */
+    avio_wb24(pb, 0);              /* flags */
+    avio_wb16(pb, 1);              /* item_id */
+    avio_wb16(pb, 0);              /* item_protection_index */
+    ffio_wfourcc(pb, "av01");      /* item_type */
+    avio_write(pb, "Color\0", 6);  /* item_name, including its terminator */
+    update_size(pb, infe_start);
+
+    return update_size(pb, iinf_start);
+}
+
+/* Write the 'ispe' property holding the width and height of stream 0. */
+static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    const int64_t box_start = avio_tell(pb);
+    const AVCodecParameters *par = s->streams[0]->codecpar;
+
+    avio_wb32(pb, 0);            /* size */
+    ffio_wfourcc(pb, "ispe");
+    avio_wb32(pb, 0);            /* Version & flags */
+    avio_wb32(pb, par->width);   /* image_width */
+    avio_wb32(pb, par->height);  /* image_height */
+
+    return update_size(pb, box_start);
+}
+
+
+/*
+ * Write the 'pixi' property: the number of channels of stream 0's pixel
+ * format, followed by the bit depth of each channel.
+ *
+ * The channel count is the descriptor's nb_components, which is also what
+ * bounds the valid entries of pixdesc->comp[]. The plane count is a
+ * different quantity: formats that store several components in one plane
+ * have fewer planes than components, and av_pix_fmt_count_planes() returns a
+ * negative error code for an invalid format.
+ *
+ * Returns the value of update_size() for the box.
+ */
+static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
+    int i;
+
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "pixi");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_w8(pb, pixdesc->nb_components); /* num_channels */
+    for (i = 0; i < pixdesc->nb_components; ++i)
+        avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
+    return update_size(pb, pos);
+}
+
+/*
+ * Write the 'ipco' container and, inside it, the ispe, pixi, av1C and colr
+ * properties, in that order. The av1C and colr properties are written from
+ * the first track.
+ */
+static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    MOVTrack *const first_track = &mov->tracks[0];
+    const int64_t box_start = avio_tell(pb);
+
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ipco");
+
+    /* The return values of the individual property writers are not checked. */
+    mov_write_ispe_tag(pb, mov, s);
+    mov_write_pixi_tag(pb, mov, s);
+    mov_write_av1c_tag(pb, first_track);
+    mov_write_colr_tag(pb, first_track, 0);
+
+    return update_size(pb, box_start);
+}
+
+/*
+ * Write the 'ipma' box: one entry for item 1 that lists four property
+ * associations, each a single "essential and property_index" byte.
+ */
+static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    /* One byte per association, in the order they are written. */
+    static const uint8_t associations[] = {
+        1,        /* ispe association */
+        2,        /* pixi association */
+        0x80 | 3, /* av1C association, the only one with the 0x80 bit set */
+        4,        /* colr association */
+    };
+    const int64_t box_start = avio_tell(pb);
+    size_t idx;
+
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ipma");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_wb32(pb, 1); /* entry_count */
+    avio_wb16(pb, 1); /* item_ID */
+    avio_w8(pb, sizeof(associations)); /* association_count */
+
+    for (idx = 0; idx < sizeof(associations); idx++)
+        avio_w8(pb, associations[idx]); /* essential and property_index */
+
+    return update_size(pb, box_start);
+}
+
+/* Write the 'iprp' box: an 'ipco' box immediately followed by an 'ipma' box. */
+static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    const int64_t box_start = avio_tell(pb);
+
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "iprp");
+
+    mov_write_ipco_tag(pb, mov, s); /* the properties themselves */
+    mov_write_ipma_tag(pb, mov, s); /* their association with the item */
+
+    return update_size(pb, box_start);
+}
+
 static int mov_write_hmhd_tag(AVIOContext *pb)
 {
     /* This atom must be present, but leaving the values at zero
@@ -3056,7 +3187,7 @@  static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
             display_matrix = NULL;
     }
 
-    if (track->flags & MOV_TRACK_ENABLED)
+    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
         flags |= MOV_TKHD_FLAG_ENABLED;
 
     if (track->mode == MODE_ISM)
@@ -3104,7 +3235,7 @@  static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
     if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
                track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
         int64_t track_width_1616;
-        if (track->mode == MODE_MOV) {
+        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
             track_width_1616 = track->par->width * 0x10000ULL;
         } else {
             track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
@@ -3439,7 +3570,8 @@  static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
             mov_write_tapt_tag(pb, track);
         }
     }
-    mov_write_track_udta_tag(pb, mov, st);
+    if (track->mode != MODE_AVIF)
+        mov_write_track_udta_tag(pb, mov, st);
     track->entry = entry_backup;
     track->chunkCount = chunk_backup;
     return update_size(pb, pos);
@@ -3914,8 +4046,15 @@  static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
         mov_write_mdta_hdlr_tag(pb, mov, s);
         mov_write_mdta_keys_tag(pb, mov, s);
         mov_write_mdta_ilst_tag(pb, mov, s);
-    }
-    else {
+    } else if (mov->mode == MODE_AVIF) {
+        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
+        // We always write the primary item id as 1 since only one track is
+        // supported for AVIF.
+        mov_write_pitm_tag(pb, 1);
+        mov_write_iloc_tag(pb, mov, s);
+        mov_write_iinf_tag(pb, mov, s);
+        mov_write_iprp_tag(pb, mov, s);
+    } else {
         /* iTunes metadata tag */
         mov_write_itunes_hdlr_tag(pb, mov, s);
         mov_write_ilst_tag(pb, mov, s);
@@ -4245,10 +4384,11 @@  static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
     }
 
     mov_write_mvhd_tag(pb, mov);
-    if (mov->mode != MODE_MOV && !mov->iods_skip)
+    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
         mov_write_iods_tag(pb, mov);
     for (i = 0; i < mov->nb_streams; i++) {
-        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
+        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
+            mov->mode == MODE_AVIF) {
             int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
             if (ret < 0)
                 return ret;
@@ -4259,7 +4399,7 @@  static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
 
     if (mov->mode == MODE_PSP)
         mov_write_uuidusmt_tag(pb, s);
-    else
+    else if (mov->mode != MODE_AVIF)
         mov_write_udta_tag(pb, mov, s);
 
     return update_size(pb, pos);
@@ -5002,6 +5142,9 @@  static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
     else if (mov->mode == MODE_3GP) {
         ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
         minor =     has_h264 ?   0x100 :   0x200;
+    } else if (mov->mode == MODE_AVIF) {
+        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
+        minor = 0;
     } else if (mov->mode & MODE_3G2) {
         ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
         minor =     has_h264 ? 0x20000 : 0x10000;
@@ -5065,6 +5208,30 @@  static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
     // compatible brand a second time.
     if (mov->mode == MODE_ISM) {
         ffio_wfourcc(pb, "piff");
+    } else if (mov->mode == MODE_AVIF) {
+        const AVPixFmtDescriptor *pix_fmt_desc =
+            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
+        const int depth = pix_fmt_desc->comp[0].depth;
+        if (mov->is_animated_avif) {
+            // For animated AVIF, major brand is "avis". Add "avif" as a
+            // compatible brand.
+            ffio_wfourcc(pb, "avif");
+            ffio_wfourcc(pb, "msf1");
+        }
+        ffio_wfourcc(pb, "mif1");
+        ffio_wfourcc(pb, "miaf");
+        if (depth == 8 || depth == 10) {
+            // MA1B and MA1A brands are based on AV1 profile. Short hand for
+            // computing that is based on chroma subsampling type. 420 chroma
+            // subsampling is MA1B.  444 chroma subsampling is MA1A.
+            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
+                // 444 chroma subsampling.
+                ffio_wfourcc(pb, "MA1A");
+            } else {
+                // 420 chroma subsampling.
+                ffio_wfourcc(pb, "MA1B");
+            }
+        }
     } else if (mov->mode != MODE_MOV) {
         // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
         // brand, if not already the major brand. This is compatible with users that
@@ -5669,7 +5836,7 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     if (ret < 0)
         return ret;
 
-    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
         int ret;
         if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
             if (mov->frag_interleave && mov->fragments > 0) {
@@ -5802,7 +5969,9 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
     } else if (par->codec_id == AV_CODEC_ID_AV1) {
-        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
+        if (trk->mode == MODE_AVIF) {
+            avio_write(pb, pkt->data, pkt->size);
+        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
             ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
                                          &size, &offset);
             if (ret < 0)
@@ -6230,6 +6399,10 @@  fail:
             }
         }
 
+        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
+            mov->avif_extent_length = pkt->size;
+        }
+
         return mov_write_single_packet(s, pkt);
     }
 }
@@ -6569,11 +6742,15 @@  static int mov_init(AVFormatContext *s)
     else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
     else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
     else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
+    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
 #undef IS_MODE
 
     if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
         mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
 
+    if (mov->mode == MODE_AVIF)
+        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
+
     /* Set the FRAGMENT flag if any of the fragmentation methods are
      * enabled. */
     if (mov->max_fragment_duration || mov->max_fragment_size ||
@@ -6797,12 +6974,13 @@  static int mov_init(AVFormatContext *s)
                         pix_fmt == AV_PIX_FMT_MONOWHITE ||
                         pix_fmt == AV_PIX_FMT_MONOBLACK;
             }
-            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
-                track->par->codec_id == AV_CODEC_ID_AV1) {
-                if (track->mode != MODE_MP4) {
-                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
-                    return AVERROR(EINVAL);
-                }
+            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
+                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
+		return AVERROR(EINVAL);
+            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
+                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
+                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
+                return AVERROR(EINVAL);
             } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
                 /* altref frames handling is not defined in the spec as of version v1.0,
                  * so just forbid muxing VP8 streams altogether until a new version does */
@@ -7003,7 +7181,7 @@  static int mov_write_header(AVFormatContext *s)
                             FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
             !mov->max_fragment_duration && !mov->max_fragment_size)
             mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
-    } else {
+    } else if (mov->mode != MODE_AVIF) {
         if (mov->flags & FF_MOV_FLAG_FASTSTART)
             mov->reserved_header_pos = avio_tell(pb);
         mov_write_mdat_tag(pb, mov);
@@ -7291,6 +7469,54 @@  static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
     return ret;
 }
 
+/*
+ * write_trailer callback of the AVIF muxer.
+ *
+ * In order, it writes: the output of mov_write_identification(), the 'meta'
+ * box, a 'moov' box (only when the image is animated), and an 'mdat' box
+ * whose payload is the content of the mov->mdat_buf dynamic buffer. It then
+ * seeks back to mov->avif_extent_pos to fill in the extent offset and
+ * finally resets some muxer and track state.
+ *
+ * Returns 0 on success (including when the moov was already written), or
+ * the negative error code returned by mov_write_moov_tag().
+ */
+static int avif_write_trailer(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    MOVMuxContext *mov = s->priv_data;
+    int64_t pos_backup, mdat_pos;
+    uint8_t *buf;
+    int buf_size, moov_size;
+    int i;
+
+    /* Nothing left to do if the headers have already been written. */
+    if (mov->moov_written) return 0;
+
+    /* More than one frame in stream 0 means an animated AVIF. */
+    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
+    mov_write_identification(pb, s);
+    mov_write_meta_tag(pb, mov, s);
+
+    /* Track data is placed after the moov box and the 8-byte mdat header
+     * (32-bit size + fourcc) that are written below.
+     * NOTE(review): this is computed even for a still image, where no moov is
+     * written; presumably data_offset is unused in that case -- confirm. */
+    moov_size = get_moov_size(s);
+    for (i = 0; i < mov->nb_streams; i++)
+        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
+
+    if (mov->is_animated_avif) {
+        int ret;
+        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
+            return ret;
+    }
+
+    /* mdat box: the size field covers the payload plus the 8-byte header. */
+    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
+    avio_wb32(pb, buf_size + 8);
+    ffio_wfourcc(pb, "mdat");
+    /* Position of the first payload byte; used as the extent offset below. */
+    mdat_pos = avio_tell(pb);
+
+    avio_write(pb, buf, buf_size);
+    ffio_free_dyn_buf(&mov->mdat_buf);
+
+    // Write the extent offset at the position saved in avif_extent_pos, then
+    // return to the end of the data written so far.
+    // NOTE(review): the avio_seek() results are not checked and the 64-bit
+    // position is stored in a 32-bit field; presumably this relies on a
+    // seekable output smaller than 4 GiB -- confirm.
+    pos_backup = avio_tell(pb);
+    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
+    avio_wb32(pb, mdat_pos); /* rewrite offset */
+    avio_seek(pb, pos_backup, SEEK_SET);
+
+    /* Mark the headers as written and reset the per-track counters. */
+    mov->moov_written = 1;
+    mov->mdat_size = 0;
+    for (i = 0; i < mov->nb_streams; i++) {
+        mov->tracks[i].entry = 0;
+        mov->tracks[i].end_reliable = 0;
+    }
+    return 0;
+}
+
 #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
 static const AVCodecTag codec_3gp_tags[] = {
     { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
@@ -7373,6 +7599,12 @@  static const AVCodecTag codec_f4v_tags[] = {
     { AV_CODEC_ID_NONE, 0 },
 };
 
+/* Codec tag table for the AVIF muxer: AV1 tagged 'av01', then the
+ * AV_CODEC_ID_NONE terminator entry. */
+static const AVCodecTag codec_avif_tags[] = {
+    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
+    { AV_CODEC_ID_NONE, 0 },
+};
+/* NULL-terminated list of tag tables, used as ff_avif_muxer.codec_tag. */
+static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
+
 #if CONFIG_MOV_MUXER
 const AVOutputFormat ff_mov_muxer = {
     .name              = "mov",
@@ -7535,3 +7767,21 @@  const AVOutputFormat ff_f4v_muxer = {
     .priv_class        = &mov_isobmff_muxer_class,
 };
 #endif
+#if CONFIG_AVIF_MUXER
+/*
+ * AVIF muxer definition. It uses the generic mov_init, mov_write_header,
+ * mov_write_packet and mov_free callbacks together with an AVIF-specific
+ * trailer (avif_write_trailer) and a codec tag list containing only AV1.
+ */
+const AVOutputFormat ff_avif_muxer = {
+    .name              = "avif",
+    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
+    .mime_type         = "image/avif",
+    .extensions        = "avif",
+    .priv_data_size    = sizeof(MOVMuxContext),
+    .video_codec       = AV_CODEC_ID_AV1, /* default video codec */
+    .init              = mov_init,
+    .write_header      = mov_write_header,
+    .write_packet      = mov_write_packet,
+    .write_trailer     = avif_write_trailer, /* AVIF-specific trailer */
+    .deinit            = mov_free,
+    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
+    .codec_tag         = codec_avif_tags_list, /* AV1 ('av01') only */
+    .priv_class        = &mov_isobmff_muxer_class,
+};
+#endif
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 2ac84ed070..55b8469f68 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -43,6 +43,7 @@ 
 #define MODE_IPOD 0x20
 #define MODE_ISM  0x40
 #define MODE_F4V  0x80
+#define MODE_AVIF 0x100
 
 typedef struct MOVIentry {
     uint64_t     pos;
@@ -242,6 +243,10 @@  typedef struct MOVMuxContext {
     MOVPrftBox write_prft;
     int empty_hdlr_name;
     int movie_timescale;
+
+    int64_t avif_extent_pos;
+    int avif_extent_length;
+    int is_animated_avif;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)