diff mbox series

[FFmpeg-devel,3/3] avformat/movenc: Add support for AVIF muxing

Message ID 20220310181232.1924687-1-vigneshv@google.com
State New
Headers show
Series None | expand

Commit Message

Vignesh Venkat March 10, 2022, 6:12 p.m. UTC
Add an AVIF muxer by re-using the existing the mov/mp4 muxer.

AVIF Specifiation: https://aomediacodec.github.io/av1-avif

Sample usage for still image:
ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif

Sample usage for animated AVIF image:
ffmpeg -i video.mp4 animated.avif

We can re-use any of the AV1 encoding options that will make
sense for image encoding (like bitrate, tiles, encoding speed,
etc).

The files generated by this muxer has been verified to be valid
AVIF files by the following:
1) Displays on Chrome (both still and animated images).
2) Displays on Firefox (only still images, firefox does not support
   animated AVIF yet).
3) Verfied to be valid by Compliance Warden:
   https://github.com/gpac/ComplianceWarden

Fixes the encoder/muxer part of Trac Ticket #7621

Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
---
 configure                |   1 +
 libavformat/allformats.c |   1 +
 libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
 libavformat/movenc.h     |   5 +
 4 files changed, 322 insertions(+), 26 deletions(-)

Comments

Andreas Rheinhardt March 21, 2022, 8:46 p.m. UTC | #1
Vignesh Venkatasubramanian:
> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> 
> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> 
> Sample usage for still image:
> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> 
> Sample usage for animated AVIF image:
> ffmpeg -i video.mp4 animated.avif
> 
> We can re-use any of the AV1 encoding options that will make
> sense for image encoding (like bitrate, tiles, encoding speed,
> etc).
> 
> The files generated by this muxer has been verified to be valid
> AVIF files by the following:
> 1) Displays on Chrome (both still and animated images).
> 2) Displays on Firefox (only still images, firefox does not support
>    animated AVIF yet).
> 3) Verfied to be valid by Compliance Warden:
>    https://github.com/gpac/ComplianceWarden
> 
> Fixes the encoder/muxer part of Trac Ticket #7621
> 
> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> ---
>  configure                |   1 +
>  libavformat/allformats.c |   1 +
>  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
>  libavformat/movenc.h     |   5 +
>  4 files changed, 322 insertions(+), 26 deletions(-)
> 
> diff --git a/configure b/configure
> index 8c69ab0c86..6d7020e96b 100755
> --- a/configure
> +++ b/configure
> @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
>  avi_demuxer_select="riffdec exif"
>  avi_muxer_select="riffenc"
> +avif_muxer_select="mov_muxer"
>  caf_demuxer_select="iso_media"
>  caf_muxer_select="iso_media"
>  dash_muxer_select="mp4_muxer"
> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> index d066a7745b..400c17afbd 100644
> --- a/libavformat/allformats.c
> +++ b/libavformat/allformats.c
> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
>  extern const AVInputFormat  ff_av1_demuxer;
>  extern const AVInputFormat  ff_avi_demuxer;
>  extern const AVOutputFormat ff_avi_muxer;
> +extern const AVOutputFormat ff_avif_muxer;
>  extern const AVInputFormat  ff_avisynth_demuxer;
>  extern const AVOutputFormat ff_avm2_muxer;
>  extern const AVInputFormat  ff_avr_demuxer;
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 1a746a67fd..ff41579300 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
>  
>      avio_wb32(pb, 0);
>      ffio_wfourcc(pb, "av1C");
> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
>      return update_size(pb, pos);
>  }
>  
> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>          }
>      }
>  
> -    /* We should only ever be called by MOV or MP4. */
> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> +    /* We should only ever be called for MOV, MP4 and AVIF. */
> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> +               track->mode == MODE_AVIF);
>  
>      avio_wb32(pb, 0); /* size */
>      ffio_wfourcc(pb, "colr");
> -    if (track->mode == MODE_MP4)
> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
>          ffio_wfourcc(pb, "nclx");
>      else
>          ffio_wfourcc(pb, "nclc");
> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>      avio_wb16(pb, track->par->color_primaries);
>      avio_wb16(pb, track->par->color_trc);
>      avio_wb16(pb, track->par->color_space);
> -    if (track->mode == MODE_MP4) {
> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
>          avio_w8(pb, full_range << 7);
>      }
> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>                    || (track->par->width == 1440 && track->par->height == 1080)
>                    || (track->par->width == 1920 && track->par->height == 1080);
>  
> -    if (track->mode == MODE_MOV &&
> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
>          av_strlcpy(compressor_name, encoder->value, 32);
>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>      }
>  }
>  
> +static int mov_write_ccst_tag(AVIOContext *pb)
> +{
> +    int64_t pos = avio_tell(pb);
> +    // Write sane defaults:
> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
> +    // intra_pred_used = 1 : intra prediction may or may not be used.
> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
> +    //                        reference images can be used.
> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
> +                        (1 << 6) |  /* intra_pred_used */
> +                        (15 << 2);  /* max_ref_per_pic */
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ccst");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_w8(pb, ccstValue);
> +    avio_wb24(pb, 0);  /* reserved */
> +    return update_size(pb, pos);
> +}
> +
>  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
>  {
>      int ret = AVERROR_BUG;
> @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>      avio_wb32(pb, 0); /* size */
>      if (mov->encryption_scheme != MOV_ENC_NONE) {
>          ffio_wfourcc(pb, "encv");
> +    } else if (track->mode == MODE_AVIF) {
> +        ffio_wfourcc(pb, "av01");
>      } else {
>          avio_wl32(pb, track->tag); // store it byteswapped
>      }
> @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>          else
>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
>      }
> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>      if (avid)
>          avio_wb32(pb, 0);
>  
> +    if (track->mode == MODE_AVIF)
> +        mov_write_ccst_tag(pb);
> +
>      return update_size(pb, pos);
>  }
>  
> @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>  
>      if (track) {
>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        if (track->mode == MODE_AVIF) {
> +            hdlr_type = "pict";
> +            descr = "ffmpeg";
> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>              hdlr_type = "vide";
>              descr     = "VideoHandler";
>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>      return update_size(pb, pos);
>  }
>  
> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "pitm");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb16(pb, item_id); /* item_id */
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iloc");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> +    avio_wb16(pb, 1); /* item_count */
> +
> +    avio_wb16(pb, 1); /* item_id */
> +    avio_wb16(pb, 0); /* data_reference_index */
> +    avio_wb16(pb, 1); /* extent_count */
> +    mov->avif_extent_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* extent_offset (written later) */
> +    // For animated AVIF, we simply write the first packet's size.
> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> +
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t infe_pos;
> +    int64_t iinf_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iinf");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb16(pb, 1); /* entry_count */
> +
> +    infe_pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "infe");
> +    avio_w8(pb, 0x2); /* Version */
> +    avio_wb24(pb, 0); /* flags */
> +    avio_wb16(pb, 1); /* item_id */
> +    avio_wb16(pb, 0); /* item_protection_index */
> +    avio_write(pb, "av01", 4); /* item_type */
> +    avio_write(pb, "Color\0", 6); /* item_name */
> +    update_size(pb, infe_pos);
> +
> +    return update_size(pb, iinf_pos);
> +}
> +
> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ispe");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> +    return update_size(pb, pos);
> +}
> +
> +
> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);

Is the number of planes really the correct number here? After all, for a
muxer (instead of a decoder) it does not matter whether the chroma
planes are interleaved like in AV_PIX_FMT_NV12 or not like in
AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.

> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> +    int i;

We allow and use "for (int i = 0;" from C99 to save lines like these.

> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "pixi");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_w8(pb, num_channels); /* num_channels */
> +    for (i = 0; i < num_channels; ++i) {
> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> +    }
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ipco");
> +    mov_write_ispe_tag(pb, mov, s);
> +    mov_write_pixi_tag(pb, mov, s);
> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "ipma");
> +    avio_wb32(pb, 0); /* Version & flags */
> +    avio_wb32(pb, 1); /* entry_count */
> +    avio_wb16(pb, 1); /* item_ID */
> +    avio_w8(pb, 4); /* association_count */
> +
> +    // ispe association.
> +    avio_w8(pb, 1); /* essential and property_index */
> +    // pixi association.
> +    avio_w8(pb, 2); /* essential and property_index */
> +    // av1C association.
> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> +    // colr association.
> +    avio_w8(pb, 4); /* essential and property_index */
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +    ffio_wfourcc(pb, "iprp");
> +    mov_write_ipco_tag(pb, mov, s);
> +    mov_write_ipma_tag(pb, mov, s);
> +    return update_size(pb, pos);
> +}
> +
>  static int mov_write_hmhd_tag(AVIOContext *pb)
>  {
>      /* This atom must be present, but leaving the values at zero
> @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>              display_matrix = NULL;
>      }
>  
> -    if (track->flags & MOV_TRACK_ENABLED)
> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
>          flags |= MOV_TKHD_FLAG_ENABLED;
>  
>      if (track->mode == MODE_ISM)
> @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
>          int64_t track_width_1616;
> -        if (track->mode == MODE_MOV) {
> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
>              track_width_1616 = track->par->width * 0x10000ULL;
>          } else {
>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>              mov_write_tapt_tag(pb, track);
>          }
>      }
> -    mov_write_track_udta_tag(pb, mov, st);
> +    if (track->mode != MODE_AVIF)
> +        mov_write_track_udta_tag(pb, mov, st);
>      track->entry = entry_backup;
>      track->chunkCount = chunk_backup;
>      return update_size(pb, pos);
> @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
>          mov_write_mdta_hdlr_tag(pb, mov, s);
>          mov_write_mdta_keys_tag(pb, mov, s);
>          mov_write_mdta_ilst_tag(pb, mov, s);
> -    }
> -    else {
> +    } else if (mov->mode == MODE_AVIF) {
> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> +        // We always write the primary item id as 1 since only one track is
> +        // supported for AVIF.
> +        mov_write_pitm_tag(pb, 1);
> +        mov_write_iloc_tag(pb, mov, s);
> +        mov_write_iinf_tag(pb, mov, s);
> +        mov_write_iprp_tag(pb, mov, s);
> +    } else {
>          /* iTunes metadata tag */
>          mov_write_itunes_hdlr_tag(pb, mov, s);
>          mov_write_ilst_tag(pb, mov, s);
> @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>      }
>  
>      mov_write_mvhd_tag(pb, mov);
> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
>          mov_write_iods_tag(pb, mov);
>      for (i = 0; i < mov->nb_streams; i++) {
> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> +            mov->mode == MODE_AVIF) {
>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
>              if (ret < 0)
>                  return ret;
> @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>  
>      if (mov->mode == MODE_PSP)
>          mov_write_uuidusmt_tag(pb, s);
> -    else
> +    else if (mov->mode != MODE_AVIF)
>          mov_write_udta_tag(pb, mov, s);
>  
>      return update_size(pb, pos);
> @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
>      else if (mov->mode == MODE_3GP) {
>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
>          minor =     has_h264 ?   0x100 :   0x200;
> +    } else if (mov->mode == MODE_AVIF) {
> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> +        minor = 0;
>      } else if (mov->mode & MODE_3G2) {
>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
>          minor =     has_h264 ? 0x20000 : 0x10000;
> @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
>      // compatible brand a second time.
>      if (mov->mode == MODE_ISM) {
>          ffio_wfourcc(pb, "piff");
> +    } else if (mov->mode == MODE_AVIF) {
> +        const AVPixFmtDescriptor *pix_fmt_desc =
> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> +        const int depth = pix_fmt_desc->comp[0].depth;
> +        if (mov->is_animated_avif) {
> +            // For animated AVIF, major brand is "avis". Add "avif" as a
> +            // compatible brand.
> +            ffio_wfourcc(pb, "avif");
> +            ffio_wfourcc(pb, "msf1");
> +            ffio_wfourcc(pb, "iso8");
> +        }
> +        ffio_wfourcc(pb, "mif1");
> +        ffio_wfourcc(pb, "miaf");
> +        if (depth == 8 || depth == 10) {
> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> +            // computing that is based on chroma subsampling type. 420 chroma
> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> +                // 444 chroma subsampling.
> +                ffio_wfourcc(pb, "MA1A");
> +            } else {
> +                // 420 chroma subsampling.
> +                ffio_wfourcc(pb, "MA1B");
> +            }
> +        }
>      } else if (mov->mode != MODE_MOV) {
>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
>          // brand, if not already the major brand. This is compatible with users that
> @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>      if (ret < 0)
>          return ret;
>  
> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
>          int ret;
>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
>              if (mov->frag_interleave && mov->fragments > 0) {
> @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>              }
>          }
>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> +        if (trk->mode == MODE_AVIF) {

Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.

If the aim of this check is to disallow hint tracks or so, then it fails
(all it does is ensuring that both the ordinary track as well as the
hint track get data that might contain OBUs that should not be there).

> +            avio_write(pb, pkt->data, pkt->size);
> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
>                                           &size, &offset);
>              if (ret < 0)
> @@ -6230,6 +6422,10 @@ fail:
>              }
>          }
>  
> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> +            mov->avif_extent_length = pkt->size;
> +        }
> +
>          return mov_write_single_packet(s, pkt);
>      }
>  }
> @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
>  #undef IS_MODE
>  
>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
>  
> +    if (mov->mode == MODE_AVIF)
> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> +
>      /* Set the FRAGMENT flag if any of the fragmentation methods are
>       * enabled. */
>      if (mov->max_fragment_duration || mov->max_fragment_size ||
> @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
>      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
>       * is enabled, we don't support non-seekable output at all. */
>      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
> +         mov->mode == MODE_AVIF)) {
>          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
>          return AVERROR(EINVAL);
>      }
>  
> +    /* AVIF output must have exactly one video stream */
> +    if (mov->mode == MODE_AVIF) {
> +        if (s->nb_streams > 1) {
> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
> +            return AVERROR(EINVAL);
> +        }
> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
> +            return AVERROR(EINVAL);
> +        }
> +    }
> +
> +
>      mov->nb_streams = s->nb_streams;
>      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
>          mov->chapter_track = mov->nb_streams++;
> @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
>              }
> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> -                track->par->codec_id == AV_CODEC_ID_AV1) {
> -                if (track->mode != MODE_MP4) {
> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> -                    return AVERROR(EINVAL);
> -                }
> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> +		return AVERROR(EINVAL);
> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> +                return AVERROR(EINVAL);
>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
>                  /* altref frames handling is not defined in the spec as of version v1.0,
>                   * so just forbid muxing VP8 streams altogether until a new version does */
> @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
>              !mov->max_fragment_duration && !mov->max_fragment_size)
>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> -    } else {
> +    } else if (mov->mode != MODE_AVIF) {
>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
>              mov->reserved_header_pos = avio_tell(pb);
>          mov_write_mdat_tag(pb, mov);
> @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
>      return ret;
>  }
>  
> +static int avif_write_trailer(AVFormatContext *s)
> +{
> +    AVIOContext *pb = s->pb;
> +    MOVMuxContext *mov = s->priv_data;
> +    int64_t pos_backup, mdat_pos;
> +    uint8_t *buf;
> +    int buf_size, moov_size;
> +    int i;
> +
> +    if (mov->moov_written) return 0;
> +
> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> +    mov_write_identification(pb, s);
> +    mov_write_meta_tag(pb, mov, s);
> +
> +    moov_size = get_moov_size(s);
> +    for (i = 0; i < mov->nb_streams; i++)
> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;

Don't call avio_tell() in a loop (and I wonder whether this loop is even
necessary given that s->nb_streams is checked to be one).

> +
> +    if (mov->is_animated_avif) {
> +        int ret;
> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> +            return ret;
> +    }
> +
> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> +    avio_wb32(pb, buf_size + 8);
> +    ffio_wfourcc(pb, "mdat");
> +    mdat_pos = avio_tell(pb);
> +
> +    avio_write(pb, buf, buf_size);
> +    ffio_free_dyn_buf(&mov->mdat_buf);

Unnecessary: This will be freed in mov_free().

> +
> +    // write extent offset.
> +    pos_backup = avio_tell(pb);
> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> +    avio_wb32(pb, mdat_pos); /* rewrite offset */

What guarantees that this fits into 32bits?

> +    avio_seek(pb, pos_backup, SEEK_SET);
> +
> +    return 0;
> +}
> +
>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
>  static const AVCodecTag codec_3gp_tags[] = {
>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
>      { AV_CODEC_ID_NONE, 0 },
>  };
>  
> +#if CONFIG_AVIF_MUXER
> +static const AVCodecTag codec_avif_tags[] = {
> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> +    { AV_CODEC_ID_NONE, 0 },
> +};
> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> +
> +static const AVClass mov_avif_muxer_class = {
> +    .class_name = "avif muxer",
> +    .item_name  = av_default_item_name,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};

It's not mandatory for a muxer to have a private class; it is only
necessary for options. If you do not have options (like here), then the
AVClass is useless.
I actually wanted that you support the options that make sense for AVIF
and I dislike that the avif muxer uses a different write_trailer than
every other muxer here (which is the reason why e.g. the faststart
option is not supported by it). Is it really so different?
Furthermore, given that you don't use the same AVClass as everyone else,
the values for fields that are AVOpt-enabled are zero; and this does not
always coincide with the default value of the relevant option. See e.g.
skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
will even be set to a value that is outside of its legal range. I don't
know whether this can lead to any divide-by-zero crashes or asserts, but
it is certainly very fragile.

> +#endif
> +
>  #if CONFIG_MOV_MUXER
>  const AVOutputFormat ff_mov_muxer = {
>      .name              = "mov",
> @@ -7535,3 +7806,21 @@ const AVOutputFormat ff_f4v_muxer = {
>      .priv_class        = &mov_isobmff_muxer_class,
>  };
>  #endif
> +#if CONFIG_AVIF_MUXER
> +const AVOutputFormat ff_avif_muxer = {
> +    .name              = "avif",
> +    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
> +    .mime_type         = "image/avif",
> +    .extensions        = "avif",
> +    .priv_data_size    = sizeof(MOVMuxContext),
> +    .video_codec       = AV_CODEC_ID_AV1,
> +    .init              = mov_init,
> +    .write_header      = mov_write_header,
> +    .write_packet      = mov_write_packet,
> +    .write_trailer     = avif_write_trailer,
> +    .deinit            = mov_free,
> +    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
> +    .codec_tag         = codec_avif_tags_list,
> +    .priv_class        = &mov_avif_muxer_class,
> +};
> +#endif
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index 2ac84ed070..55b8469f68 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -43,6 +43,7 @@
>  #define MODE_IPOD 0x20
>  #define MODE_ISM  0x40
>  #define MODE_F4V  0x80
> +#define MODE_AVIF 0x100
>  
>  typedef struct MOVIentry {
>      uint64_t     pos;
> @@ -242,6 +243,10 @@ typedef struct MOVMuxContext {
>      MOVPrftBox write_prft;
>      int empty_hdlr_name;
>      int movie_timescale;
> +
> +    int64_t avif_extent_pos;
> +    int avif_extent_length;
> +    int is_animated_avif;
>  } MOVMuxContext;
>  
>  #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
Vignesh Venkat March 22, 2022, 4:45 p.m. UTC | #2
On Mon, Mar 21, 2022 at 1:46 PM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
>
> Vignesh Venkatasubramanian:
> > Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> >
> > AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> >
> > Sample usage for still image:
> > ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> >
> > Sample usage for animated AVIF image:
> > ffmpeg -i video.mp4 animated.avif
> >
> > We can re-use any of the AV1 encoding options that will make
> > sense for image encoding (like bitrate, tiles, encoding speed,
> > etc).
> >
> > The files generated by this muxer has been verified to be valid
> > AVIF files by the following:
> > 1) Displays on Chrome (both still and animated images).
> > 2) Displays on Firefox (only still images, firefox does not support
> >    animated AVIF yet).
> > 3) Verfied to be valid by Compliance Warden:
> >    https://github.com/gpac/ComplianceWarden
> >
> > Fixes the encoder/muxer part of Trac Ticket #7621
> >
> > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > ---
> >  configure                |   1 +
> >  libavformat/allformats.c |   1 +
> >  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
> >  libavformat/movenc.h     |   5 +
> >  4 files changed, 322 insertions(+), 26 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 8c69ab0c86..6d7020e96b 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
> >  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
> >  avi_demuxer_select="riffdec exif"
> >  avi_muxer_select="riffenc"
> > +avif_muxer_select="mov_muxer"
> >  caf_demuxer_select="iso_media"
> >  caf_muxer_select="iso_media"
> >  dash_muxer_select="mp4_muxer"
> > diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> > index d066a7745b..400c17afbd 100644
> > --- a/libavformat/allformats.c
> > +++ b/libavformat/allformats.c
> > @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
> >  extern const AVInputFormat  ff_av1_demuxer;
> >  extern const AVInputFormat  ff_avi_demuxer;
> >  extern const AVOutputFormat ff_avi_muxer;
> > +extern const AVOutputFormat ff_avif_muxer;
> >  extern const AVInputFormat  ff_avisynth_demuxer;
> >  extern const AVOutputFormat ff_avm2_muxer;
> >  extern const AVInputFormat  ff_avr_demuxer;
> > diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > index 1a746a67fd..ff41579300 100644
> > --- a/libavformat/movenc.c
> > +++ b/libavformat/movenc.c
> > @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
> >
> >      avio_wb32(pb, 0);
> >      ffio_wfourcc(pb, "av1C");
> > -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> > +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
> >      return update_size(pb, pos);
> >  }
> >
> > @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >          }
> >      }
> >
> > -    /* We should only ever be called by MOV or MP4. */
> > -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> > +    /* We should only ever be called for MOV, MP4 and AVIF. */
> > +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> > +               track->mode == MODE_AVIF);
> >
> >      avio_wb32(pb, 0); /* size */
> >      ffio_wfourcc(pb, "colr");
> > -    if (track->mode == MODE_MP4)
> > +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
> >          ffio_wfourcc(pb, "nclx");
> >      else
> >          ffio_wfourcc(pb, "nclc");
> > @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >      avio_wb16(pb, track->par->color_primaries);
> >      avio_wb16(pb, track->par->color_trc);
> >      avio_wb16(pb, track->par->color_space);
> > -    if (track->mode == MODE_MP4) {
> > +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
> >          avio_w8(pb, full_range << 7);
> >      }
> > @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> >                    || (track->par->width == 1440 && track->par->height == 1080)
> >                    || (track->par->width == 1920 && track->par->height == 1080);
> >
> > -    if (track->mode == MODE_MOV &&
> > +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
> >          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
> >          av_strlcpy(compressor_name, encoder->value, 32);
> >      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> > @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> >      }
> >  }
> >
> > +static int mov_write_ccst_tag(AVIOContext *pb)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    // Write sane defaults:
> > +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
> > +    // intra_pred_used = 1 : intra prediction may or may not be used.
> > +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
> > +    //                        reference images can be used.
> > +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
> > +                        (1 << 6) |  /* intra_pred_used */
> > +                        (15 << 2);  /* max_ref_per_pic */
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ccst");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_w8(pb, ccstValue);
> > +    avio_wb24(pb, 0);  /* reserved */
> > +    return update_size(pb, pos);
> > +}
> > +
> >  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
> >  {
> >      int ret = AVERROR_BUG;
> > @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >      avio_wb32(pb, 0); /* size */
> >      if (mov->encryption_scheme != MOV_ENC_NONE) {
> >          ffio_wfourcc(pb, "encv");
> > +    } else if (track->mode == MODE_AVIF) {
> > +        ffio_wfourcc(pb, "av01");
> >      } else {
> >          avio_wl32(pb, track->tag); // store it byteswapped
> >      }
> > @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >          else
> >              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
> >      }
> > -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> > +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> >                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> >                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> > @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >      if (avid)
> >          avio_wb32(pb, 0);
> >
> > +    if (track->mode == MODE_AVIF)
> > +        mov_write_ccst_tag(pb);
> > +
> >      return update_size(pb, pos);
> >  }
> >
> > @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >
> >      if (track) {
> >          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> > -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > +        if (track->mode == MODE_AVIF) {
> > +            hdlr_type = "pict";
> > +            descr = "ffmpeg";
> > +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> >              hdlr_type = "vide";
> >              descr     = "VideoHandler";
> >          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> > @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >      return update_size(pb, pos);
> >  }
> >
> > +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "pitm");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb16(pb, item_id); /* item_id */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iloc");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> > +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> > +    avio_wb16(pb, 1); /* item_count */
> > +
> > +    avio_wb16(pb, 1); /* item_id */
> > +    avio_wb16(pb, 0); /* data_reference_index */
> > +    avio_wb16(pb, 1); /* extent_count */
> > +    mov->avif_extent_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* extent_offset (written later) */
> > +    // For animated AVIF, we simply write the first packet's size.
> > +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> > +
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t infe_pos;
> > +    int64_t iinf_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iinf");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb16(pb, 1); /* entry_count */
> > +
> > +    infe_pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "infe");
> > +    avio_w8(pb, 0x2); /* Version */
> > +    avio_wb24(pb, 0); /* flags */
> > +    avio_wb16(pb, 1); /* item_id */
> > +    avio_wb16(pb, 0); /* item_protection_index */
> > +    avio_write(pb, "av01", 4); /* item_type */
> > +    avio_write(pb, "Color\0", 6); /* item_name */
> > +    update_size(pb, infe_pos);
> > +
> > +    return update_size(pb, iinf_pos);
> > +}
> > +
> > +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ispe");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> > +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +
> > +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
>
> Is the number of planes really the correct number here? After all, for a
> muxer (instead of a decoder) it does not matter whether the chroma
> planes are interleaved like in AV_PIX_FMT_NV12 or not like in
> AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.
>

Done.

> > +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > +    int i;
>
> We allow and use "for (int i = 0;" from C99 to save lines like these.
>

Done.

> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "pixi");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_w8(pb, num_channels); /* num_channels */
> > +    for (i = 0; i < num_channels; ++i) {
> > +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> > +    }
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ipco");
> > +    mov_write_ispe_tag(pb, mov, s);
> > +    mov_write_pixi_tag(pb, mov, s);
> > +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> > +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "ipma");
> > +    avio_wb32(pb, 0); /* Version & flags */
> > +    avio_wb32(pb, 1); /* entry_count */
> > +    avio_wb16(pb, 1); /* item_ID */
> > +    avio_w8(pb, 4); /* association_count */
> > +
> > +    // ispe association.
> > +    avio_w8(pb, 1); /* essential and property_index */
> > +    // pixi association.
> > +    avio_w8(pb, 2); /* essential and property_index */
> > +    // av1C association.
> > +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> > +    // colr association.
> > +    avio_w8(pb, 4); /* essential and property_index */
> > +    return update_size(pb, pos);
> > +}
> > +
> > +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > +{
> > +    int64_t pos = avio_tell(pb);
> > +    avio_wb32(pb, 0); /* size */
> > +    ffio_wfourcc(pb, "iprp");
> > +    mov_write_ipco_tag(pb, mov, s);
> > +    mov_write_ipma_tag(pb, mov, s);
> > +    return update_size(pb, pos);
> > +}
> > +
> >  static int mov_write_hmhd_tag(AVIOContext *pb)
> >  {
> >      /* This atom must be present, but leaving the values at zero
> > @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >              display_matrix = NULL;
> >      }
> >
> > -    if (track->flags & MOV_TRACK_ENABLED)
> > +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
> >          flags |= MOV_TKHD_FLAG_ENABLED;
> >
> >      if (track->mode == MODE_ISM)
> > @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
> >                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> >          int64_t track_width_1616;
> > -        if (track->mode == MODE_MOV) {
> > +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
> >              track_width_1616 = track->par->width * 0x10000ULL;
> >          } else {
> >              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> > @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> >              mov_write_tapt_tag(pb, track);
> >          }
> >      }
> > -    mov_write_track_udta_tag(pb, mov, st);
> > +    if (track->mode != MODE_AVIF)
> > +        mov_write_track_udta_tag(pb, mov, st);
> >      track->entry = entry_backup;
> >      track->chunkCount = chunk_backup;
> >      return update_size(pb, pos);
> > @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> >          mov_write_mdta_hdlr_tag(pb, mov, s);
> >          mov_write_mdta_keys_tag(pb, mov, s);
> >          mov_write_mdta_ilst_tag(pb, mov, s);
> > -    }
> > -    else {
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> > +        // We always write the primary item id as 1 since only one track is
> > +        // supported for AVIF.
> > +        mov_write_pitm_tag(pb, 1);
> > +        mov_write_iloc_tag(pb, mov, s);
> > +        mov_write_iinf_tag(pb, mov, s);
> > +        mov_write_iprp_tag(pb, mov, s);
> > +    } else {
> >          /* iTunes metadata tag */
> >          mov_write_itunes_hdlr_tag(pb, mov, s);
> >          mov_write_ilst_tag(pb, mov, s);
> > @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >      }
> >
> >      mov_write_mvhd_tag(pb, mov);
> > -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> > +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
> >          mov_write_iods_tag(pb, mov);
> >      for (i = 0; i < mov->nb_streams; i++) {
> > -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> > +            mov->mode == MODE_AVIF) {
> >              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> >              if (ret < 0)
> >                  return ret;
> > @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >
> >      if (mov->mode == MODE_PSP)
> >          mov_write_uuidusmt_tag(pb, s);
> > -    else
> > +    else if (mov->mode != MODE_AVIF)
> >          mov_write_udta_tag(pb, mov, s);
> >
> >      return update_size(pb, pos);
> > @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
> >      else if (mov->mode == MODE_3GP) {
> >          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
> >          minor =     has_h264 ?   0x100 :   0x200;
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> > +        minor = 0;
> >      } else if (mov->mode & MODE_3G2) {
> >          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
> >          minor =     has_h264 ? 0x20000 : 0x10000;
> > @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> >      // compatible brand a second time.
> >      if (mov->mode == MODE_ISM) {
> >          ffio_wfourcc(pb, "piff");
> > +    } else if (mov->mode == MODE_AVIF) {
> > +        const AVPixFmtDescriptor *pix_fmt_desc =
> > +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > +        const int depth = pix_fmt_desc->comp[0].depth;
> > +        if (mov->is_animated_avif) {
> > +            // For animated AVIF, major brand is "avis". Add "avif" as a
> > +            // compatible brand.
> > +            ffio_wfourcc(pb, "avif");
> > +            ffio_wfourcc(pb, "msf1");
> > +            ffio_wfourcc(pb, "iso8");
> > +        }
> > +        ffio_wfourcc(pb, "mif1");
> > +        ffio_wfourcc(pb, "miaf");
> > +        if (depth == 8 || depth == 10) {
> > +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> > +            // computing that is based on chroma subsampling type. 420 chroma
> > +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> > +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> > +                // 444 chroma subsampling.
> > +                ffio_wfourcc(pb, "MA1A");
> > +            } else {
> > +                // 420 chroma subsampling.
> > +                ffio_wfourcc(pb, "MA1B");
> > +            }
> > +        }
> >      } else if (mov->mode != MODE_MOV) {
> >          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
> >          // brand, if not already the major brand. This is compatible with users that
> > @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >      if (ret < 0)
> >          return ret;
> >
> > -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
> >          int ret;
> >          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
> >              if (mov->frag_interleave && mov->fragments > 0) {
> > @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >              }
> >          }
> >      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> > -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > +        if (trk->mode == MODE_AVIF) {
>
> Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
> and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
> OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
> ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.
>
> If the aim of this check is to disallow hint tracks or so, then it fails
> (all it does is ensuring that both the ordinary track as well as the
> hint track get data that might contain OBUs that should not be there).
>

Done. I had to update the extent offset code here as well.

> > +            avio_write(pb, pkt->data, pkt->size);
> > +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> >              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
> >                                           &size, &offset);
> >              if (ret < 0)
> > @@ -6230,6 +6422,10 @@ fail:
> >              }
> >          }
> >
> > +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> > +            mov->avif_extent_length = pkt->size;
> > +        }
> > +
> >          return mov_write_single_packet(s, pkt);
> >      }
> >  }
> > @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
> >      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
> >      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
> >      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> > +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
> >  #undef IS_MODE
> >
> >      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
> >          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
> >
> > +    if (mov->mode == MODE_AVIF)
> > +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> > +
> >      /* Set the FRAGMENT flag if any of the fragmentation methods are
> >       * enabled. */
> >      if (mov->max_fragment_duration || mov->max_fragment_size ||
> > @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
> >      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
> >       * is enabled, we don't support non-seekable output at all. */
> >      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
> > -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
> > +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
> > +         mov->mode == MODE_AVIF)) {
> >          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
> >          return AVERROR(EINVAL);
> >      }
> >
> > +    /* AVIF output must have exactly one video stream */
> > +    if (mov->mode == MODE_AVIF) {
> > +        if (s->nb_streams > 1) {
> > +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
> > +            return AVERROR(EINVAL);
> > +        }
> > +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> > +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
> > +            return AVERROR(EINVAL);
> > +        }
> > +    }
> > +
> > +
> >      mov->nb_streams = s->nb_streams;
> >      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> >          mov->chapter_track = mov->nb_streams++;
> > @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
> >                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
> >                          pix_fmt == AV_PIX_FMT_MONOBLACK;
> >              }
> > -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> > -                track->par->codec_id == AV_CODEC_ID_AV1) {
> > -                if (track->mode != MODE_MP4) {
> > -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > -                    return AVERROR(EINVAL);
> > -                }
> > +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> > +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > +             return AVERROR(EINVAL);
> > +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> > +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> > +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> > +                return AVERROR(EINVAL);
> >              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
> >                  /* altref frames handling is not defined in the spec as of version v1.0,
> >                   * so just forbid muxing VP8 streams altogether until a new version does */
> > @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
> >                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
> >              !mov->max_fragment_duration && !mov->max_fragment_size)
> >              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> > -    } else {
> > +    } else if (mov->mode != MODE_AVIF) {
> >          if (mov->flags & FF_MOV_FLAG_FASTSTART)
> >              mov->reserved_header_pos = avio_tell(pb);
> >          mov_write_mdat_tag(pb, mov);
> > @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
> >      return ret;
> >  }
> >
> > +static int avif_write_trailer(AVFormatContext *s)
> > +{
> > +    AVIOContext *pb = s->pb;
> > +    MOVMuxContext *mov = s->priv_data;
> > +    int64_t pos_backup, mdat_pos;
> > +    uint8_t *buf;
> > +    int buf_size, moov_size;
> > +    int i;
> > +
> > +    if (mov->moov_written) return 0;
> > +
> > +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> > +    mov_write_identification(pb, s);
> > +    mov_write_meta_tag(pb, mov, s);
> > +
> > +    moov_size = get_moov_size(s);
> > +    for (i = 0; i < mov->nb_streams; i++)
> > +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
>
> Don't call avio_tell() in a loop (and I wonder whether this loop is even
> necessary given that s->nb_streams is checked to be one).
>

Removed the loop. I was thinking about potentially supporting multiple
output tracks in the future (for alpha channel for example), but this
code would not work in that case any way.

> > +
> > +    if (mov->is_animated_avif) {
> > +        int ret;
> > +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> > +            return ret;
> > +    }
> > +
> > +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> > +    avio_wb32(pb, buf_size + 8);
> > +    ffio_wfourcc(pb, "mdat");
> > +    mdat_pos = avio_tell(pb);
> > +
> > +    avio_write(pb, buf, buf_size);
> > +    ffio_free_dyn_buf(&mov->mdat_buf);
>
> Unnecessary: This will be freed in mov_free().
>

Removed.

> > +
> > +    // write extent offset.
> > +    pos_backup = avio_tell(pb);
> > +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> > +    avio_wb32(pb, mdat_pos); /* rewrite offset */
>
> What guarantees that this fits into 32bits?
>

Added a check to fail if it does not fit in 32-bits.

> > +    avio_seek(pb, pos_backup, SEEK_SET);
> > +
> > +    return 0;
> > +}
> > +
> >  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
> >  static const AVCodecTag codec_3gp_tags[] = {
> >      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> > @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
> >      { AV_CODEC_ID_NONE, 0 },
> >  };
> >
> > +#if CONFIG_AVIF_MUXER
> > +static const AVCodecTag codec_avif_tags[] = {
> > +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> > +    { AV_CODEC_ID_NONE, 0 },
> > +};
> > +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> > +
> > +static const AVClass mov_avif_muxer_class = {
> > +    .class_name = "avif muxer",
> > +    .item_name  = av_default_item_name,
> > +    .version    = LIBAVUTIL_VERSION_INT,
> > +};
>
> It's not mandatory for a muxer to have a private class; it is only
> necessary for options. If you do not have options (like here), then the
> AVClass is useless.
> I actually wanted that you support the options that make sense for AVIF
> and I dislike that the avif muxer uses a different write_trailer than
> every other muxer here (which is the reason why e.g. the faststart
> option is not supported by it). Is it really so different?
> Furthermore, given that you don't use the same AVClass as everyone else,
> the values for fields that are AVOpt-enabled are zero; and this does not
> always coincide with the default value of the relevant option. See e.g.
> skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
> will even be set to a value that is outside of its legal range. I don't
> know whether this can lead to any divide-by-zero crashes or asserts, but
> it is certainly very fragile.
>

Hmm, i see a couple of solutions here:
1) Use the same private class as the MOV muxer and document that not
all options are supported when the format is AVIF. I think there are
already cases within this file where not all muxers may support all
the options.
2) Leave it as-is since the code always checks for AVIF mode first and
then does the rest, so the defaults of the non-relevant options should
not matter. But i agree that this is very fragile. One way to ensure
future changes don't break this structure is to add a fate test.

Please let me know what you think.

About using a separate write_trailer function, i can re-use the
existing mov_write_trailer but it will mostly be a special case for
AVIF mode with the code in avif_write_trailer, so i thought it was
cleaner to have a separate function anyway. I am not sure if there are
any other implications because of this. If you prefer that i move it
into mov_write_trailer, please let me know and i can do that as well.


> > +#endif
> > +
> >  #if CONFIG_MOV_MUXER
> >  const AVOutputFormat ff_mov_muxer = {
> >      .name              = "mov",
> > @@ -7535,3 +7806,21 @@ const AVOutputFormat ff_f4v_muxer = {
> >      .priv_class        = &mov_isobmff_muxer_class,
> >  };
> >  #endif
> > +#if CONFIG_AVIF_MUXER
> > +const AVOutputFormat ff_avif_muxer = {
> > +    .name              = "avif",
> > +    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
> > +    .mime_type         = "image/avif",
> > +    .extensions        = "avif",
> > +    .priv_data_size    = sizeof(MOVMuxContext),
> > +    .video_codec       = AV_CODEC_ID_AV1,
> > +    .init              = mov_init,
> > +    .write_header      = mov_write_header,
> > +    .write_packet      = mov_write_packet,
> > +    .write_trailer     = avif_write_trailer,
> > +    .deinit            = mov_free,
> > +    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
> > +    .codec_tag         = codec_avif_tags_list,
> > +    .priv_class        = &mov_avif_muxer_class,
> > +};
> > +#endif
> > diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> > index 2ac84ed070..55b8469f68 100644
> > --- a/libavformat/movenc.h
> > +++ b/libavformat/movenc.h
> > @@ -43,6 +43,7 @@
> >  #define MODE_IPOD 0x20
> >  #define MODE_ISM  0x40
> >  #define MODE_F4V  0x80
> > +#define MODE_AVIF 0x100
> >
> >  typedef struct MOVIentry {
> >      uint64_t     pos;
> > @@ -242,6 +243,10 @@ typedef struct MOVMuxContext {
> >      MOVPrftBox write_prft;
> >      int empty_hdlr_name;
> >      int movie_timescale;
> > +
> > +    int64_t avif_extent_pos;
> > +    int avif_extent_length;
> > +    int is_animated_avif;
> >  } MOVMuxContext;
> >
> >  #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".



--
Vignesh
Andreas Rheinhardt April 13, 2022, 9:04 p.m. UTC | #3
Vignesh Venkatasubramanian:
> On Mon, Mar 21, 2022 at 1:46 PM Andreas Rheinhardt
> <andreas.rheinhardt@outlook.com> wrote:
>>
>> Vignesh Venkatasubramanian:
>>> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
>>>
>>> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
>>>
>>> Sample usage for still image:
>>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
>>>
>>> Sample usage for animated AVIF image:
>>> ffmpeg -i video.mp4 animated.avif
>>>
>>> We can re-use any of the AV1 encoding options that will make
>>> sense for image encoding (like bitrate, tiles, encoding speed,
>>> etc).
>>>
>>> The files generated by this muxer has been verified to be valid
>>> AVIF files by the following:
>>> 1) Displays on Chrome (both still and animated images).
>>> 2) Displays on Firefox (only still images, firefox does not support
>>>    animated AVIF yet).
>>> 3) Verfied to be valid by Compliance Warden:
>>>    https://github.com/gpac/ComplianceWarden
>>>
>>> Fixes the encoder/muxer part of Trac Ticket #7621
>>>
>>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
>>> ---
>>>  configure                |   1 +
>>>  libavformat/allformats.c |   1 +
>>>  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
>>>  libavformat/movenc.h     |   5 +
>>>  4 files changed, 322 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/configure b/configure
>>> index 8c69ab0c86..6d7020e96b 100755
>>> --- a/configure
>>> +++ b/configure
>>> @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
>>>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
>>>  avi_demuxer_select="riffdec exif"
>>>  avi_muxer_select="riffenc"
>>> +avif_muxer_select="mov_muxer"
>>>  caf_demuxer_select="iso_media"
>>>  caf_muxer_select="iso_media"
>>>  dash_muxer_select="mp4_muxer"
>>> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
>>> index d066a7745b..400c17afbd 100644
>>> --- a/libavformat/allformats.c
>>> +++ b/libavformat/allformats.c
>>> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
>>>  extern const AVInputFormat  ff_av1_demuxer;
>>>  extern const AVInputFormat  ff_avi_demuxer;
>>>  extern const AVOutputFormat ff_avi_muxer;
>>> +extern const AVOutputFormat ff_avif_muxer;
>>>  extern const AVInputFormat  ff_avisynth_demuxer;
>>>  extern const AVOutputFormat ff_avm2_muxer;
>>>  extern const AVInputFormat  ff_avr_demuxer;
>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>> index 1a746a67fd..ff41579300 100644
>>> --- a/libavformat/movenc.c
>>> +++ b/libavformat/movenc.c
>>> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
>>>
>>>      avio_wb32(pb, 0);
>>>      ffio_wfourcc(pb, "av1C");
>>> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
>>> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
>>>      return update_size(pb, pos);
>>>  }
>>>
>>> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>>>          }
>>>      }
>>>
>>> -    /* We should only ever be called by MOV or MP4. */
>>> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
>>> +    /* We should only ever be called for MOV, MP4 and AVIF. */
>>> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
>>> +               track->mode == MODE_AVIF);
>>>
>>>      avio_wb32(pb, 0); /* size */
>>>      ffio_wfourcc(pb, "colr");
>>> -    if (track->mode == MODE_MP4)
>>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
>>>          ffio_wfourcc(pb, "nclx");
>>>      else
>>>          ffio_wfourcc(pb, "nclc");
>>> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
>>>      avio_wb16(pb, track->par->color_primaries);
>>>      avio_wb16(pb, track->par->color_trc);
>>>      avio_wb16(pb, track->par->color_space);
>>> -    if (track->mode == MODE_MP4) {
>>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>>>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
>>>          avio_w8(pb, full_range << 7);
>>>      }
>>> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>>>                    || (track->par->width == 1440 && track->par->height == 1080)
>>>                    || (track->par->width == 1920 && track->par->height == 1080);
>>>
>>> -    if (track->mode == MODE_MOV &&
>>> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
>>>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
>>>          av_strlcpy(compressor_name, encoder->value, 32);
>>>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
>>> @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
>>>      }
>>>  }
>>>
>>> +static int mov_write_ccst_tag(AVIOContext *pb)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    // Write sane defaults:
>>> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
>>> +    // intra_pred_used = 1 : intra prediction may or may not be used.
>>> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
>>> +    //                        reference images can be used.
>>> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
>>> +                        (1 << 6) |  /* intra_pred_used */
>>> +                        (15 << 2);  /* max_ref_per_pic */
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "ccst");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_w8(pb, ccstValue);
>>> +    avio_wb24(pb, 0);  /* reserved */
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>>  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
>>>  {
>>>      int ret = AVERROR_BUG;
>>> @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>>>      avio_wb32(pb, 0); /* size */
>>>      if (mov->encryption_scheme != MOV_ENC_NONE) {
>>>          ffio_wfourcc(pb, "encv");
>>> +    } else if (track->mode == MODE_AVIF) {
>>> +        ffio_wfourcc(pb, "av01");
>>>      } else {
>>>          avio_wl32(pb, track->tag); // store it byteswapped
>>>      }
>>> @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>>>          else
>>>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
>>>      }
>>> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
>>> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
>>>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
>>>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
>>>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
>>> @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
>>>      if (avid)
>>>          avio_wb32(pb, 0);
>>>
>>> +    if (track->mode == MODE_AVIF)
>>> +        mov_write_ccst_tag(pb);
>>> +
>>>      return update_size(pb, pos);
>>>  }
>>>
>>> @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>
>>>      if (track) {
>>>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
>>> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>>> +        if (track->mode == MODE_AVIF) {
>>> +            hdlr_type = "pict";
>>> +            descr = "ffmpeg";
>>> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
>>>              hdlr_type = "vide";
>>>              descr     = "VideoHandler";
>>>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
>>> @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>      return update_size(pb, pos);
>>>  }
>>>
>>> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "pitm");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_wb16(pb, item_id); /* item_id */
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "iloc");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
>>> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
>>> +    avio_wb16(pb, 1); /* item_count */
>>> +
>>> +    avio_wb16(pb, 1); /* item_id */
>>> +    avio_wb16(pb, 0); /* data_reference_index */
>>> +    avio_wb16(pb, 1); /* extent_count */
>>> +    mov->avif_extent_pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* extent_offset (written later) */
>>> +    // For animated AVIF, we simply write the first packet's size.
>>> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
>>> +
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t infe_pos;
>>> +    int64_t iinf_pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "iinf");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_wb16(pb, 1); /* entry_count */
>>> +
>>> +    infe_pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "infe");
>>> +    avio_w8(pb, 0x2); /* Version */
>>> +    avio_wb24(pb, 0); /* flags */
>>> +    avio_wb16(pb, 1); /* item_id */
>>> +    avio_wb16(pb, 0); /* item_protection_index */
>>> +    avio_write(pb, "av01", 4); /* item_type */
>>> +    avio_write(pb, "Color\0", 6); /* item_name */
>>> +    update_size(pb, infe_pos);
>>> +
>>> +    return update_size(pb, iinf_pos);
>>> +}
>>> +
>>> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "ispe");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
>>> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +
>>> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
>>
>> Is the number of planes really the correct number here? After all, for a
>> muxer (instead of a decoder) it does not matter whether the chroma
>> planes are interleaved like in AV_PIX_FMT_NV12 or not like in
>> AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.
>>
> 
> Done.
> 
>>> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
>>> +    int i;
>>
>> We allow and use "for (int i = 0;" from C99 to save lines like these.
>>
> 
> Done.
> 
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "pixi");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_w8(pb, num_channels); /* num_channels */
>>> +    for (i = 0; i < num_channels; ++i) {
>>> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
>>> +    }
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "ipco");
>>> +    mov_write_ispe_tag(pb, mov, s);
>>> +    mov_write_pixi_tag(pb, mov, s);
>>> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
>>> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "ipma");
>>> +    avio_wb32(pb, 0); /* Version & flags */
>>> +    avio_wb32(pb, 1); /* entry_count */
>>> +    avio_wb16(pb, 1); /* item_ID */
>>> +    avio_w8(pb, 4); /* association_count */
>>> +
>>> +    // ispe association.
>>> +    avio_w8(pb, 1); /* essential and property_index */
>>> +    // pixi association.
>>> +    avio_w8(pb, 2); /* essential and property_index */
>>> +    // av1C association.
>>> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
>>> +    // colr association.
>>> +    avio_w8(pb, 4); /* essential and property_index */
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +    ffio_wfourcc(pb, "iprp");
>>> +    mov_write_ipco_tag(pb, mov, s);
>>> +    mov_write_ipma_tag(pb, mov, s);
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>>  static int mov_write_hmhd_tag(AVIOContext *pb)
>>>  {
>>>      /* This atom must be present, but leaving the values at zero
>>> @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>>>              display_matrix = NULL;
>>>      }
>>>
>>> -    if (track->flags & MOV_TRACK_ENABLED)
>>> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
>>>          flags |= MOV_TKHD_FLAG_ENABLED;
>>>
>>>      if (track->mode == MODE_ISM)
>>> @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
>>>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
>>>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
>>>          int64_t track_width_1616;
>>> -        if (track->mode == MODE_MOV) {
>>> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
>>>              track_width_1616 = track->par->width * 0x10000ULL;
>>>          } else {
>>>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
>>> @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>>>              mov_write_tapt_tag(pb, track);
>>>          }
>>>      }
>>> -    mov_write_track_udta_tag(pb, mov, st);
>>> +    if (track->mode != MODE_AVIF)
>>> +        mov_write_track_udta_tag(pb, mov, st);
>>>      track->entry = entry_backup;
>>>      track->chunkCount = chunk_backup;
>>>      return update_size(pb, pos);
>>> @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
>>>          mov_write_mdta_hdlr_tag(pb, mov, s);
>>>          mov_write_mdta_keys_tag(pb, mov, s);
>>>          mov_write_mdta_ilst_tag(pb, mov, s);
>>> -    }
>>> -    else {
>>> +    } else if (mov->mode == MODE_AVIF) {
>>> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
>>> +        // We always write the primary item id as 1 since only one track is
>>> +        // supported for AVIF.
>>> +        mov_write_pitm_tag(pb, 1);
>>> +        mov_write_iloc_tag(pb, mov, s);
>>> +        mov_write_iinf_tag(pb, mov, s);
>>> +        mov_write_iprp_tag(pb, mov, s);
>>> +    } else {
>>>          /* iTunes metadata tag */
>>>          mov_write_itunes_hdlr_tag(pb, mov, s);
>>>          mov_write_ilst_tag(pb, mov, s);
>>> @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>>>      }
>>>
>>>      mov_write_mvhd_tag(pb, mov);
>>> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
>>> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
>>>          mov_write_iods_tag(pb, mov);
>>>      for (i = 0; i < mov->nb_streams; i++) {
>>> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
>>> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
>>> +            mov->mode == MODE_AVIF) {
>>>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
>>>              if (ret < 0)
>>>                  return ret;
>>> @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
>>>
>>>      if (mov->mode == MODE_PSP)
>>>          mov_write_uuidusmt_tag(pb, s);
>>> -    else
>>> +    else if (mov->mode != MODE_AVIF)
>>>          mov_write_udta_tag(pb, mov, s);
>>>
>>>      return update_size(pb, pos);
>>> @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
>>>      else if (mov->mode == MODE_3GP) {
>>>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
>>>          minor =     has_h264 ?   0x100 :   0x200;
>>> +    } else if (mov->mode == MODE_AVIF) {
>>> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
>>> +        minor = 0;
>>>      } else if (mov->mode & MODE_3G2) {
>>>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
>>>          minor =     has_h264 ? 0x20000 : 0x10000;
>>> @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
>>>      // compatible brand a second time.
>>>      if (mov->mode == MODE_ISM) {
>>>          ffio_wfourcc(pb, "piff");
>>> +    } else if (mov->mode == MODE_AVIF) {
>>> +        const AVPixFmtDescriptor *pix_fmt_desc =
>>> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
>>> +        const int depth = pix_fmt_desc->comp[0].depth;
>>> +        if (mov->is_animated_avif) {
>>> +            // For animated AVIF, major brand is "avis". Add "avif" as a
>>> +            // compatible brand.
>>> +            ffio_wfourcc(pb, "avif");
>>> +            ffio_wfourcc(pb, "msf1");
>>> +            ffio_wfourcc(pb, "iso8");
>>> +        }
>>> +        ffio_wfourcc(pb, "mif1");
>>> +        ffio_wfourcc(pb, "miaf");
>>> +        if (depth == 8 || depth == 10) {
>>> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
>>> +            // computing that is based on chroma subsampling type. 420 chroma
>>> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
>>> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
>>> +                // 444 chroma subsampling.
>>> +                ffio_wfourcc(pb, "MA1A");
>>> +            } else {
>>> +                // 420 chroma subsampling.
>>> +                ffio_wfourcc(pb, "MA1B");
>>> +            }
>>> +        }
>>>      } else if (mov->mode != MODE_MOV) {
>>>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
>>>          // brand, if not already the major brand. This is compatible with users that
>>> @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>>      if (ret < 0)
>>>          return ret;
>>>
>>> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
>>> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
>>>          int ret;
>>>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
>>>              if (mov->frag_interleave && mov->fragments > 0) {
>>> @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>>              }
>>>          }
>>>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
>>> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
>>> +        if (trk->mode == MODE_AVIF) {
>>
>> Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
>> and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
>> OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
>> ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.
>>
>> If the aim of this check is to disallow hint tracks or so, then it fails
>> (all it does is ensuring that both the ordinary track as well as the
>> hint track get data that might contain OBUs that should not be there).
>>
> 
> Done. I had to update the extent offset code here as well.
> 
>>> +            avio_write(pb, pkt->data, pkt->size);
>>> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
>>>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
>>>                                           &size, &offset);
>>>              if (ret < 0)
>>> @@ -6230,6 +6422,10 @@ fail:
>>>              }
>>>          }
>>>
>>> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
>>> +            mov->avif_extent_length = pkt->size;
>>> +        }
>>> +
>>>          return mov_write_single_packet(s, pkt);
>>>      }
>>>  }
>>> @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
>>>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
>>>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
>>>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
>>> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
>>>  #undef IS_MODE
>>>
>>>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
>>>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
>>>
>>> +    if (mov->mode == MODE_AVIF)
>>> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
>>> +
>>>      /* Set the FRAGMENT flag if any of the fragmentation methods are
>>>       * enabled. */
>>>      if (mov->max_fragment_duration || mov->max_fragment_size ||
>>> @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
>>>      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
>>>       * is enabled, we don't support non-seekable output at all. */
>>>      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
>>> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
>>> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
>>> +         mov->mode == MODE_AVIF)) {
>>>          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
>>>          return AVERROR(EINVAL);
>>>      }
>>>
>>> +    /* AVIF output must have exactly one video stream */
>>> +    if (mov->mode == MODE_AVIF) {
>>> +        if (s->nb_streams > 1) {
>>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
>>> +            return AVERROR(EINVAL);
>>> +        }
>>> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
>>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
>>> +            return AVERROR(EINVAL);
>>> +        }
>>> +    }
>>> +
>>> +
>>>      mov->nb_streams = s->nb_streams;
>>>      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
>>>          mov->chapter_track = mov->nb_streams++;
>>> @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
>>>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
>>>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
>>>              }
>>> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
>>> -                track->par->codec_id == AV_CODEC_ID_AV1) {
>>> -                if (track->mode != MODE_MP4) {
>>> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
>>> -                    return AVERROR(EINVAL);
>>> -                }
>>> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
>>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
>>> +             return AVERROR(EINVAL);
>>> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
>>> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
>>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
>>> +                return AVERROR(EINVAL);
>>>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
>>>                  /* altref frames handling is not defined in the spec as of version v1.0,
>>>                   * so just forbid muxing VP8 streams altogether until a new version does */
>>> @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
>>>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
>>>              !mov->max_fragment_duration && !mov->max_fragment_size)
>>>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
>>> -    } else {
>>> +    } else if (mov->mode != MODE_AVIF) {
>>>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
>>>              mov->reserved_header_pos = avio_tell(pb);
>>>          mov_write_mdat_tag(pb, mov);
>>> @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
>>>      return ret;
>>>  }
>>>
>>> +static int avif_write_trailer(AVFormatContext *s)
>>> +{
>>> +    AVIOContext *pb = s->pb;
>>> +    MOVMuxContext *mov = s->priv_data;
>>> +    int64_t pos_backup, mdat_pos;
>>> +    uint8_t *buf;
>>> +    int buf_size, moov_size;
>>> +    int i;
>>> +
>>> +    if (mov->moov_written) return 0;
>>> +
>>> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
>>> +    mov_write_identification(pb, s);
>>> +    mov_write_meta_tag(pb, mov, s);
>>> +
>>> +    moov_size = get_moov_size(s);
>>> +    for (i = 0; i < mov->nb_streams; i++)
>>> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
>>
>> Don't call avio_tell() in a loop (and I wonder whether this loop is even
>> necessary given that s->nb_streams is checked to be one).
>>
> 
> Removed the loop. I was thinking about potentially supporting multiple
> output tracks in the future (for alpha channel for example), but this
> code would not work in that case any way.
> 
>>> +
>>> +    if (mov->is_animated_avif) {
>>> +        int ret;
>>> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
>>> +            return ret;
>>> +    }
>>> +
>>> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
>>> +    avio_wb32(pb, buf_size + 8);
>>> +    ffio_wfourcc(pb, "mdat");
>>> +    mdat_pos = avio_tell(pb);
>>> +
>>> +    avio_write(pb, buf, buf_size);
>>> +    ffio_free_dyn_buf(&mov->mdat_buf);
>>
>> Unnecessary: This will be freed in mov_free().
>>
> 
> Removed.
> 
>>> +
>>> +    // write extent offset.
>>> +    pos_backup = avio_tell(pb);
>>> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
>>> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
>>
>> What guarantees that this fits into 32bits?
>>
> 
> Added a check to fail if it does not fit in 32-bits.
> 
>>> +    avio_seek(pb, pos_backup, SEEK_SET);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
>>>  static const AVCodecTag codec_3gp_tags[] = {
>>>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
>>> @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
>>>      { AV_CODEC_ID_NONE, 0 },
>>>  };
>>>
>>> +#if CONFIG_AVIF_MUXER
>>> +static const AVCodecTag codec_avif_tags[] = {
>>> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
>>> +    { AV_CODEC_ID_NONE, 0 },
>>> +};
>>> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
>>> +
>>> +static const AVClass mov_avif_muxer_class = {
>>> +    .class_name = "avif muxer",
>>> +    .item_name  = av_default_item_name,
>>> +    .version    = LIBAVUTIL_VERSION_INT,
>>> +};
>>
>> It's not mandatory for a muxer to have a private class; it is only
>> necessary for options. If you do not have options (like here), then the
>> AVClass is useless.
>> I actually wanted that you support the options that make sense for AVIF
>> and I dislike that the avif muxer uses a different write_trailer than
>> every other muxer here (which is the reason why e.g. the faststart
>> option is not supported by it). Is it really so different?
>> Furthermore, given that you don't use the same AVClass as everyone else,
>> the values for fields that are AVOpt-enabled are zero; and this does not
>> always coincide with the default value of the relevant option. See e.g.
>> skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
>> will even be set to a value that is outside of its legal range. I don't
>> know whether this can lead to any divide-by-zero crashes or asserts, but
>> it is certainly very fragile.
>>
> 
> Hmm, i see a couple of solutions here:
> 1) Use the same private class as the MOV muxer and document that not
> all options are supported when the format is AVIF. I think there are
> already cases within this file where not all muxers may support all
> the options.
> 2) Leave it as-is since the code always checks for AVIF mode first and
> then does the rest, so the defaults of the non-relevant options should
> not matter. But i agree that this is very fragile. One way to ensure
> future changes don't break this structure is to add a fate test.
> 

Can you tell which options (if enabled) would work and which would not
work or would create spec-incompliant output?

> Please let me know what you think.
> 
> About using a separate write_trailer function, i can re-use the
> existing mov_write_trailer but it will mostly be a special case for
> AVIF mode with the code in avif_write_trailer, so i thought it was
> cleaner to have a separate function anyway. I am not sure if there are
> any other implications because of this. If you prefer that i move it
> into mov_write_trailer, please let me know and i can do that as well.
>
Vignesh Venkat April 13, 2022, 9:35 p.m. UTC | #4
On Wed, Apr 13, 2022 at 2:04 PM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
>
> Vignesh Venkatasubramanian:
> > On Mon, Mar 21, 2022 at 1:46 PM Andreas Rheinhardt
> > <andreas.rheinhardt@outlook.com> wrote:
> >>
> >> Vignesh Venkatasubramanian:
> >>> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> >>>
> >>> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> >>>
> >>> Sample usage for still image:
> >>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> >>>
> >>> Sample usage for animated AVIF image:
> >>> ffmpeg -i video.mp4 animated.avif
> >>>
> >>> We can re-use any of the AV1 encoding options that will make
> >>> sense for image encoding (like bitrate, tiles, encoding speed,
> >>> etc).
> >>>
> >>> The files generated by this muxer has been verified to be valid
> >>> AVIF files by the following:
> >>> 1) Displays on Chrome (both still and animated images).
> >>> 2) Displays on Firefox (only still images, firefox does not support
> >>>    animated AVIF yet).
> >>> 3) Verfied to be valid by Compliance Warden:
> >>>    https://github.com/gpac/ComplianceWarden
> >>>
> >>> Fixes the encoder/muxer part of Trac Ticket #7621
> >>>
> >>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> >>> ---
> >>>  configure                |   1 +
> >>>  libavformat/allformats.c |   1 +
> >>>  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
> >>>  libavformat/movenc.h     |   5 +
> >>>  4 files changed, 322 insertions(+), 26 deletions(-)
> >>>
> >>> diff --git a/configure b/configure
> >>> index 8c69ab0c86..6d7020e96b 100755
> >>> --- a/configure
> >>> +++ b/configure
> >>> @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
> >>>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
> >>>  avi_demuxer_select="riffdec exif"
> >>>  avi_muxer_select="riffenc"
> >>> +avif_muxer_select="mov_muxer"
> >>>  caf_demuxer_select="iso_media"
> >>>  caf_muxer_select="iso_media"
> >>>  dash_muxer_select="mp4_muxer"
> >>> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> >>> index d066a7745b..400c17afbd 100644
> >>> --- a/libavformat/allformats.c
> >>> +++ b/libavformat/allformats.c
> >>> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
> >>>  extern const AVInputFormat  ff_av1_demuxer;
> >>>  extern const AVInputFormat  ff_avi_demuxer;
> >>>  extern const AVOutputFormat ff_avi_muxer;
> >>> +extern const AVOutputFormat ff_avif_muxer;
> >>>  extern const AVInputFormat  ff_avisynth_demuxer;
> >>>  extern const AVOutputFormat ff_avm2_muxer;
> >>>  extern const AVInputFormat  ff_avr_demuxer;
> >>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> >>> index 1a746a67fd..ff41579300 100644
> >>> --- a/libavformat/movenc.c
> >>> +++ b/libavformat/movenc.c
> >>> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
> >>>
> >>>      avio_wb32(pb, 0);
> >>>      ffio_wfourcc(pb, "av1C");
> >>> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> >>> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
> >>>      return update_size(pb, pos);
> >>>  }
> >>>
> >>> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >>>          }
> >>>      }
> >>>
> >>> -    /* We should only ever be called by MOV or MP4. */
> >>> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> >>> +    /* We should only ever be called for MOV, MP4 and AVIF. */
> >>> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> >>> +               track->mode == MODE_AVIF);
> >>>
> >>>      avio_wb32(pb, 0); /* size */
> >>>      ffio_wfourcc(pb, "colr");
> >>> -    if (track->mode == MODE_MP4)
> >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
> >>>          ffio_wfourcc(pb, "nclx");
> >>>      else
> >>>          ffio_wfourcc(pb, "nclc");
> >>> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> >>>      avio_wb16(pb, track->par->color_primaries);
> >>>      avio_wb16(pb, track->par->color_trc);
> >>>      avio_wb16(pb, track->par->color_space);
> >>> -    if (track->mode == MODE_MP4) {
> >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >>>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
> >>>          avio_w8(pb, full_range << 7);
> >>>      }
> >>> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> >>>                    || (track->par->width == 1440 && track->par->height == 1080)
> >>>                    || (track->par->width == 1920 && track->par->height == 1080);
> >>>
> >>> -    if (track->mode == MODE_MOV &&
> >>> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
> >>>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
> >>>          av_strlcpy(compressor_name, encoder->value, 32);
> >>>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> >>> @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> >>>      }
> >>>  }
> >>>
> >>> +static int mov_write_ccst_tag(AVIOContext *pb)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    // Write sane defaults:
> >>> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
> >>> +    // intra_pred_used = 1 : intra prediction may or may not be used.
> >>> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
> >>> +    //                        reference images can be used.
> >>> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
> >>> +                        (1 << 6) |  /* intra_pred_used */
> >>> +                        (15 << 2);  /* max_ref_per_pic */
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "ccst");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_w8(pb, ccstValue);
> >>> +    avio_wb24(pb, 0);  /* reserved */
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>>  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
> >>>  {
> >>>      int ret = AVERROR_BUG;
> >>> @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >>>      avio_wb32(pb, 0); /* size */
> >>>      if (mov->encryption_scheme != MOV_ENC_NONE) {
> >>>          ffio_wfourcc(pb, "encv");
> >>> +    } else if (track->mode == MODE_AVIF) {
> >>> +        ffio_wfourcc(pb, "av01");
> >>>      } else {
> >>>          avio_wl32(pb, track->tag); // store it byteswapped
> >>>      }
> >>> @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >>>          else
> >>>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
> >>>      }
> >>> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> >>> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> >>>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> >>>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> >>>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> >>> @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> >>>      if (avid)
> >>>          avio_wb32(pb, 0);
> >>>
> >>> +    if (track->mode == MODE_AVIF)
> >>> +        mov_write_ccst_tag(pb);
> >>> +
> >>>      return update_size(pb, pos);
> >>>  }
> >>>
> >>> @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >>>
> >>>      if (track) {
> >>>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> >>> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> >>> +        if (track->mode == MODE_AVIF) {
> >>> +            hdlr_type = "pict";
> >>> +            descr = "ffmpeg";
> >>> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> >>>              hdlr_type = "vide";
> >>>              descr     = "VideoHandler";
> >>>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> >>> @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> >>>      return update_size(pb, pos);
> >>>  }
> >>>
> >>> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "pitm");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_wb16(pb, item_id); /* item_id */
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "iloc");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> >>> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> >>> +    avio_wb16(pb, 1); /* item_count */
> >>> +
> >>> +    avio_wb16(pb, 1); /* item_id */
> >>> +    avio_wb16(pb, 0); /* data_reference_index */
> >>> +    avio_wb16(pb, 1); /* extent_count */
> >>> +    mov->avif_extent_pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* extent_offset (written later) */
> >>> +    // For animated AVIF, we simply write the first packet's size.
> >>> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> >>> +
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t infe_pos;
> >>> +    int64_t iinf_pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "iinf");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_wb16(pb, 1); /* entry_count */
> >>> +
> >>> +    infe_pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "infe");
> >>> +    avio_w8(pb, 0x2); /* Version */
> >>> +    avio_wb24(pb, 0); /* flags */
> >>> +    avio_wb16(pb, 1); /* item_id */
> >>> +    avio_wb16(pb, 0); /* item_protection_index */
> >>> +    avio_write(pb, "av01", 4); /* item_type */
> >>> +    avio_write(pb, "Color\0", 6); /* item_name */
> >>> +    update_size(pb, infe_pos);
> >>> +
> >>> +    return update_size(pb, iinf_pos);
> >>> +}
> >>> +
> >>> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "ispe");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> >>> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +
> >>> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
> >>
> >> Is the number of planes really the correct number here? After all, for a
> >> muxer (instead of a decoder) it does not matter whether the chroma
> >> planes are interleaved like in AV_PIX_FMT_NV12 or not like in
> >> AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.
> >>
> >
> > Done.
> >
> >>> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> >>> +    int i;
> >>
> >> We allow and use "for (int i = 0;" from C99 to save lines like these.
> >>
> >
> > Done.
> >
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "pixi");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_w8(pb, num_channels); /* num_channels */
> >>> +    for (i = 0; i < num_channels; ++i) {
> >>> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> >>> +    }
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "ipco");
> >>> +    mov_write_ispe_tag(pb, mov, s);
> >>> +    mov_write_pixi_tag(pb, mov, s);
> >>> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> >>> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "ipma");
> >>> +    avio_wb32(pb, 0); /* Version & flags */
> >>> +    avio_wb32(pb, 1); /* entry_count */
> >>> +    avio_wb16(pb, 1); /* item_ID */
> >>> +    avio_w8(pb, 4); /* association_count */
> >>> +
> >>> +    // ispe association.
> >>> +    avio_w8(pb, 1); /* essential and property_index */
> >>> +    // pixi association.
> >>> +    avio_w8(pb, 2); /* essential and property_index */
> >>> +    // av1C association.
> >>> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> >>> +    // colr association.
> >>> +    avio_w8(pb, 4); /* essential and property_index */
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> >>> +{
> >>> +    int64_t pos = avio_tell(pb);
> >>> +    avio_wb32(pb, 0); /* size */
> >>> +    ffio_wfourcc(pb, "iprp");
> >>> +    mov_write_ipco_tag(pb, mov, s);
> >>> +    mov_write_ipma_tag(pb, mov, s);
> >>> +    return update_size(pb, pos);
> >>> +}
> >>> +
> >>>  static int mov_write_hmhd_tag(AVIOContext *pb)
> >>>  {
> >>>      /* This atom must be present, but leaving the values at zero
> >>> @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >>>              display_matrix = NULL;
> >>>      }
> >>>
> >>> -    if (track->flags & MOV_TRACK_ENABLED)
> >>> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
> >>>          flags |= MOV_TKHD_FLAG_ENABLED;
> >>>
> >>>      if (track->mode == MODE_ISM)
> >>> @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> >>>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
> >>>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> >>>          int64_t track_width_1616;
> >>> -        if (track->mode == MODE_MOV) {
> >>> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
> >>>              track_width_1616 = track->par->width * 0x10000ULL;
> >>>          } else {
> >>>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> >>> @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> >>>              mov_write_tapt_tag(pb, track);
> >>>          }
> >>>      }
> >>> -    mov_write_track_udta_tag(pb, mov, st);
> >>> +    if (track->mode != MODE_AVIF)
> >>> +        mov_write_track_udta_tag(pb, mov, st);
> >>>      track->entry = entry_backup;
> >>>      track->chunkCount = chunk_backup;
> >>>      return update_size(pb, pos);
> >>> @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> >>>          mov_write_mdta_hdlr_tag(pb, mov, s);
> >>>          mov_write_mdta_keys_tag(pb, mov, s);
> >>>          mov_write_mdta_ilst_tag(pb, mov, s);
> >>> -    }
> >>> -    else {
> >>> +    } else if (mov->mode == MODE_AVIF) {
> >>> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> >>> +        // We always write the primary item id as 1 since only one track is
> >>> +        // supported for AVIF.
> >>> +        mov_write_pitm_tag(pb, 1);
> >>> +        mov_write_iloc_tag(pb, mov, s);
> >>> +        mov_write_iinf_tag(pb, mov, s);
> >>> +        mov_write_iprp_tag(pb, mov, s);
> >>> +    } else {
> >>>          /* iTunes metadata tag */
> >>>          mov_write_itunes_hdlr_tag(pb, mov, s);
> >>>          mov_write_ilst_tag(pb, mov, s);
> >>> @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >>>      }
> >>>
> >>>      mov_write_mvhd_tag(pb, mov);
> >>> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> >>> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
> >>>          mov_write_iods_tag(pb, mov);
> >>>      for (i = 0; i < mov->nb_streams; i++) {
> >>> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> >>> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> >>> +            mov->mode == MODE_AVIF) {
> >>>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> >>>              if (ret < 0)
> >>>                  return ret;
> >>> @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> >>>
> >>>      if (mov->mode == MODE_PSP)
> >>>          mov_write_uuidusmt_tag(pb, s);
> >>> -    else
> >>> +    else if (mov->mode != MODE_AVIF)
> >>>          mov_write_udta_tag(pb, mov, s);
> >>>
> >>>      return update_size(pb, pos);
> >>> @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
> >>>      else if (mov->mode == MODE_3GP) {
> >>>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
> >>>          minor =     has_h264 ?   0x100 :   0x200;
> >>> +    } else if (mov->mode == MODE_AVIF) {
> >>> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> >>> +        minor = 0;
> >>>      } else if (mov->mode & MODE_3G2) {
> >>>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
> >>>          minor =     has_h264 ? 0x20000 : 0x10000;
> >>> @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> >>>      // compatible brand a second time.
> >>>      if (mov->mode == MODE_ISM) {
> >>>          ffio_wfourcc(pb, "piff");
> >>> +    } else if (mov->mode == MODE_AVIF) {
> >>> +        const AVPixFmtDescriptor *pix_fmt_desc =
> >>> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> >>> +        const int depth = pix_fmt_desc->comp[0].depth;
> >>> +        if (mov->is_animated_avif) {
> >>> +            // For animated AVIF, major brand is "avis". Add "avif" as a
> >>> +            // compatible brand.
> >>> +            ffio_wfourcc(pb, "avif");
> >>> +            ffio_wfourcc(pb, "msf1");
> >>> +            ffio_wfourcc(pb, "iso8");
> >>> +        }
> >>> +        ffio_wfourcc(pb, "mif1");
> >>> +        ffio_wfourcc(pb, "miaf");
> >>> +        if (depth == 8 || depth == 10) {
> >>> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> >>> +            // computing that is based on chroma subsampling type. 420 chroma
> >>> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> >>> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> >>> +                // 444 chroma subsampling.
> >>> +                ffio_wfourcc(pb, "MA1A");
> >>> +            } else {
> >>> +                // 420 chroma subsampling.
> >>> +                ffio_wfourcc(pb, "MA1B");
> >>> +            }
> >>> +        }
> >>>      } else if (mov->mode != MODE_MOV) {
> >>>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
> >>>          // brand, if not already the major brand. This is compatible with users that
> >>> @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >>>      if (ret < 0)
> >>>          return ret;
> >>>
> >>> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> >>> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
> >>>          int ret;
> >>>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
> >>>              if (mov->frag_interleave && mov->fragments > 0) {
> >>> @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> >>>              }
> >>>          }
> >>>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> >>> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> >>> +        if (trk->mode == MODE_AVIF) {
> >>
> >> Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
> >> and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
> >> OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
> >> ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.
> >>
> >> If the aim of this check is to disallow hint tracks or so, then it fails
> >> (all it does is ensuring that both the ordinary track as well as the
> >> hint track get data that might contain OBUs that should not be there).
> >>
> >
> > Done. I had to update the extent offset code here as well.
> >
> >>> +            avio_write(pb, pkt->data, pkt->size);
> >>> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> >>>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
> >>>                                           &size, &offset);
> >>>              if (ret < 0)
> >>> @@ -6230,6 +6422,10 @@ fail:
> >>>              }
> >>>          }
> >>>
> >>> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> >>> +            mov->avif_extent_length = pkt->size;
> >>> +        }
> >>> +
> >>>          return mov_write_single_packet(s, pkt);
> >>>      }
> >>>  }
> >>> @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
> >>>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
> >>>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
> >>>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> >>> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
> >>>  #undef IS_MODE
> >>>
> >>>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
> >>>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
> >>>
> >>> +    if (mov->mode == MODE_AVIF)
> >>> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> >>> +
> >>>      /* Set the FRAGMENT flag if any of the fragmentation methods are
> >>>       * enabled. */
> >>>      if (mov->max_fragment_duration || mov->max_fragment_size ||
> >>> @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
> >>>      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
> >>>       * is enabled, we don't support non-seekable output at all. */
> >>>      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
> >>> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
> >>> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
> >>> +         mov->mode == MODE_AVIF)) {
> >>>          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
> >>>          return AVERROR(EINVAL);
> >>>      }
> >>>
> >>> +    /* AVIF output must have exactly one video stream */
> >>> +    if (mov->mode == MODE_AVIF) {
> >>> +        if (s->nb_streams > 1) {
> >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
> >>> +            return AVERROR(EINVAL);
> >>> +        }
> >>> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
> >>> +            return AVERROR(EINVAL);
> >>> +        }
> >>> +    }
> >>> +
> >>> +
> >>>      mov->nb_streams = s->nb_streams;
> >>>      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> >>>          mov->chapter_track = mov->nb_streams++;
> >>> @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
> >>>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
> >>>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
> >>>              }
> >>> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> >>> -                track->par->codec_id == AV_CODEC_ID_AV1) {
> >>> -                if (track->mode != MODE_MP4) {
> >>> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> >>> -                    return AVERROR(EINVAL);
> >>> -                }
> >>> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> >>> +             return AVERROR(EINVAL);
> >>> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> >>> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> >>> +                return AVERROR(EINVAL);
> >>>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
> >>>                  /* altref frames handling is not defined in the spec as of version v1.0,
> >>>                   * so just forbid muxing VP8 streams altogether until a new version does */
> >>> @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
> >>>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
> >>>              !mov->max_fragment_duration && !mov->max_fragment_size)
> >>>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> >>> -    } else {
> >>> +    } else if (mov->mode != MODE_AVIF) {
> >>>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
> >>>              mov->reserved_header_pos = avio_tell(pb);
> >>>          mov_write_mdat_tag(pb, mov);
> >>> @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
> >>>      return ret;
> >>>  }
> >>>
> >>> +static int avif_write_trailer(AVFormatContext *s)
> >>> +{
> >>> +    AVIOContext *pb = s->pb;
> >>> +    MOVMuxContext *mov = s->priv_data;
> >>> +    int64_t pos_backup, mdat_pos;
> >>> +    uint8_t *buf;
> >>> +    int buf_size, moov_size;
> >>> +    int i;
> >>> +
> >>> +    if (mov->moov_written) return 0;
> >>> +
> >>> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> >>> +    mov_write_identification(pb, s);
> >>> +    mov_write_meta_tag(pb, mov, s);
> >>> +
> >>> +    moov_size = get_moov_size(s);
> >>> +    for (i = 0; i < mov->nb_streams; i++)
> >>> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> >>
> >> Don't call avio_tell() in a loop (and I wonder whether this loop is even
> >> necessary given that s->nb_streams is checked to be one).
> >>
> >
> > Removed the loop. I was thinking about potentially supporting multiple
> > output tracks in the future (for alpha channel for example), but this
> > code would not work in that case any way.
> >
> >>> +
> >>> +    if (mov->is_animated_avif) {
> >>> +        int ret;
> >>> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> >>> +            return ret;
> >>> +    }
> >>> +
> >>> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> >>> +    avio_wb32(pb, buf_size + 8);
> >>> +    ffio_wfourcc(pb, "mdat");
> >>> +    mdat_pos = avio_tell(pb);
> >>> +
> >>> +    avio_write(pb, buf, buf_size);
> >>> +    ffio_free_dyn_buf(&mov->mdat_buf);
> >>
> >> Unnecessary: This will be freed in mov_free().
> >>
> >
> > Removed.
> >
> >>> +
> >>> +    // write extent offset.
> >>> +    pos_backup = avio_tell(pb);
> >>> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> >>> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
> >>
> >> What guarantees that this fits into 32bits?
> >>
> >
> > Added a check to fail if it does not fit in 32-bits.
> >
> >>> +    avio_seek(pb, pos_backup, SEEK_SET);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
> >>>  static const AVCodecTag codec_3gp_tags[] = {
> >>>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> >>> @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
> >>>      { AV_CODEC_ID_NONE, 0 },
> >>>  };
> >>>
> >>> +#if CONFIG_AVIF_MUXER
> >>> +static const AVCodecTag codec_avif_tags[] = {
> >>> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> >>> +    { AV_CODEC_ID_NONE, 0 },
> >>> +};
> >>> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> >>> +
> >>> +static const AVClass mov_avif_muxer_class = {
> >>> +    .class_name = "avif muxer",
> >>> +    .item_name  = av_default_item_name,
> >>> +    .version    = LIBAVUTIL_VERSION_INT,
> >>> +};
> >>
> >> It's not mandatory for a muxer to have a private class; it is only
> >> necessary for options. If you do not have options (like here), then the
> >> AVClass is useless.
> >> I actually wanted that you support the options that make sense for AVIF
> >> and I dislike that the avif muxer uses a different write_trailer than
> >> every other muxer here (which is the reason why e.g. the faststart
> >> option is not supported by it). Is it really so different?
> >> Furthermore, given that you don't use the same AVClass as everyone else,
> >> the values for fields that are AVOpt-enabled are zero; and this does not
> >> always coincide with the default value of the relevant option. See e.g.
> >> skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
> >> will even be set to a value that is outside of its legal range. I don't
> >> know whether this can lead to any divide-by-zero crashes or asserts, but
> >> it is certainly very fragile.
> >>
> >
> > Hmm, i see a couple of solutions here:
> > 1) Use the same private class as the MOV muxer and document that not
> > all options are supported when the format is AVIF. I think there are
> > already cases within this file where not all muxers may support all
> > the options.
> > 2) Leave it as-is since the code always checks for AVIF mode first and
> > then does the rest, so the defaults of the non-relevant options should
> > not matter. But i agree that this is very fragile. One way to ensure
> > future changes don't break this structure is to add a fate test.
> >
>
> Can you tell which options (if enabled) would work and which would not
> work or would create spec-incompliant output?
>

The following options will make sense for AVIF (and may just work
as-is, i am not sure of this): write_colr, prefer_icc,
video_track_timescale and a few other generic MOV options like
use_stream_ids_as_track_ids, empty_hdlr_name, and brand.

> > Please let me know what you think.
> >
> > About using a separate write_trailer function, i can re-use the
> > existing mov_write_trailer but it will mostly be a special case for
> > AVIF mode with the code in avif_write_trailer, so i thought it was
> > cleaner to have a separate function anyway. I am not sure if there are
> > any other implications because of this. If you prefer that i move it
> > into mov_write_trailer, please let me know and i can do that as well.
> >
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Vignesh Venkat April 21, 2022, 4:38 p.m. UTC | #5
On Wed, Apr 13, 2022 at 2:35 PM Vignesh Venkatasubramanian
<vigneshv@google.com> wrote:
>
> On Wed, Apr 13, 2022 at 2:04 PM Andreas Rheinhardt
> <andreas.rheinhardt@outlook.com> wrote:
> >
> > Vignesh Venkatasubramanian:
> > > On Mon, Mar 21, 2022 at 1:46 PM Andreas Rheinhardt
> > > <andreas.rheinhardt@outlook.com> wrote:
> > >>
> > >> Vignesh Venkatasubramanian:
> > >>> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> > >>>
> > >>> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> > >>>
> > >>> Sample usage for still image:
> > >>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> > >>>
> > >>> Sample usage for animated AVIF image:
> > >>> ffmpeg -i video.mp4 animated.avif
> > >>>
> > >>> We can re-use any of the AV1 encoding options that will make
> > >>> sense for image encoding (like bitrate, tiles, encoding speed,
> > >>> etc).
> > >>>
> > >>> The files generated by this muxer has been verified to be valid
> > >>> AVIF files by the following:
> > >>> 1) Displays on Chrome (both still and animated images).
> > >>> 2) Displays on Firefox (only still images, firefox does not support
> > >>>    animated AVIF yet).
> > >>> 3) Verfied to be valid by Compliance Warden:
> > >>>    https://github.com/gpac/ComplianceWarden
> > >>>
> > >>> Fixes the encoder/muxer part of Trac Ticket #7621
> > >>>
> > >>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > >>> ---
> > >>>  configure                |   1 +
> > >>>  libavformat/allformats.c |   1 +
> > >>>  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
> > >>>  libavformat/movenc.h     |   5 +
> > >>>  4 files changed, 322 insertions(+), 26 deletions(-)
> > >>>
> > >>> diff --git a/configure b/configure
> > >>> index 8c69ab0c86..6d7020e96b 100755
> > >>> --- a/configure
> > >>> +++ b/configure
> > >>> @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
> > >>>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
> > >>>  avi_demuxer_select="riffdec exif"
> > >>>  avi_muxer_select="riffenc"
> > >>> +avif_muxer_select="mov_muxer"
> > >>>  caf_demuxer_select="iso_media"
> > >>>  caf_muxer_select="iso_media"
> > >>>  dash_muxer_select="mp4_muxer"
> > >>> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> > >>> index d066a7745b..400c17afbd 100644
> > >>> --- a/libavformat/allformats.c
> > >>> +++ b/libavformat/allformats.c
> > >>> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
> > >>>  extern const AVInputFormat  ff_av1_demuxer;
> > >>>  extern const AVInputFormat  ff_avi_demuxer;
> > >>>  extern const AVOutputFormat ff_avi_muxer;
> > >>> +extern const AVOutputFormat ff_avif_muxer;
> > >>>  extern const AVInputFormat  ff_avisynth_demuxer;
> > >>>  extern const AVOutputFormat ff_avm2_muxer;
> > >>>  extern const AVInputFormat  ff_avr_demuxer;
> > >>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > >>> index 1a746a67fd..ff41579300 100644
> > >>> --- a/libavformat/movenc.c
> > >>> +++ b/libavformat/movenc.c
> > >>> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
> > >>>
> > >>>      avio_wb32(pb, 0);
> > >>>      ffio_wfourcc(pb, "av1C");
> > >>> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> > >>> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
> > >>>      return update_size(pb, pos);
> > >>>  }
> > >>>
> > >>> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> > >>>          }
> > >>>      }
> > >>>
> > >>> -    /* We should only ever be called by MOV or MP4. */
> > >>> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> > >>> +    /* We should only ever be called for MOV, MP4 and AVIF. */
> > >>> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> > >>> +               track->mode == MODE_AVIF);
> > >>>
> > >>>      avio_wb32(pb, 0); /* size */
> > >>>      ffio_wfourcc(pb, "colr");
> > >>> -    if (track->mode == MODE_MP4)
> > >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
> > >>>          ffio_wfourcc(pb, "nclx");
> > >>>      else
> > >>>          ffio_wfourcc(pb, "nclc");
> > >>> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> > >>>      avio_wb16(pb, track->par->color_primaries);
> > >>>      avio_wb16(pb, track->par->color_trc);
> > >>>      avio_wb16(pb, track->par->color_space);
> > >>> -    if (track->mode == MODE_MP4) {
> > >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> > >>>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
> > >>>          avio_w8(pb, full_range << 7);
> > >>>      }
> > >>> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> > >>>                    || (track->par->width == 1440 && track->par->height == 1080)
> > >>>                    || (track->par->width == 1920 && track->par->height == 1080);
> > >>>
> > >>> -    if (track->mode == MODE_MOV &&
> > >>> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
> > >>>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
> > >>>          av_strlcpy(compressor_name, encoder->value, 32);
> > >>>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> > >>> @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> > >>>      }
> > >>>  }
> > >>>
> > >>> +static int mov_write_ccst_tag(AVIOContext *pb)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    // Write sane defaults:
> > >>> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
> > >>> +    // intra_pred_used = 1 : intra prediction may or may not be used.
> > >>> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
> > >>> +    //                        reference images can be used.
> > >>> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
> > >>> +                        (1 << 6) |  /* intra_pred_used */
> > >>> +                        (15 << 2);  /* max_ref_per_pic */
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "ccst");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_w8(pb, ccstValue);
> > >>> +    avio_wb24(pb, 0);  /* reserved */
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>>  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
> > >>>  {
> > >>>      int ret = AVERROR_BUG;
> > >>> @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > >>>      avio_wb32(pb, 0); /* size */
> > >>>      if (mov->encryption_scheme != MOV_ENC_NONE) {
> > >>>          ffio_wfourcc(pb, "encv");
> > >>> +    } else if (track->mode == MODE_AVIF) {
> > >>> +        ffio_wfourcc(pb, "av01");
> > >>>      } else {
> > >>>          avio_wl32(pb, track->tag); // store it byteswapped
> > >>>      }
> > >>> @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > >>>          else
> > >>>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
> > >>>      }
> > >>> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> > >>> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> > >>>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> > >>>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> > >>>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> > >>> @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > >>>      if (avid)
> > >>>          avio_wb32(pb, 0);
> > >>>
> > >>> +    if (track->mode == MODE_AVIF)
> > >>> +        mov_write_ccst_tag(pb);
> > >>> +
> > >>>      return update_size(pb, pos);
> > >>>  }
> > >>>
> > >>> @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> > >>>
> > >>>      if (track) {
> > >>>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> > >>> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > >>> +        if (track->mode == MODE_AVIF) {
> > >>> +            hdlr_type = "pict";
> > >>> +            descr = "ffmpeg";
> > >>> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > >>>              hdlr_type = "vide";
> > >>>              descr     = "VideoHandler";
> > >>>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> > >>> @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> > >>>      return update_size(pb, pos);
> > >>>  }
> > >>>
> > >>> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "pitm");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_wb16(pb, item_id); /* item_id */
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "iloc");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> > >>> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> > >>> +    avio_wb16(pb, 1); /* item_count */
> > >>> +
> > >>> +    avio_wb16(pb, 1); /* item_id */
> > >>> +    avio_wb16(pb, 0); /* data_reference_index */
> > >>> +    avio_wb16(pb, 1); /* extent_count */
> > >>> +    mov->avif_extent_pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* extent_offset (written later) */
> > >>> +    // For animated AVIF, we simply write the first packet's size.
> > >>> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> > >>> +
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t infe_pos;
> > >>> +    int64_t iinf_pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "iinf");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_wb16(pb, 1); /* entry_count */
> > >>> +
> > >>> +    infe_pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "infe");
> > >>> +    avio_w8(pb, 0x2); /* Version */
> > >>> +    avio_wb24(pb, 0); /* flags */
> > >>> +    avio_wb16(pb, 1); /* item_id */
> > >>> +    avio_wb16(pb, 0); /* item_protection_index */
> > >>> +    avio_write(pb, "av01", 4); /* item_type */
> > >>> +    avio_write(pb, "Color\0", 6); /* item_name */
> > >>> +    update_size(pb, infe_pos);
> > >>> +
> > >>> +    return update_size(pb, iinf_pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "ispe");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> > >>> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +
> > >>> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
> > >>
> > >> Is the number of planes really the correct number here? After all, for a
> > >> muxer (instead of a decoder) it does not matter whether the chroma
> > >> planes are interleaved like in AV_PIX_FMT_NV12 or not like in
> > >> AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.
> > >>
> > >
> > > Done.
> > >
> > >>> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > >>> +    int i;
> > >>
> > >> We allow and use "for (int i = 0;" from C99 to save lines like these.
> > >>
> > >
> > > Done.
> > >
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "pixi");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_w8(pb, num_channels); /* num_channels */
> > >>> +    for (i = 0; i < num_channels; ++i) {
> > >>> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> > >>> +    }
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "ipco");
> > >>> +    mov_write_ispe_tag(pb, mov, s);
> > >>> +    mov_write_pixi_tag(pb, mov, s);
> > >>> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> > >>> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "ipma");
> > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > >>> +    avio_wb32(pb, 1); /* entry_count */
> > >>> +    avio_wb16(pb, 1); /* item_ID */
> > >>> +    avio_w8(pb, 4); /* association_count */
> > >>> +
> > >>> +    // ispe association.
> > >>> +    avio_w8(pb, 1); /* essential and property_index */
> > >>> +    // pixi association.
> > >>> +    avio_w8(pb, 2); /* essential and property_index */
> > >>> +    // av1C association.
> > >>> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> > >>> +    // colr association.
> > >>> +    avio_w8(pb, 4); /* essential and property_index */
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > >>> +{
> > >>> +    int64_t pos = avio_tell(pb);
> > >>> +    avio_wb32(pb, 0); /* size */
> > >>> +    ffio_wfourcc(pb, "iprp");
> > >>> +    mov_write_ipco_tag(pb, mov, s);
> > >>> +    mov_write_ipma_tag(pb, mov, s);
> > >>> +    return update_size(pb, pos);
> > >>> +}
> > >>> +
> > >>>  static int mov_write_hmhd_tag(AVIOContext *pb)
> > >>>  {
> > >>>      /* This atom must be present, but leaving the values at zero
> > >>> @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> > >>>              display_matrix = NULL;
> > >>>      }
> > >>>
> > >>> -    if (track->flags & MOV_TRACK_ENABLED)
> > >>> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
> > >>>          flags |= MOV_TKHD_FLAG_ENABLED;
> > >>>
> > >>>      if (track->mode == MODE_ISM)
> > >>> @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> > >>>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
> > >>>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> > >>>          int64_t track_width_1616;
> > >>> -        if (track->mode == MODE_MOV) {
> > >>> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
> > >>>              track_width_1616 = track->par->width * 0x10000ULL;
> > >>>          } else {
> > >>>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> > >>> @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> > >>>              mov_write_tapt_tag(pb, track);
> > >>>          }
> > >>>      }
> > >>> -    mov_write_track_udta_tag(pb, mov, st);
> > >>> +    if (track->mode != MODE_AVIF)
> > >>> +        mov_write_track_udta_tag(pb, mov, st);
> > >>>      track->entry = entry_backup;
> > >>>      track->chunkCount = chunk_backup;
> > >>>      return update_size(pb, pos);
> > >>> @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> > >>>          mov_write_mdta_hdlr_tag(pb, mov, s);
> > >>>          mov_write_mdta_keys_tag(pb, mov, s);
> > >>>          mov_write_mdta_ilst_tag(pb, mov, s);
> > >>> -    }
> > >>> -    else {
> > >>> +    } else if (mov->mode == MODE_AVIF) {
> > >>> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> > >>> +        // We always write the primary item id as 1 since only one track is
> > >>> +        // supported for AVIF.
> > >>> +        mov_write_pitm_tag(pb, 1);
> > >>> +        mov_write_iloc_tag(pb, mov, s);
> > >>> +        mov_write_iinf_tag(pb, mov, s);
> > >>> +        mov_write_iprp_tag(pb, mov, s);
> > >>> +    } else {
> > >>>          /* iTunes metadata tag */
> > >>>          mov_write_itunes_hdlr_tag(pb, mov, s);
> > >>>          mov_write_ilst_tag(pb, mov, s);
> > >>> @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> > >>>      }
> > >>>
> > >>>      mov_write_mvhd_tag(pb, mov);
> > >>> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> > >>> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
> > >>>          mov_write_iods_tag(pb, mov);
> > >>>      for (i = 0; i < mov->nb_streams; i++) {
> > >>> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > >>> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> > >>> +            mov->mode == MODE_AVIF) {
> > >>>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> > >>>              if (ret < 0)
> > >>>                  return ret;
> > >>> @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> > >>>
> > >>>      if (mov->mode == MODE_PSP)
> > >>>          mov_write_uuidusmt_tag(pb, s);
> > >>> -    else
> > >>> +    else if (mov->mode != MODE_AVIF)
> > >>>          mov_write_udta_tag(pb, mov, s);
> > >>>
> > >>>      return update_size(pb, pos);
> > >>> @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
> > >>>      else if (mov->mode == MODE_3GP) {
> > >>>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
> > >>>          minor =     has_h264 ?   0x100 :   0x200;
> > >>> +    } else if (mov->mode == MODE_AVIF) {
> > >>> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> > >>> +        minor = 0;
> > >>>      } else if (mov->mode & MODE_3G2) {
> > >>>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
> > >>>          minor =     has_h264 ? 0x20000 : 0x10000;
> > >>> @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> > >>>      // compatible brand a second time.
> > >>>      if (mov->mode == MODE_ISM) {
> > >>>          ffio_wfourcc(pb, "piff");
> > >>> +    } else if (mov->mode == MODE_AVIF) {
> > >>> +        const AVPixFmtDescriptor *pix_fmt_desc =
> > >>> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > >>> +        const int depth = pix_fmt_desc->comp[0].depth;
> > >>> +        if (mov->is_animated_avif) {
> > >>> +            // For animated AVIF, major brand is "avis". Add "avif" as a
> > >>> +            // compatible brand.
> > >>> +            ffio_wfourcc(pb, "avif");
> > >>> +            ffio_wfourcc(pb, "msf1");
> > >>> +            ffio_wfourcc(pb, "iso8");
> > >>> +        }
> > >>> +        ffio_wfourcc(pb, "mif1");
> > >>> +        ffio_wfourcc(pb, "miaf");
> > >>> +        if (depth == 8 || depth == 10) {
> > >>> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> > >>> +            // computing that is based on chroma subsampling type. 420 chroma
> > >>> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> > >>> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> > >>> +                // 444 chroma subsampling.
> > >>> +                ffio_wfourcc(pb, "MA1A");
> > >>> +            } else {
> > >>> +                // 420 chroma subsampling.
> > >>> +                ffio_wfourcc(pb, "MA1B");
> > >>> +            }
> > >>> +        }
> > >>>      } else if (mov->mode != MODE_MOV) {
> > >>>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
> > >>>          // brand, if not already the major brand. This is compatible with users that
> > >>> @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> > >>>      if (ret < 0)
> > >>>          return ret;
> > >>>
> > >>> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > >>> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
> > >>>          int ret;
> > >>>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
> > >>>              if (mov->frag_interleave && mov->fragments > 0) {
> > >>> @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> > >>>              }
> > >>>          }
> > >>>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> > >>> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > >>> +        if (trk->mode == MODE_AVIF) {
> > >>
> > >> Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
> > >> and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
> > >> OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
> > >> ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.
> > >>
> > >> If the aim of this check is to disallow hint tracks or so, then it fails
> > >> (all it does is ensuring that both the ordinary track as well as the
> > >> hint track get data that might contain OBUs that should not be there).
> > >>
> > >
> > > Done. I had to update the extent offset code here as well.
> > >
> > >>> +            avio_write(pb, pkt->data, pkt->size);
> > >>> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > >>>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
> > >>>                                           &size, &offset);
> > >>>              if (ret < 0)
> > >>> @@ -6230,6 +6422,10 @@ fail:
> > >>>              }
> > >>>          }
> > >>>
> > >>> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> > >>> +            mov->avif_extent_length = pkt->size;
> > >>> +        }
> > >>> +
> > >>>          return mov_write_single_packet(s, pkt);
> > >>>      }
> > >>>  }
> > >>> @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
> > >>>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
> > >>>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
> > >>>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> > >>> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
> > >>>  #undef IS_MODE
> > >>>
> > >>>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
> > >>>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
> > >>>
> > >>> +    if (mov->mode == MODE_AVIF)
> > >>> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> > >>> +
> > >>>      /* Set the FRAGMENT flag if any of the fragmentation methods are
> > >>>       * enabled. */
> > >>>      if (mov->max_fragment_duration || mov->max_fragment_size ||
> > >>> @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
> > >>>      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
> > >>>       * is enabled, we don't support non-seekable output at all. */
> > >>>      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
> > >>> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
> > >>> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
> > >>> +         mov->mode == MODE_AVIF)) {
> > >>>          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
> > >>>          return AVERROR(EINVAL);
> > >>>      }
> > >>>
> > >>> +    /* AVIF output must have exactly one video stream */
> > >>> +    if (mov->mode == MODE_AVIF) {
> > >>> +        if (s->nb_streams > 1) {
> > >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
> > >>> +            return AVERROR(EINVAL);
> > >>> +        }
> > >>> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> > >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
> > >>> +            return AVERROR(EINVAL);
> > >>> +        }
> > >>> +    }
> > >>> +
> > >>> +
> > >>>      mov->nb_streams = s->nb_streams;
> > >>>      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> > >>>          mov->chapter_track = mov->nb_streams++;
> > >>> @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
> > >>>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
> > >>>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
> > >>>              }
> > >>> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> > >>> -                track->par->codec_id == AV_CODEC_ID_AV1) {
> > >>> -                if (track->mode != MODE_MP4) {
> > >>> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > >>> -                    return AVERROR(EINVAL);
> > >>> -                }
> > >>> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> > >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > >>> +             return AVERROR(EINVAL);
> > >>> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> > >>> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> > >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> > >>> +                return AVERROR(EINVAL);
> > >>>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
> > >>>                  /* altref frames handling is not defined in the spec as of version v1.0,
> > >>>                   * so just forbid muxing VP8 streams altogether until a new version does */
> > >>> @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
> > >>>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
> > >>>              !mov->max_fragment_duration && !mov->max_fragment_size)
> > >>>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> > >>> -    } else {
> > >>> +    } else if (mov->mode != MODE_AVIF) {
> > >>>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
> > >>>              mov->reserved_header_pos = avio_tell(pb);
> > >>>          mov_write_mdat_tag(pb, mov);
> > >>> @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
> > >>>      return ret;
> > >>>  }
> > >>>
> > >>> +static int avif_write_trailer(AVFormatContext *s)
> > >>> +{
> > >>> +    AVIOContext *pb = s->pb;
> > >>> +    MOVMuxContext *mov = s->priv_data;
> > >>> +    int64_t pos_backup, mdat_pos;
> > >>> +    uint8_t *buf;
> > >>> +    int buf_size, moov_size;
> > >>> +    int i;
> > >>> +
> > >>> +    if (mov->moov_written) return 0;
> > >>> +
> > >>> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> > >>> +    mov_write_identification(pb, s);
> > >>> +    mov_write_meta_tag(pb, mov, s);
> > >>> +
> > >>> +    moov_size = get_moov_size(s);
> > >>> +    for (i = 0; i < mov->nb_streams; i++)
> > >>> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> > >>
> > >> Don't call avio_tell() in a loop (and I wonder whether this loop is even
> > >> necessary given that s->nb_streams is checked to be one).
> > >>
> > >
> > > Removed the loop. I was thinking about potentially supporting multiple
> > > output tracks in the future (for alpha channel for example), but this
> > > code would not work in that case any way.
> > >
> > >>> +
> > >>> +    if (mov->is_animated_avif) {
> > >>> +        int ret;
> > >>> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> > >>> +            return ret;
> > >>> +    }
> > >>> +
> > >>> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> > >>> +    avio_wb32(pb, buf_size + 8);
> > >>> +    ffio_wfourcc(pb, "mdat");
> > >>> +    mdat_pos = avio_tell(pb);
> > >>> +
> > >>> +    avio_write(pb, buf, buf_size);
> > >>> +    ffio_free_dyn_buf(&mov->mdat_buf);
> > >>
> > >> Unnecessary: This will be freed in mov_free().
> > >>
> > >
> > > Removed.
> > >
> > >>> +
> > >>> +    // write extent offset.
> > >>> +    pos_backup = avio_tell(pb);
> > >>> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> > >>> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
> > >>
> > >> What guarantees that this fits into 32bits?
> > >>
> > >
> > > Added a check to fail if it does not fit in 32-bits.
> > >
> > >>> +    avio_seek(pb, pos_backup, SEEK_SET);
> > >>> +
> > >>> +    return 0;
> > >>> +}
> > >>> +
> > >>>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
> > >>>  static const AVCodecTag codec_3gp_tags[] = {
> > >>>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> > >>> @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
> > >>>      { AV_CODEC_ID_NONE, 0 },
> > >>>  };
> > >>>
> > >>> +#if CONFIG_AVIF_MUXER
> > >>> +static const AVCodecTag codec_avif_tags[] = {
> > >>> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> > >>> +    { AV_CODEC_ID_NONE, 0 },
> > >>> +};
> > >>> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> > >>> +
> > >>> +static const AVClass mov_avif_muxer_class = {
> > >>> +    .class_name = "avif muxer",
> > >>> +    .item_name  = av_default_item_name,
> > >>> +    .version    = LIBAVUTIL_VERSION_INT,
> > >>> +};
> > >>
> > >> It's not mandatory for a muxer to have a private class; it is only
> > >> necessary for options. If you do not have options (like here), then the
> > >> AVClass is useless.
> > >> I actually wanted that you support the options that make sense for AVIF
> > >> and I dislike that the avif muxer uses a different write_trailer than
> > >> every other muxer here (which is the reason why e.g. the faststart
> > >> option is not supported by it). Is it really so different?
> > >> Furthermore, given that you don't use the same AVClass as everyone else,
> > >> the values for fields that are AVOpt-enabled are zero; and this does not
> > >> always coincide with the default value of the relevant option. See e.g.
> > >> skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
> > >> will even be set to a value that is outside of its legal range. I don't
> > >> know whether this can lead to any divide-by-zero crashes or asserts, but
> > >> it is certainly very fragile.
> > >>
> > >
> > > Hmm, i see a couple of solutions here:
> > > 1) Use the same private class as the MOV muxer and document that not
> > > all options are supported when the format is AVIF. I think there are
> > > already cases within this file where not all muxers may support all
> > > the options.
> > > 2) Leave it as-is since the code always checks for AVIF mode first and
> > > then does the rest, so the defaults of the non-relevant options should
> > > not matter. But i agree that this is very fragile. One way to ensure
> > > future changes don't break this structure is to add a fate test.
> > >
> >
> > Can you tell which options (if enabled) would work and which would not
> > work or would create spec-incompliant output?
> >
>
> The following options will make sense for AVIF (and may just work
> as-is, i am not sure of this): write_colr, prefer_icc,
> video_track_timescale and a few other generic MOV options like
> use_stream_ids_as_track_ids, empty_hdlr_name, and brand.
>

Any thoughts on this?

> > > Please let me know what you think.
> > >
> > > About using a separate write_trailer function, i can re-use the
> > > existing mov_write_trailer but it will mostly be a special case for
> > > AVIF mode with the code in avif_write_trailer, so i thought it was
> > > cleaner to have a separate function anyway. I am not sure if there are
> > > any other implications because of this. If you prefer that i move it
> > > into mov_write_trailer, please let me know and i can do that as well.
> > >
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
>
>
> --
> Vignesh
Vignesh Venkat April 29, 2022, 4:03 p.m. UTC | #6
On Thu, Apr 21, 2022 at 9:38 AM Vignesh Venkatasubramanian
<vigneshv@google.com> wrote:
>
> On Wed, Apr 13, 2022 at 2:35 PM Vignesh Venkatasubramanian
> <vigneshv@google.com> wrote:
> >
> > On Wed, Apr 13, 2022 at 2:04 PM Andreas Rheinhardt
> > <andreas.rheinhardt@outlook.com> wrote:
> > >
> > > Vignesh Venkatasubramanian:
> > > > On Mon, Mar 21, 2022 at 1:46 PM Andreas Rheinhardt
> > > > <andreas.rheinhardt@outlook.com> wrote:
> > > >>
> > > >> Vignesh Venkatasubramanian:
> > > >>> Add an AVIF muxer by re-using the existing the mov/mp4 muxer.
> > > >>>
> > > >>> AVIF Specifiation: https://aomediacodec.github.io/av1-avif
> > > >>>
> > > >>> Sample usage for still image:
> > > >>> ffmpeg -i image.png -c:v libaom-av1 -avif-image 1 image.avif
> > > >>>
> > > >>> Sample usage for animated AVIF image:
> > > >>> ffmpeg -i video.mp4 animated.avif
> > > >>>
> > > >>> We can re-use any of the AV1 encoding options that will make
> > > >>> sense for image encoding (like bitrate, tiles, encoding speed,
> > > >>> etc).
> > > >>>
> > > >>> The files generated by this muxer has been verified to be valid
> > > >>> AVIF files by the following:
> > > >>> 1) Displays on Chrome (both still and animated images).
> > > >>> 2) Displays on Firefox (only still images, firefox does not support
> > > >>>    animated AVIF yet).
> > > >>> 3) Verfied to be valid by Compliance Warden:
> > > >>>    https://github.com/gpac/ComplianceWarden
> > > >>>
> > > >>> Fixes the encoder/muxer part of Trac Ticket #7621
> > > >>>
> > > >>> Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com>
> > > >>> ---
> > > >>>  configure                |   1 +
> > > >>>  libavformat/allformats.c |   1 +
> > > >>>  libavformat/movenc.c     | 341 ++++++++++++++++++++++++++++++++++++---
> > > >>>  libavformat/movenc.h     |   5 +
> > > >>>  4 files changed, 322 insertions(+), 26 deletions(-)
> > > >>>
> > > >>> diff --git a/configure b/configure
> > > >>> index 8c69ab0c86..6d7020e96b 100755
> > > >>> --- a/configure
> > > >>> +++ b/configure
> > > >>> @@ -3390,6 +3390,7 @@ asf_stream_muxer_select="asf_muxer"
> > > >>>  av1_demuxer_select="av1_frame_merge_bsf av1_parser"
> > > >>>  avi_demuxer_select="riffdec exif"
> > > >>>  avi_muxer_select="riffenc"
> > > >>> +avif_muxer_select="mov_muxer"
> > > >>>  caf_demuxer_select="iso_media"
> > > >>>  caf_muxer_select="iso_media"
> > > >>>  dash_muxer_select="mp4_muxer"
> > > >>> diff --git a/libavformat/allformats.c b/libavformat/allformats.c
> > > >>> index d066a7745b..400c17afbd 100644
> > > >>> --- a/libavformat/allformats.c
> > > >>> +++ b/libavformat/allformats.c
> > > >>> @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer;
> > > >>>  extern const AVInputFormat  ff_av1_demuxer;
> > > >>>  extern const AVInputFormat  ff_avi_demuxer;
> > > >>>  extern const AVOutputFormat ff_avi_muxer;
> > > >>> +extern const AVOutputFormat ff_avif_muxer;
> > > >>>  extern const AVInputFormat  ff_avisynth_demuxer;
> > > >>>  extern const AVOutputFormat ff_avm2_muxer;
> > > >>>  extern const AVInputFormat  ff_avr_demuxer;
> > > >>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > > >>> index 1a746a67fd..ff41579300 100644
> > > >>> --- a/libavformat/movenc.c
> > > >>> +++ b/libavformat/movenc.c
> > > >>> @@ -1303,7 +1303,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
> > > >>>
> > > >>>      avio_wb32(pb, 0);
> > > >>>      ffio_wfourcc(pb, "av1C");
> > > >>> -    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
> > > >>> +    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
> > > >>>      return update_size(pb, pos);
> > > >>>  }
> > > >>>
> > > >>> @@ -2004,12 +2004,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> > > >>>          }
> > > >>>      }
> > > >>>
> > > >>> -    /* We should only ever be called by MOV or MP4. */
> > > >>> -    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
> > > >>> +    /* We should only ever be called for MOV, MP4 and AVIF. */
> > > >>> +    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
> > > >>> +               track->mode == MODE_AVIF);
> > > >>>
> > > >>>      avio_wb32(pb, 0); /* size */
> > > >>>      ffio_wfourcc(pb, "colr");
> > > >>> -    if (track->mode == MODE_MP4)
> > > >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
> > > >>>          ffio_wfourcc(pb, "nclx");
> > > >>>      else
> > > >>>          ffio_wfourcc(pb, "nclc");
> > > >>> @@ -2019,7 +2020,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
> > > >>>      avio_wb16(pb, track->par->color_primaries);
> > > >>>      avio_wb16(pb, track->par->color_trc);
> > > >>>      avio_wb16(pb, track->par->color_space);
> > > >>> -    if (track->mode == MODE_MP4) {
> > > >>> +    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> > > >>>          int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
> > > >>>          avio_w8(pb, full_range << 7);
> > > >>>      }
> > > >>> @@ -2085,7 +2086,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> > > >>>                    || (track->par->width == 1440 && track->par->height == 1080)
> > > >>>                    || (track->par->width == 1920 && track->par->height == 1080);
> > > >>>
> > > >>> -    if (track->mode == MODE_MOV &&
> > > >>> +    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
> > > >>>          (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
> > > >>>          av_strlcpy(compressor_name, encoder->value, 32);
> > > >>>      } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
> > > >>> @@ -2106,6 +2107,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track)
> > > >>>      }
> > > >>>  }
> > > >>>
> > > >>> +static int mov_write_ccst_tag(AVIOContext *pb)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    // Write sane defaults:
> > > >>> +    // all_ref_pics_intra = 0 : all samples can use any type of reference.
> > > >>> +    // intra_pred_used = 1 : intra prediction may or may not be used.
> > > >>> +    // max_ref_per_pic = 15 : reserved value to indicate that any number of
> > > >>> +    //                        reference images can be used.
> > > >>> +    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
> > > >>> +                        (1 << 6) |  /* intra_pred_used */
> > > >>> +                        (15 << 2);  /* max_ref_per_pic */
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "ccst");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_w8(pb, ccstValue);
> > > >>> +    avio_wb24(pb, 0);  /* reserved */
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>>  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
> > > >>>  {
> > > >>>      int ret = AVERROR_BUG;
> > > >>> @@ -2123,6 +2143,8 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > > >>>      avio_wb32(pb, 0); /* size */
> > > >>>      if (mov->encryption_scheme != MOV_ENC_NONE) {
> > > >>>          ffio_wfourcc(pb, "encv");
> > > >>> +    } else if (track->mode == MODE_AVIF) {
> > > >>> +        ffio_wfourcc(pb, "av01");
> > > >>>      } else {
> > > >>>          avio_wl32(pb, track->tag); // store it byteswapped
> > > >>>      }
> > > >>> @@ -2239,7 +2261,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > > >>>          else
> > > >>>              av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
> > > >>>      }
> > > >>> -    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
> > > >>> +    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
> > > >>>          int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> > > >>>                               track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> > > >>>                               track->par->color_space != AVCOL_SPC_UNSPECIFIED;
> > > >>> @@ -2291,6 +2313,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
> > > >>>      if (avid)
> > > >>>          avio_wb32(pb, 0);
> > > >>>
> > > >>> +    if (track->mode == MODE_AVIF)
> > > >>> +        mov_write_ccst_tag(pb);
> > > >>> +
> > > >>>      return update_size(pb, pos);
> > > >>>  }
> > > >>>
> > > >>> @@ -2792,7 +2817,10 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> > > >>>
> > > >>>      if (track) {
> > > >>>          hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
> > > >>> -        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > > >>> +        if (track->mode == MODE_AVIF) {
> > > >>> +            hdlr_type = "pict";
> > > >>> +            descr = "ffmpeg";
> > > >>> +        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
> > > >>>              hdlr_type = "vide";
> > > >>>              descr     = "VideoHandler";
> > > >>>          } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
> > > >>> @@ -2859,6 +2887,131 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
> > > >>>      return update_size(pb, pos);
> > > >>>  }
> > > >>>
> > > >>> +static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "pitm");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_wb16(pb, item_id); /* item_id */
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "iloc");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
> > > >>> +    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
> > > >>> +    avio_wb16(pb, 1); /* item_count */
> > > >>> +
> > > >>> +    avio_wb16(pb, 1); /* item_id */
> > > >>> +    avio_wb16(pb, 0); /* data_reference_index */
> > > >>> +    avio_wb16(pb, 1); /* extent_count */
> > > >>> +    mov->avif_extent_pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* extent_offset (written later) */
> > > >>> +    // For animated AVIF, we simply write the first packet's size.
> > > >>> +    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
> > > >>> +
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t infe_pos;
> > > >>> +    int64_t iinf_pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "iinf");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_wb16(pb, 1); /* entry_count */
> > > >>> +
> > > >>> +    infe_pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "infe");
> > > >>> +    avio_w8(pb, 0x2); /* Version */
> > > >>> +    avio_wb24(pb, 0); /* flags */
> > > >>> +    avio_wb16(pb, 1); /* item_id */
> > > >>> +    avio_wb16(pb, 0); /* item_protection_index */
> > > >>> +    avio_write(pb, "av01", 4); /* item_type */
> > > >>> +    avio_write(pb, "Color\0", 6); /* item_name */
> > > >>> +    update_size(pb, infe_pos);
> > > >>> +
> > > >>> +    return update_size(pb, iinf_pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "ispe");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
> > > >>> +    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +
> > > >>> +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
> > > >>
> > > >> Is the number of planes really the correct number here? After all, for a
> > > >> muxer (instead of a decoder) it does not matter whether the chroma
> > > >> planes are interleaved like in AV_PIX_FMT_NV12 or not like in
> > > >> AV_PIX_FMT_YUV420P. You should better use pixdesc->nb_components.
> > > >>
> > > >
> > > > Done.
> > > >
> > > >>> +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > > >>> +    int i;
> > > >>
> > > >> We allow and use "for (int i = 0;" from C99 to save lines like these.
> > > >>
> > > >
> > > > Done.
> > > >
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "pixi");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_w8(pb, num_channels); /* num_channels */
> > > >>> +    for (i = 0; i < num_channels; ++i) {
> > > >>> +      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
> > > >>> +    }
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "ipco");
> > > >>> +    mov_write_ispe_tag(pb, mov, s);
> > > >>> +    mov_write_pixi_tag(pb, mov, s);
> > > >>> +    mov_write_av1c_tag(pb, &mov->tracks[0]);
> > > >>> +    mov_write_colr_tag(pb, &mov->tracks[0], 0);
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "ipma");
> > > >>> +    avio_wb32(pb, 0); /* Version & flags */
> > > >>> +    avio_wb32(pb, 1); /* entry_count */
> > > >>> +    avio_wb16(pb, 1); /* item_ID */
> > > >>> +    avio_w8(pb, 4); /* association_count */
> > > >>> +
> > > >>> +    // ispe association.
> > > >>> +    avio_w8(pb, 1); /* essential and property_index */
> > > >>> +    // pixi association.
> > > >>> +    avio_w8(pb, 2); /* essential and property_index */
> > > >>> +    // av1C association.
> > > >>> +    avio_w8(pb, 0x80 | 3); /* essential and property_index */
> > > >>> +    // colr association.
> > > >>> +    avio_w8(pb, 4); /* essential and property_index */
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>> +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
> > > >>> +{
> > > >>> +    int64_t pos = avio_tell(pb);
> > > >>> +    avio_wb32(pb, 0); /* size */
> > > >>> +    ffio_wfourcc(pb, "iprp");
> > > >>> +    mov_write_ipco_tag(pb, mov, s);
> > > >>> +    mov_write_ipma_tag(pb, mov, s);
> > > >>> +    return update_size(pb, pos);
> > > >>> +}
> > > >>> +
> > > >>>  static int mov_write_hmhd_tag(AVIOContext *pb)
> > > >>>  {
> > > >>>      /* This atom must be present, but leaving the values at zero
> > > >>> @@ -3056,7 +3209,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > >>>              display_matrix = NULL;
> > > >>>      }
> > > >>>
> > > >>> -    if (track->flags & MOV_TRACK_ENABLED)
> > > >>> +    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
> > > >>>          flags |= MOV_TKHD_FLAG_ENABLED;
> > > >>>
> > > >>>      if (track->mode == MODE_ISM)
> > > >>> @@ -3104,7 +3257,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > >>>      if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
> > > >>>                 track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
> > > >>>          int64_t track_width_1616;
> > > >>> -        if (track->mode == MODE_MOV) {
> > > >>> +        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
> > > >>>              track_width_1616 = track->par->width * 0x10000ULL;
> > > >>>          } else {
> > > >>>              track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
> > > >>> @@ -3439,7 +3592,8 @@ static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
> > > >>>              mov_write_tapt_tag(pb, track);
> > > >>>          }
> > > >>>      }
> > > >>> -    mov_write_track_udta_tag(pb, mov, st);
> > > >>> +    if (track->mode != MODE_AVIF)
> > > >>> +        mov_write_track_udta_tag(pb, mov, st);
> > > >>>      track->entry = entry_backup;
> > > >>>      track->chunkCount = chunk_backup;
> > > >>>      return update_size(pb, pos);
> > > >>> @@ -3914,8 +4068,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > >>>          mov_write_mdta_hdlr_tag(pb, mov, s);
> > > >>>          mov_write_mdta_keys_tag(pb, mov, s);
> > > >>>          mov_write_mdta_ilst_tag(pb, mov, s);
> > > >>> -    }
> > > >>> -    else {
> > > >>> +    } else if (mov->mode == MODE_AVIF) {
> > > >>> +        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
> > > >>> +        // We always write the primary item id as 1 since only one track is
> > > >>> +        // supported for AVIF.
> > > >>> +        mov_write_pitm_tag(pb, 1);
> > > >>> +        mov_write_iloc_tag(pb, mov, s);
> > > >>> +        mov_write_iinf_tag(pb, mov, s);
> > > >>> +        mov_write_iprp_tag(pb, mov, s);
> > > >>> +    } else {
> > > >>>          /* iTunes metadata tag */
> > > >>>          mov_write_itunes_hdlr_tag(pb, mov, s);
> > > >>>          mov_write_ilst_tag(pb, mov, s);
> > > >>> @@ -4245,10 +4406,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > >>>      }
> > > >>>
> > > >>>      mov_write_mvhd_tag(pb, mov);
> > > >>> -    if (mov->mode != MODE_MOV && !mov->iods_skip)
> > > >>> +    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
> > > >>>          mov_write_iods_tag(pb, mov);
> > > >>>      for (i = 0; i < mov->nb_streams; i++) {
> > > >>> -        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > > >>> +        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
> > > >>> +            mov->mode == MODE_AVIF) {
> > > >>>              int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
> > > >>>              if (ret < 0)
> > > >>>                  return ret;
> > > >>> @@ -4259,7 +4421,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
> > > >>>
> > > >>>      if (mov->mode == MODE_PSP)
> > > >>>          mov_write_uuidusmt_tag(pb, s);
> > > >>> -    else
> > > >>> +    else if (mov->mode != MODE_AVIF)
> > > >>>          mov_write_udta_tag(pb, mov, s);
> > > >>>
> > > >>>      return update_size(pb, pos);
> > > >>> @@ -5002,6 +5164,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
> > > >>>      else if (mov->mode == MODE_3GP) {
> > > >>>          ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
> > > >>>          minor =     has_h264 ?   0x100 :   0x200;
> > > >>> +    } else if (mov->mode == MODE_AVIF) {
> > > >>> +        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
> > > >>> +        minor = 0;
> > > >>>      } else if (mov->mode & MODE_3G2) {
> > > >>>          ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
> > > >>>          minor =     has_h264 ? 0x20000 : 0x10000;
> > > >>> @@ -5065,6 +5230,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
> > > >>>      // compatible brand a second time.
> > > >>>      if (mov->mode == MODE_ISM) {
> > > >>>          ffio_wfourcc(pb, "piff");
> > > >>> +    } else if (mov->mode == MODE_AVIF) {
> > > >>> +        const AVPixFmtDescriptor *pix_fmt_desc =
> > > >>> +            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
> > > >>> +        const int depth = pix_fmt_desc->comp[0].depth;
> > > >>> +        if (mov->is_animated_avif) {
> > > >>> +            // For animated AVIF, major brand is "avis". Add "avif" as a
> > > >>> +            // compatible brand.
> > > >>> +            ffio_wfourcc(pb, "avif");
> > > >>> +            ffio_wfourcc(pb, "msf1");
> > > >>> +            ffio_wfourcc(pb, "iso8");
> > > >>> +        }
> > > >>> +        ffio_wfourcc(pb, "mif1");
> > > >>> +        ffio_wfourcc(pb, "miaf");
> > > >>> +        if (depth == 8 || depth == 10) {
> > > >>> +            // MA1B and MA1A brands are based on AV1 profile. Short hand for
> > > >>> +            // computing that is based on chroma subsampling type. 420 chroma
> > > >>> +            // subsampling is MA1B.  444 chroma subsampling is MA1A.
> > > >>> +            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
> > > >>> +                // 444 chroma subsampling.
> > > >>> +                ffio_wfourcc(pb, "MA1A");
> > > >>> +            } else {
> > > >>> +                // 420 chroma subsampling.
> > > >>> +                ffio_wfourcc(pb, "MA1B");
> > > >>> +            }
> > > >>> +        }
> > > >>>      } else if (mov->mode != MODE_MOV) {
> > > >>>          // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
> > > >>>          // brand, if not already the major brand. This is compatible with users that
> > > >>> @@ -5669,7 +5859,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> > > >>>      if (ret < 0)
> > > >>>          return ret;
> > > >>>
> > > >>> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
> > > >>> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
> > > >>>          int ret;
> > > >>>          if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
> > > >>>              if (mov->frag_interleave && mov->fragments > 0) {
> > > >>> @@ -5802,7 +5992,9 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
> > > >>>              }
> > > >>>          }
> > > >>>      } else if (par->codec_id == AV_CODEC_ID_AV1) {
> > > >>> -        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > > >>> +        if (trk->mode == MODE_AVIF) {
> > > >>
> > > >> Why this? AVIF requires that AVI-in-ISOBMFF sample format is honoured
> > > >> and this contains e.g. "OBUs of type OBU_TEMPORAL_DELIMITER,
> > > >> OBU_PADDING, or OBU_REDUNDANT_FRAME_HEADER SHOULD NOT be used".
> > > >> ff_av1_filter_obus(_buf)? merely ensures that these OBUs are stripped away.
> > > >>
> > > >> If the aim of this check is to disallow hint tracks or so, then it fails
> > > >> (all it does is ensuring that both the ordinary track as well as the
> > > >> hint track get data that might contain OBUs that should not be there).
> > > >>
> > > >
> > > > Done. I had to update the extent offset code here as well.
> > > >
> > > >>> +            avio_write(pb, pkt->data, pkt->size);
> > > >>> +        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
> > > >>>              ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
> > > >>>                                           &size, &offset);
> > > >>>              if (ret < 0)
> > > >>> @@ -6230,6 +6422,10 @@ fail:
> > > >>>              }
> > > >>>          }
> > > >>>
> > > >>> +        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
> > > >>> +            mov->avif_extent_length = pkt->size;
> > > >>> +        }
> > > >>> +
> > > >>>          return mov_write_single_packet(s, pkt);
> > > >>>      }
> > > >>>  }
> > > >>> @@ -6569,11 +6765,15 @@ static int mov_init(AVFormatContext *s)
> > > >>>      else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
> > > >>>      else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
> > > >>>      else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
> > > >>> +    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
> > > >>>  #undef IS_MODE
> > > >>>
> > > >>>      if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
> > > >>>          mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
> > > >>>
> > > >>> +    if (mov->mode == MODE_AVIF)
> > > >>> +        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
> > > >>> +
> > > >>>      /* Set the FRAGMENT flag if any of the fragmentation methods are
> > > >>>       * enabled. */
> > > >>>      if (mov->max_fragment_duration || mov->max_fragment_size ||
> > > >>> @@ -6654,11 +6854,25 @@ static int mov_init(AVFormatContext *s)
> > > >>>      /* Non-seekable output is ok if using fragmentation. If ism_lookahead
> > > >>>       * is enabled, we don't support non-seekable output at all. */
> > > >>>      if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
> > > >>> -        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
> > > >>> +        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
> > > >>> +         mov->mode == MODE_AVIF)) {
> > > >>>          av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
> > > >>>          return AVERROR(EINVAL);
> > > >>>      }
> > > >>>
> > > >>> +    /* AVIF output must have exactly one video stream */
> > > >>> +    if (mov->mode == MODE_AVIF) {
> > > >>> +        if (s->nb_streams > 1) {
> > > >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
> > > >>> +            return AVERROR(EINVAL);
> > > >>> +        }
> > > >>> +        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
> > > >>> +            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
> > > >>> +            return AVERROR(EINVAL);
> > > >>> +        }
> > > >>> +    }
> > > >>> +
> > > >>> +
> > > >>>      mov->nb_streams = s->nb_streams;
> > > >>>      if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
> > > >>>          mov->chapter_track = mov->nb_streams++;
> > > >>> @@ -6797,12 +7011,13 @@ static int mov_init(AVFormatContext *s)
> > > >>>                          pix_fmt == AV_PIX_FMT_MONOWHITE ||
> > > >>>                          pix_fmt == AV_PIX_FMT_MONOBLACK;
> > > >>>              }
> > > >>> -            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
> > > >>> -                track->par->codec_id == AV_CODEC_ID_AV1) {
> > > >>> -                if (track->mode != MODE_MP4) {
> > > >>> -                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > > >>> -                    return AVERROR(EINVAL);
> > > >>> -                }
> > > >>> +            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
> > > >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
> > > >>> +             return AVERROR(EINVAL);
> > > >>> +            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
> > > >>> +                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
> > > >>> +                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
> > > >>> +                return AVERROR(EINVAL);
> > > >>>              } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
> > > >>>                  /* altref frames handling is not defined in the spec as of version v1.0,
> > > >>>                   * so just forbid muxing VP8 streams altogether until a new version does */
> > > >>> @@ -7003,7 +7218,7 @@ static int mov_write_header(AVFormatContext *s)
> > > >>>                              FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
> > > >>>              !mov->max_fragment_duration && !mov->max_fragment_size)
> > > >>>              mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
> > > >>> -    } else {
> > > >>> +    } else if (mov->mode != MODE_AVIF) {
> > > >>>          if (mov->flags & FF_MOV_FLAG_FASTSTART)
> > > >>>              mov->reserved_header_pos = avio_tell(pb);
> > > >>>          mov_write_mdat_tag(pb, mov);
> > > >>> @@ -7291,6 +7506,48 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
> > > >>>      return ret;
> > > >>>  }
> > > >>>
> > > >>> +static int avif_write_trailer(AVFormatContext *s)
> > > >>> +{
> > > >>> +    AVIOContext *pb = s->pb;
> > > >>> +    MOVMuxContext *mov = s->priv_data;
> > > >>> +    int64_t pos_backup, mdat_pos;
> > > >>> +    uint8_t *buf;
> > > >>> +    int buf_size, moov_size;
> > > >>> +    int i;
> > > >>> +
> > > >>> +    if (mov->moov_written) return 0;
> > > >>> +
> > > >>> +    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
> > > >>> +    mov_write_identification(pb, s);
> > > >>> +    mov_write_meta_tag(pb, mov, s);
> > > >>> +
> > > >>> +    moov_size = get_moov_size(s);
> > > >>> +    for (i = 0; i < mov->nb_streams; i++)
> > > >>> +        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
> > > >>
> > > >> Don't call avio_tell() in a loop (and I wonder whether this loop is even
> > > >> necessary given that s->nb_streams is checked to be one).
> > > >>
> > > >
> > > > Removed the loop. I was thinking about potentially supporting multiple
> > > > output tracks in the future (for alpha channel for example), but this
> > > > code would not work in that case any way.
> > > >
> > > >>> +
> > > >>> +    if (mov->is_animated_avif) {
> > > >>> +        int ret;
> > > >>> +        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
> > > >>> +            return ret;
> > > >>> +    }
> > > >>> +
> > > >>> +    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
> > > >>> +    avio_wb32(pb, buf_size + 8);
> > > >>> +    ffio_wfourcc(pb, "mdat");
> > > >>> +    mdat_pos = avio_tell(pb);
> > > >>> +
> > > >>> +    avio_write(pb, buf, buf_size);
> > > >>> +    ffio_free_dyn_buf(&mov->mdat_buf);
> > > >>
> > > >> Unnecessary: This will be freed in mov_free().
> > > >>
> > > >
> > > > Removed.
> > > >
> > > >>> +
> > > >>> +    // write extent offset.
> > > >>> +    pos_backup = avio_tell(pb);
> > > >>> +    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
> > > >>> +    avio_wb32(pb, mdat_pos); /* rewrite offset */
> > > >>
> > > >> What guarantees that this fits into 32bits?
> > > >>
> > > >
> > > > Added a check to fail if it does not fit in 32-bits.
> > > >
> > > >>> +    avio_seek(pb, pos_backup, SEEK_SET);
> > > >>> +
> > > >>> +    return 0;
> > > >>> +}
> > > >>> +
> > > >>>  #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
> > > >>>  static const AVCodecTag codec_3gp_tags[] = {
> > > >>>      { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
> > > >>> @@ -7373,6 +7630,20 @@ static const AVCodecTag codec_f4v_tags[] = {
> > > >>>      { AV_CODEC_ID_NONE, 0 },
> > > >>>  };
> > > >>>
> > > >>> +#if CONFIG_AVIF_MUXER
> > > >>> +static const AVCodecTag codec_avif_tags[] = {
> > > >>> +    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
> > > >>> +    { AV_CODEC_ID_NONE, 0 },
> > > >>> +};
> > > >>> +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
> > > >>> +
> > > >>> +static const AVClass mov_avif_muxer_class = {
> > > >>> +    .class_name = "avif muxer",
> > > >>> +    .item_name  = av_default_item_name,
> > > >>> +    .version    = LIBAVUTIL_VERSION_INT,
> > > >>> +};
> > > >>
> > > >> It's not mandatory for a muxer to have a private class; it is only
> > > >> necessary for options. If you do not have options (like here), then the
> > > >> AVClass is useless.
> > > >> I actually wanted that you support the options that make sense for AVIF
> > > >> and I dislike that the avif muxer uses a different write_trailer than
> > > >> every other muxer here (which is the reason why e.g. the faststart
> > > >> option is not supported by it). Is it really so different?
> > > >> Furthermore, given that you don't use the same AVClass as everyone else,
> > > >> the values for fields that are AVOpt-enabled are zero; and this does not
> > > >> always coincide with the default value of the relevant option. See e.g.
> > > >> skip_iods, iods_audio_profile, iods_video_profile etc. movie_timescale
> > > >> will even be set to a value that is outside of its legal range. I don't
> > > >> know whether this can lead to any divide-by-zero crashes or asserts, but
> > > >> it is certainly very fragile.
> > > >>
> > > >
> > > > Hmm, i see a couple of solutions here:
> > > > 1) Use the same private class as the MOV muxer and document that not
> > > > all options are supported when the format is AVIF. I think there are
> > > > already cases within this file where not all muxers may support all
> > > > the options.
> > > > 2) Leave it as-is since the code always checks for AVIF mode first and
> > > > then does the rest, so the defaults of the non-relevant options should
> > > > not matter. But i agree that this is very fragile. One way to ensure
> > > > future changes don't break this structure is to add a fate test.
> > > >
> > >
> > > Can you tell which options (if enabled) would work and which would not
> > > work or would create spec-incompliant output?
> > >
> >
> > The following options will make sense for AVIF (and may just work
> > as-is, i am not sure of this): write_colr, prefer_icc,
> > video_track_timescale and a few other generic MOV options like
> > use_stream_ids_as_track_ids, empty_hdlr_name, and brand.
> >
>
> Any thoughts on this?
>

Another ping on this. :)


> > > > Please let me know what you think.
> > > >
> > > > About using a separate write_trailer function, i can re-use the
> > > > existing mov_write_trailer but it will mostly be a special case for
> > > > AVIF mode with the code in avif_write_trailer, so i thought it was
> > > > cleaner to have a separate function anyway. I am not sure if there are
> > > > any other implications because of this. If you prefer that i move it
> > > > into mov_write_trailer, please let me know and i can do that as well.
> > > >
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> >
> >
> > --
> > Vignesh
>
>
>
> --
> Vignesh



--
Vignesh
diff mbox series

Patch

diff --git a/configure b/configure
index 8c69ab0c86..6d7020e96b 100755
--- a/configure
+++ b/configure
@@ -3390,6 +3390,7 @@  asf_stream_muxer_select="asf_muxer"
 av1_demuxer_select="av1_frame_merge_bsf av1_parser"
 avi_demuxer_select="riffdec exif"
 avi_muxer_select="riffenc"
+avif_muxer_select="mov_muxer"
 caf_demuxer_select="iso_media"
 caf_muxer_select="iso_media"
 dash_muxer_select="mp4_muxer"
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index d066a7745b..400c17afbd 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -81,6 +81,7 @@  extern const AVOutputFormat ff_au_muxer;
 extern const AVInputFormat  ff_av1_demuxer;
 extern const AVInputFormat  ff_avi_demuxer;
 extern const AVOutputFormat ff_avi_muxer;
+extern const AVOutputFormat ff_avif_muxer;
 extern const AVInputFormat  ff_avisynth_demuxer;
 extern const AVOutputFormat ff_avm2_muxer;
 extern const AVInputFormat  ff_avr_demuxer;
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 1a746a67fd..ff41579300 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1303,7 +1303,7 @@  static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track)
 
     avio_wb32(pb, 0);
     ffio_wfourcc(pb, "av1C");
-    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1);
+    ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF);
     return update_size(pb, pos);
 }
 
@@ -2004,12 +2004,13 @@  static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
         }
     }
 
-    /* We should only ever be called by MOV or MP4. */
-    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4);
+    /* We should only ever be called for MOV, MP4 and AVIF. */
+    av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 ||
+               track->mode == MODE_AVIF);
 
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "colr");
-    if (track->mode == MODE_MP4)
+    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF)
         ffio_wfourcc(pb, "nclx");
     else
         ffio_wfourcc(pb, "nclc");
@@ -2019,7 +2020,7 @@  static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc)
     avio_wb16(pb, track->par->color_primaries);
     avio_wb16(pb, track->par->color_trc);
     avio_wb16(pb, track->par->color_space);
-    if (track->mode == MODE_MP4) {
+    if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
         int full_range = track->par->color_range == AVCOL_RANGE_JPEG;
         avio_w8(pb, full_range << 7);
     }
@@ -2085,7 +2086,7 @@  static void find_compressor(char * compressor_name, int len, MOVTrack *track)
                   || (track->par->width == 1440 && track->par->height == 1080)
                   || (track->par->width == 1920 && track->par->height == 1080);
 
-    if (track->mode == MODE_MOV &&
+    if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) &&
         (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) {
         av_strlcpy(compressor_name, encoder->value, 32);
     } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) {
@@ -2106,6 +2107,25 @@  static void find_compressor(char * compressor_name, int len, MOVTrack *track)
     }
 }
 
+static int mov_write_ccst_tag(AVIOContext *pb)
+{
+    int64_t pos = avio_tell(pb);
+    // Write sane defaults:
+    // all_ref_pics_intra = 0 : all samples can use any type of reference.
+    // intra_pred_used = 1 : intra prediction may or may not be used.
+    // max_ref_per_pic = 15 : reserved value to indicate that any number of
+    //                        reference images can be used.
+    uint8_t ccstValue = (0 << 7) |  /* all_ref_pics_intra */
+                        (1 << 6) |  /* intra_pred_used */
+                        (15 << 2);  /* max_ref_per_pic */
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ccst");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_w8(pb, ccstValue);
+    avio_wb24(pb, 0);  /* reserved */
+    return update_size(pb, pos);
+}
+
 static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
 {
     int ret = AVERROR_BUG;
@@ -2123,6 +2143,8 @@  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
     avio_wb32(pb, 0); /* size */
     if (mov->encryption_scheme != MOV_ENC_NONE) {
         ffio_wfourcc(pb, "encv");
+    } else if (track->mode == MODE_AVIF) {
+        ffio_wfourcc(pb, "av01");
     } else {
         avio_wl32(pb, track->tag); // store it byteswapped
     }
@@ -2239,7 +2261,7 @@  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
         else
             av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n");
     }
-    if (track->mode == MODE_MOV || track->mode == MODE_MP4) {
+    if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) {
         int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
                              track->par->color_trc != AVCOL_TRC_UNSPECIFIED &&
                              track->par->color_space != AVCOL_SPC_UNSPECIFIED;
@@ -2291,6 +2313,9 @@  static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex
     if (avid)
         avio_wb32(pb, 0);
 
+    if (track->mode == MODE_AVIF)
+        mov_write_ccst_tag(pb);
+
     return update_size(pb, pos);
 }
 
@@ -2792,7 +2817,10 @@  static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
 
     if (track) {
         hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0";
-        if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (track->mode == MODE_AVIF) {
+            hdlr_type = "pict";
+            descr = "ffmpeg";
+        } else if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) {
             hdlr_type = "vide";
             descr     = "VideoHandler";
         } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -2859,6 +2887,131 @@  static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
     return update_size(pb, pos);
 }
 
+static int mov_write_pitm_tag(AVIOContext *pb, int item_id)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "pitm");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_wb16(pb, item_id); /* item_id */
+    return update_size(pb, pos);
+}
+
+static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "iloc");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */
+    avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */
+    avio_wb16(pb, 1); /* item_count */
+
+    avio_wb16(pb, 1); /* item_id */
+    avio_wb16(pb, 0); /* data_reference_index */
+    avio_wb16(pb, 1); /* extent_count */
+    mov->avif_extent_pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* extent_offset (written later) */
+    // For animated AVIF, we simply write the first packet's size.
+    avio_wb32(pb, mov->avif_extent_length); /* extent_length */
+
+    return update_size(pb, pos);
+}
+
+static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t infe_pos;
+    int64_t iinf_pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "iinf");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_wb16(pb, 1); /* entry_count */
+
+    infe_pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "infe");
+    avio_w8(pb, 0x2); /* Version */
+    avio_wb24(pb, 0); /* flags */
+    avio_wb16(pb, 1); /* item_id */
+    avio_wb16(pb, 0); /* item_protection_index */
+    avio_write(pb, "av01", 4); /* item_type */
+    avio_write(pb, "Color\0", 6); /* item_name */
+    update_size(pb, infe_pos);
+
+    return update_size(pb, iinf_pos);
+}
+
+static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ispe");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */
+    avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */
+    return update_size(pb, pos);
+}
+
+
+static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    int num_channels = av_pix_fmt_count_planes(s->streams[0]->codecpar->format);
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
+    int i;
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "pixi");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_w8(pb, num_channels); /* num_channels */
+    for (i = 0; i < num_channels; ++i) {
+      avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */
+    }
+    return update_size(pb, pos);
+}
+
+static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ipco");
+    mov_write_ispe_tag(pb, mov, s);
+    mov_write_pixi_tag(pb, mov, s);
+    mov_write_av1c_tag(pb, &mov->tracks[0]);
+    mov_write_colr_tag(pb, &mov->tracks[0], 0);
+    return update_size(pb, pos);
+}
+
+static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "ipma");
+    avio_wb32(pb, 0); /* Version & flags */
+    avio_wb32(pb, 1); /* entry_count */
+    avio_wb16(pb, 1); /* item_ID */
+    avio_w8(pb, 4); /* association_count */
+
+    // ispe association.
+    avio_w8(pb, 1); /* essential and property_index */
+    // pixi association.
+    avio_w8(pb, 2); /* essential and property_index */
+    // av1C association.
+    avio_w8(pb, 0x80 | 3); /* essential and property_index */
+    // colr association.
+    avio_w8(pb, 4); /* essential and property_index */
+    return update_size(pb, pos);
+}
+
+static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "iprp");
+    mov_write_ipco_tag(pb, mov, s);
+    mov_write_ipma_tag(pb, mov, s);
+    return update_size(pb, pos);
+}
+
 static int mov_write_hmhd_tag(AVIOContext *pb)
 {
     /* This atom must be present, but leaving the values at zero
@@ -3056,7 +3209,7 @@  static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
             display_matrix = NULL;
     }
 
-    if (track->flags & MOV_TRACK_ENABLED)
+    if (track->flags & MOV_TRACK_ENABLED || track->mode == MODE_AVIF)
         flags |= MOV_TKHD_FLAG_ENABLED;
 
     if (track->mode == MODE_ISM)
@@ -3104,7 +3257,7 @@  static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov,
     if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO ||
                track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
         int64_t track_width_1616;
-        if (track->mode == MODE_MOV) {
+        if (track->mode == MODE_MOV || track->mode == MODE_AVIF) {
             track_width_1616 = track->par->width * 0x10000ULL;
         } else {
             track_width_1616 = av_rescale(st->sample_aspect_ratio.num,
@@ -3439,7 +3592,8 @@  static int mov_write_trak_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
             mov_write_tapt_tag(pb, track);
         }
     }
-    mov_write_track_udta_tag(pb, mov, st);
+    if (track->mode != MODE_AVIF)
+        mov_write_track_udta_tag(pb, mov, st);
     track->entry = entry_backup;
     track->chunkCount = chunk_backup;
     return update_size(pb, pos);
@@ -3914,8 +4068,15 @@  static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov,
         mov_write_mdta_hdlr_tag(pb, mov, s);
         mov_write_mdta_keys_tag(pb, mov, s);
         mov_write_mdta_ilst_tag(pb, mov, s);
-    }
-    else {
+    } else if (mov->mode == MODE_AVIF) {
+        mov_write_hdlr_tag(s, pb, &mov->tracks[0]);
+        // We always write the primary item id as 1 since only one track is
+        // supported for AVIF.
+        mov_write_pitm_tag(pb, 1);
+        mov_write_iloc_tag(pb, mov, s);
+        mov_write_iinf_tag(pb, mov, s);
+        mov_write_iprp_tag(pb, mov, s);
+    } else {
         /* iTunes metadata tag */
         mov_write_itunes_hdlr_tag(pb, mov, s);
         mov_write_ilst_tag(pb, mov, s);
@@ -4245,10 +4406,11 @@  static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
     }
 
     mov_write_mvhd_tag(pb, mov);
-    if (mov->mode != MODE_MOV && !mov->iods_skip)
+    if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip)
         mov_write_iods_tag(pb, mov);
     for (i = 0; i < mov->nb_streams; i++) {
-        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
+        if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT ||
+            mov->mode == MODE_AVIF) {
             int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
             if (ret < 0)
                 return ret;
@@ -4259,7 +4421,7 @@  static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
 
     if (mov->mode == MODE_PSP)
         mov_write_uuidusmt_tag(pb, s);
-    else
+    else if (mov->mode != MODE_AVIF)
         mov_write_udta_tag(pb, mov, s);
 
     return update_size(pb, pos);
@@ -5002,6 +5164,9 @@  static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s,
     else if (mov->mode == MODE_3GP) {
         ffio_wfourcc(pb, has_h264 ? "3gp6"  : "3gp4");
         minor =     has_h264 ?   0x100 :   0x200;
+    } else if (mov->mode == MODE_AVIF) {
+        ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif");
+        minor = 0;
     } else if (mov->mode & MODE_3G2) {
         ffio_wfourcc(pb, has_h264 ? "3g2b"  : "3g2a");
         minor =     has_h264 ? 0x20000 : 0x10000;
@@ -5065,6 +5230,31 @@  static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
     // compatible brand a second time.
     if (mov->mode == MODE_ISM) {
         ffio_wfourcc(pb, "piff");
+    } else if (mov->mode == MODE_AVIF) {
+        const AVPixFmtDescriptor *pix_fmt_desc =
+            av_pix_fmt_desc_get(s->streams[0]->codecpar->format);
+        const int depth = pix_fmt_desc->comp[0].depth;
+        if (mov->is_animated_avif) {
+            // For animated AVIF, major brand is "avis". Add "avif" as a
+            // compatible brand.
+            ffio_wfourcc(pb, "avif");
+            ffio_wfourcc(pb, "msf1");
+            ffio_wfourcc(pb, "iso8");
+        }
+        ffio_wfourcc(pb, "mif1");
+        ffio_wfourcc(pb, "miaf");
+        if (depth == 8 || depth == 10) {
+            // MA1B and MA1A brands are based on AV1 profile. Short hand for
+            // computing that is based on chroma subsampling type. 420 chroma
+            // subsampling is MA1B.  444 chroma subsampling is MA1A.
+            if (pix_fmt_desc->log2_chroma_w == 0 && pix_fmt_desc->log2_chroma_h == 0) {
+                // 444 chroma subsampling.
+                ffio_wfourcc(pb, "MA1A");
+            } else {
+                // 420 chroma subsampling.
+                ffio_wfourcc(pb, "MA1B");
+            }
+        }
     } else if (mov->mode != MODE_MOV) {
         // We add tfdt atoms when fragmenting, signal this with the iso6 compatible
         // brand, if not already the major brand. This is compatible with users that
@@ -5669,7 +5859,7 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     if (ret < 0)
         return ret;
 
-    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) {
         int ret;
         if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) {
             if (mov->frag_interleave && mov->fragments > 0) {
@@ -5802,7 +5992,9 @@  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
     } else if (par->codec_id == AV_CODEC_ID_AV1) {
-        if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
+        if (trk->mode == MODE_AVIF) {
+            avio_write(pb, pkt->data, pkt->size);
+        } else if (trk->hint_track >= 0 && trk->hint_track < mov->nb_streams) {
             ret = ff_av1_filter_obus_buf(pkt->data, &reformatted_data,
                                          &size, &offset);
             if (ret < 0)
@@ -6230,6 +6422,10 @@  fail:
             }
         }
 
+        if (trk->mode == MODE_AVIF && !mov->avif_extent_length) {
+            mov->avif_extent_length = pkt->size;
+        }
+
         return mov_write_single_packet(s, pkt);
     }
 }
@@ -6569,11 +6765,15 @@  static int mov_init(AVFormatContext *s)
     else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD;
     else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM;
     else if (IS_MODE(f4v,   F4V)) mov->mode = MODE_F4V;
+    else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF;
 #undef IS_MODE
 
     if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
         mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
 
+    if (mov->mode == MODE_AVIF)
+        mov->flags |= FF_MOV_FLAG_DELAY_MOOV;
+
     /* Set the FRAGMENT flag if any of the fragmentation methods are
      * enabled. */
     if (mov->max_fragment_duration || mov->max_fragment_size ||
@@ -6654,11 +6854,25 @@  static int mov_init(AVFormatContext *s)
     /* Non-seekable output is ok if using fragmentation. If ism_lookahead
      * is enabled, we don't support non-seekable output at all. */
     if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) &&
-        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
+        (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead ||
+         mov->mode == MODE_AVIF)) {
         av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
         return AVERROR(EINVAL);
     }
 
+    /* AVIF output must have exactly one video stream */
+    if (mov->mode == MODE_AVIF) {
+        if (s->nb_streams > 1) {
+            av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n");
+            return AVERROR(EINVAL);
+        }
+        if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
+            av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n");
+            return AVERROR(EINVAL);
+        }
+    }
+
+
     mov->nb_streams = s->nb_streams;
     if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters)
         mov->chapter_track = mov->nb_streams++;
@@ -6797,12 +7011,13 @@  static int mov_init(AVFormatContext *s)
                         pix_fmt == AV_PIX_FMT_MONOWHITE ||
                         pix_fmt == AV_PIX_FMT_MONOBLACK;
             }
-            if (track->par->codec_id == AV_CODEC_ID_VP9 ||
-                track->par->codec_id == AV_CODEC_ID_AV1) {
-                if (track->mode != MODE_MP4) {
-                    av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
-                    return AVERROR(EINVAL);
-                }
+            if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) {
+                av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id));
+		return AVERROR(EINVAL);
+            } else if (track->par->codec_id == AV_CODEC_ID_AV1 &&
+                       track->mode != MODE_MP4 && track->mode != MODE_AVIF) {
+                av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id));
+                return AVERROR(EINVAL);
             } else if (track->par->codec_id == AV_CODEC_ID_VP8) {
                 /* altref frames handling is not defined in the spec as of version v1.0,
                  * so just forbid muxing VP8 streams altogether until a new version does */
@@ -7003,7 +7218,7 @@  static int mov_write_header(AVFormatContext *s)
                             FF_MOV_FLAG_FRAG_EVERY_FRAME)) &&
             !mov->max_fragment_duration && !mov->max_fragment_size)
             mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
-    } else {
+    } else if (mov->mode != MODE_AVIF) {
         if (mov->flags & FF_MOV_FLAG_FASTSTART)
             mov->reserved_header_pos = avio_tell(pb);
         mov_write_mdat_tag(pb, mov);
@@ -7291,6 +7506,48 @@  static int mov_check_bitstream(AVFormatContext *s, AVStream *st,
     return ret;
 }
 
+static int avif_write_trailer(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    MOVMuxContext *mov = s->priv_data;
+    int64_t pos_backup, mdat_pos;
+    uint8_t *buf;
+    int buf_size, moov_size;
+    int i;
+
+    if (mov->moov_written) return 0;
+
+    mov->is_animated_avif = s->streams[0]->nb_frames > 1;
+    mov_write_identification(pb, s);
+    mov_write_meta_tag(pb, mov, s);
+
+    moov_size = get_moov_size(s);
+    for (i = 0; i < mov->nb_streams; i++)
+        mov->tracks[i].data_offset = avio_tell(pb) + moov_size + 8;
+
+    if (mov->is_animated_avif) {
+        int ret;
+        if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
+            return ret;
+    }
+
+    buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf);
+    avio_wb32(pb, buf_size + 8);
+    ffio_wfourcc(pb, "mdat");
+    mdat_pos = avio_tell(pb);
+
+    avio_write(pb, buf, buf_size);
+    ffio_free_dyn_buf(&mov->mdat_buf);
+
+    // write extent offset.
+    pos_backup = avio_tell(pb);
+    avio_seek(pb, mov->avif_extent_pos, SEEK_SET);
+    avio_wb32(pb, mdat_pos); /* rewrite offset */
+    avio_seek(pb, pos_backup, SEEK_SET);
+
+    return 0;
+}
+
 #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER
 static const AVCodecTag codec_3gp_tags[] = {
     { AV_CODEC_ID_H263,     MKTAG('s','2','6','3') },
@@ -7373,6 +7630,20 @@  static const AVCodecTag codec_f4v_tags[] = {
     { AV_CODEC_ID_NONE, 0 },
 };
 
+#if CONFIG_AVIF_MUXER
+static const AVCodecTag codec_avif_tags[] = {
+    { AV_CODEC_ID_AV1,     MKTAG('a','v','0','1') },
+    { AV_CODEC_ID_NONE, 0 },
+};
+static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL };
+
+static const AVClass mov_avif_muxer_class = {
+    .class_name = "avif muxer",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+#endif
+
 #if CONFIG_MOV_MUXER
 const AVOutputFormat ff_mov_muxer = {
     .name              = "mov",
@@ -7535,3 +7806,21 @@  const AVOutputFormat ff_f4v_muxer = {
     .priv_class        = &mov_isobmff_muxer_class,
 };
 #endif
+#if CONFIG_AVIF_MUXER
+const AVOutputFormat ff_avif_muxer = {
+    .name              = "avif",
+    .long_name         = NULL_IF_CONFIG_SMALL("AVIF"),
+    .mime_type         = "image/avif",
+    .extensions        = "avif",
+    .priv_data_size    = sizeof(MOVMuxContext),
+    .video_codec       = AV_CODEC_ID_AV1,
+    .init              = mov_init,
+    .write_header      = mov_write_header,
+    .write_packet      = mov_write_packet,
+    .write_trailer     = avif_write_trailer,
+    .deinit            = mov_free,
+    .flags             = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH,
+    .codec_tag         = codec_avif_tags_list,
+    .priv_class        = &mov_avif_muxer_class,
+};
+#endif
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 2ac84ed070..55b8469f68 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -43,6 +43,7 @@ 
 #define MODE_IPOD 0x20
 #define MODE_ISM  0x40
 #define MODE_F4V  0x80
+#define MODE_AVIF 0x100
 
 typedef struct MOVIentry {
     uint64_t     pos;
@@ -242,6 +243,10 @@  typedef struct MOVMuxContext {
     MOVPrftBox write_prft;
     int empty_hdlr_name;
     int movie_timescale;
+
+    int64_t avif_extent_pos;
+    int avif_extent_length;
+    int is_animated_avif;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)