Message ID | 20220512162328.3648920-1-vigneshv@google.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
On 2022-05-12 09:53 pm, Vignesh Venkatasubramanian wrote: > Add an AVIF muxer by re-using the existing mov/mp4 muxer. Bumped lavf minor and pushed set as ab05e9a7f2...84241e63cf Regards, Gyan > > AVIF Specification: https://aomediacodec.github.io/av1-avif > > Sample usage for still image: > ffmpeg -i image.png -c:v libaom-av1 -still-picture 1 image.avif > > Sample usage for animated AVIF image: > ffmpeg -i video.mp4 animated.avif > > We can re-use any of the AV1 encoding options that will make > sense for image encoding (like bitrate, tiles, encoding speed, > etc). > > The files generated by this muxer have been verified to be valid > AVIF files by the following: > 1) Displays on Chrome (both still and animated images). > 2) Displays on Firefox (only still images, Firefox does not support > animated AVIF yet). > 3) Verified to be valid by Compliance Warden: > https://github.com/gpac/ComplianceWarden > > Fixes the encoder/muxer part of Trac Ticket #7621 > > Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com> > --- > configure | 1 + > libavformat/allformats.c | 1 + > libavformat/movenc.c | 341 ++++++++++++++++++++++++++++++++++++--- > libavformat/movenc.h | 5 + > 4 files changed, 323 insertions(+), 25 deletions(-) > > diff --git a/configure b/configure > index 196873c4aa..2992f9760e 100755 > --- a/configure > +++ b/configure > @@ -3404,6 +3404,7 @@ asf_stream_muxer_select="asf_muxer" > av1_demuxer_select="av1_frame_merge_bsf av1_parser" > avi_demuxer_select="riffdec exif" > avi_muxer_select="riffenc" > +avif_muxer_select="mov_muxer" > caf_demuxer_select="iso_media" > caf_muxer_select="iso_media" > dash_muxer_select="mp4_muxer" > diff --git a/libavformat/allformats.c b/libavformat/allformats.c > index 63876c468f..1802536633 100644 > --- a/libavformat/allformats.c > +++ b/libavformat/allformats.c > @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer; > extern const AVInputFormat ff_av1_demuxer; > extern const AVInputFormat ff_avi_demuxer; > extern 
const AVOutputFormat ff_avi_muxer; > +extern const AVOutputFormat ff_avif_muxer; > extern const AVInputFormat ff_avisynth_demuxer; > extern const AVOutputFormat ff_avm2_muxer; > extern const AVInputFormat ff_avr_demuxer; > diff --git a/libavformat/movenc.c b/libavformat/movenc.c > index 271db99b46..a07c0ae2b4 100644 > --- a/libavformat/movenc.c > +++ b/libavformat/movenc.c > @@ -1335,7 +1335,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track) > > avio_wb32(pb, 0); > ffio_wfourcc(pb, "av1C"); > - ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1); > + ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF); > return update_size(pb, pos); > } > > @@ -2037,12 +2037,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc) > } > } > > - /* We should only ever be called by MOV or MP4. */ > - av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4); > + /* We should only ever be called for MOV, MP4 and AVIF. */ > + av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 || > + track->mode == MODE_AVIF); > > avio_wb32(pb, 0); /* size */ > ffio_wfourcc(pb, "colr"); > - if (track->mode == MODE_MP4) > + if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) > ffio_wfourcc(pb, "nclx"); > else > ffio_wfourcc(pb, "nclc"); > @@ -2052,7 +2053,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc) > avio_wb16(pb, track->par->color_primaries); > avio_wb16(pb, track->par->color_trc); > avio_wb16(pb, track->par->color_space); > - if (track->mode == MODE_MP4) { > + if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) { > int full_range = track->par->color_range == AVCOL_RANGE_JPEG; > avio_w8(pb, full_range << 7); > } > @@ -2118,7 +2119,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track) > || (track->par->width == 1440 && track->par->height == 1080) > || (track->par->width == 1920 && track->par->height == 1080); > > - if 
(track->mode == MODE_MOV && > + if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) && > (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) { > av_strlcpy(compressor_name, encoder->value, 32); > } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) { > @@ -2139,6 +2140,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track) > } > } > > +static int mov_write_ccst_tag(AVIOContext *pb) > +{ > + int64_t pos = avio_tell(pb); > + // Write sane defaults: > + // all_ref_pics_intra = 0 : all samples can use any type of reference. > + // intra_pred_used = 1 : intra prediction may or may not be used. > + // max_ref_per_pic = 15 : reserved value to indicate that any number of > + // reference images can be used. > + uint8_t ccstValue = (0 << 7) | /* all_ref_pics_intra */ > + (1 << 6) | /* intra_pred_used */ > + (15 << 2); /* max_ref_per_pic */ > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "ccst"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_w8(pb, ccstValue); > + avio_wb24(pb, 0); /* reserved */ > + return update_size(pb, pos); > +} > + > static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track) > { > int ret = AVERROR_BUG; > @@ -2272,7 +2292,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex > else > av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. 
Format is not MOV.\n"); > } > - if (track->mode == MODE_MOV || track->mode == MODE_MP4) { > + if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) { > int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED && > track->par->color_trc != AVCOL_TRC_UNSPECIFIED && > track->par->color_space != AVCOL_SPC_UNSPECIFIED; > @@ -2324,6 +2344,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex > if (avid) > avio_wb32(pb, 0); > > + if (track->mode == MODE_AVIF) > + mov_write_ccst_tag(pb); > + > return update_size(pb, pos); > } > > @@ -2826,8 +2849,13 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra > if (track) { > hdlr = (track->mode == MODE_MOV) ? "mhlr" : "\0\0\0\0"; > if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) { > - hdlr_type = "vide"; > - descr = "VideoHandler"; > + if (track->mode == MODE_AVIF) { > + hdlr_type = "pict"; > + descr = "PictureHandler"; > + } else { > + hdlr_type = "vide"; > + descr = "VideoHandler"; > + } > } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) { > hdlr_type = "soun"; > descr = "SoundHandler"; > @@ -2892,6 +2920,128 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra > return update_size(pb, pos); > } > > +static int mov_write_pitm_tag(AVIOContext *pb, int item_id) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "pitm"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_wb16(pb, item_id); /* item_id */ > + return update_size(pb, pos); > +} > + > +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "iloc"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */ > + avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */ > + avio_wb16(pb, 1); /* item_count 
*/ > + > + avio_wb16(pb, 1); /* item_id */ > + avio_wb16(pb, 0); /* data_reference_index */ > + avio_wb16(pb, 1); /* extent_count */ > + mov->avif_extent_pos = avio_tell(pb); > + avio_wb32(pb, 0); /* extent_offset (written later) */ > + // For animated AVIF, we simply write the first packet's size. > + avio_wb32(pb, mov->avif_extent_length); /* extent_length */ > + > + return update_size(pb, pos); > +} > + > +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t infe_pos; > + int64_t iinf_pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "iinf"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_wb16(pb, 1); /* entry_count */ > + > + infe_pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "infe"); > + avio_w8(pb, 0x2); /* Version */ > + avio_wb24(pb, 0); /* flags */ > + avio_wb16(pb, 1); /* item_id */ > + avio_wb16(pb, 0); /* item_protection_index */ > + avio_write(pb, "av01", 4); /* item_type */ > + avio_write(pb, "Color\0", 6); /* item_name */ > + update_size(pb, infe_pos); > + > + return update_size(pb, iinf_pos); > +} > + > +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "ispe"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */ > + avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */ > + return update_size(pb, pos); > +} > + > +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "pixi"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_w8(pb, pixdesc->nb_components); /* num_channels */ > + for (int i = 0; i < pixdesc->nb_components; 
++i) { > + avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */ > + } > + return update_size(pb, pos); > +} > + > +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "ipco"); > + mov_write_ispe_tag(pb, mov, s); > + mov_write_pixi_tag(pb, mov, s); > + mov_write_av1c_tag(pb, &mov->tracks[0]); > + mov_write_colr_tag(pb, &mov->tracks[0], 0); > + return update_size(pb, pos); > +} > + > +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "ipma"); > + avio_wb32(pb, 0); /* Version & flags */ > + avio_wb32(pb, 1); /* entry_count */ > + avio_wb16(pb, 1); /* item_ID */ > + avio_w8(pb, 4); /* association_count */ > + > + // ispe association. > + avio_w8(pb, 1); /* essential and property_index */ > + // pixi association. > + avio_w8(pb, 2); /* essential and property_index */ > + // av1C association. > + avio_w8(pb, 0x80 | 3); /* essential and property_index */ > + // colr association. 
> + avio_w8(pb, 4); /* essential and property_index */ > + return update_size(pb, pos); > +} > + > +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) > +{ > + int64_t pos = avio_tell(pb); > + avio_wb32(pb, 0); /* size */ > + ffio_wfourcc(pb, "iprp"); > + mov_write_ipco_tag(pb, mov, s); > + mov_write_ipma_tag(pb, mov, s); > + return update_size(pb, pos); > +} > + > static int mov_write_hmhd_tag(AVIOContext *pb) > { > /* This atom must be present, but leaving the values at zero > @@ -3137,7 +3287,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov, > if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO || > track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) { > int64_t track_width_1616; > - if (track->mode == MODE_MOV) { > + if (track->mode == MODE_MOV || track->mode == MODE_AVIF) { > track_width_1616 = track->par->width * 0x10000ULL; > } else { > track_width_1616 = av_rescale(st->sample_aspect_ratio.num, > @@ -3536,6 +3686,7 @@ static int mov_write_mvhd_tag(AVIOContext *pb, MOVMuxContext *mov) > int max_track_id = 1, i; > int64_t max_track_len = 0; > int version; > + int timescale; > > for (i = 0; i < mov->nb_streams; i++) { > if (mov->tracks[i].entry > 0 && mov->tracks[i].timescale) { > @@ -3570,7 +3721,12 @@ static int mov_write_mvhd_tag(AVIOContext *pb, MOVMuxContext *mov) > avio_wb32(pb, mov->time); /* creation time */ > avio_wb32(pb, mov->time); /* modification time */ > } > - avio_wb32(pb, mov->movie_timescale); > + > + timescale = mov->movie_timescale; > + if (mov->mode == MODE_AVIF && !timescale) > + timescale = mov->tracks[0].timescale; > + > + avio_wb32(pb, timescale); > (version == 1) ? 
avio_wb64(pb, max_track_len) : avio_wb32(pb, max_track_len); /* duration of longest track */ > > avio_wb32(pb, 0x00010000); /* reserved (preferred rate) 1.0 = normal */ > @@ -3947,8 +4103,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov, > mov_write_mdta_hdlr_tag(pb, mov, s); > mov_write_mdta_keys_tag(pb, mov, s); > mov_write_mdta_ilst_tag(pb, mov, s); > - } > - else { > + } else if (mov->mode == MODE_AVIF) { > + mov_write_hdlr_tag(s, pb, &mov->tracks[0]); > + // We always write the primary item id as 1 since only one track is > + // supported for AVIF. > + mov_write_pitm_tag(pb, 1); > + mov_write_iloc_tag(pb, mov, s); > + mov_write_iinf_tag(pb, mov, s); > + mov_write_iprp_tag(pb, mov, s); > + } else { > /* iTunes metadata tag */ > mov_write_itunes_hdlr_tag(pb, mov, s); > mov_write_ilst_tag(pb, mov, s); > @@ -4278,10 +4441,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, > } > > mov_write_mvhd_tag(pb, mov); > - if (mov->mode != MODE_MOV && !mov->iods_skip) > + if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip) > mov_write_iods_tag(pb, mov); > for (i = 0; i < mov->nb_streams; i++) { > - if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) { > + if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT || > + mov->mode == MODE_AVIF) { > int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL); > if (ret < 0) > return ret; > @@ -4292,7 +4456,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, > > if (mov->mode == MODE_PSP) > mov_write_uuidusmt_tag(pb, s); > - else > + else if (mov->mode != MODE_AVIF) > mov_write_udta_tag(pb, mov, s); > > return update_size(pb, pos); > @@ -5039,6 +5203,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s, > else if (mov->mode == MODE_3GP) { > ffio_wfourcc(pb, has_h264 ? "3gp6" : "3gp4"); > minor = has_h264 ? 
0x100 : 0x200; > + } else if (mov->mode == MODE_AVIF) { > + ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif"); > + minor = 0; > } else if (mov->mode & MODE_3G2) { > ffio_wfourcc(pb, has_h264 ? "3g2b" : "3g2a"); > minor = has_h264 ? 0x20000 : 0x10000; > @@ -5102,6 +5269,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s) > // compatible brand a second time. > if (mov->mode == MODE_ISM) { > ffio_wfourcc(pb, "piff"); > + } else if (mov->mode == MODE_AVIF) { > + const AVPixFmtDescriptor *pix_fmt_desc = > + av_pix_fmt_desc_get(s->streams[0]->codecpar->format); > + const int depth = pix_fmt_desc->comp[0].depth; > + if (mov->is_animated_avif) { > + // For animated AVIF, major brand is "avis". Add "avif" as a > + // compatible brand. > + ffio_wfourcc(pb, "avif"); > + ffio_wfourcc(pb, "msf1"); > + ffio_wfourcc(pb, "iso8"); > + } > + ffio_wfourcc(pb, "mif1"); > + ffio_wfourcc(pb, "miaf"); > + if (depth == 8 || depth == 10) { > + // MA1B and MA1A brands are based on AV1 profile. Short hand for > + // computing that is based on chroma subsampling type. 420 chroma > + // subsampling is MA1B. 444 chroma subsampling is MA1A. > + if (!pix_fmt_desc->log2_chroma_w && !pix_fmt_desc->log2_chroma_h) { > + // 444 chroma subsampling. > + ffio_wfourcc(pb, "MA1A"); > + } else { > + // 420 chroma subsampling. > + ffio_wfourcc(pb, "MA1B"); > + } > + } > } else if (mov->mode != MODE_MOV) { > // We add tfdt atoms when fragmenting, signal this with the iso6 compatible > // brand, if not already the major brand. 
This is compatible with users that > @@ -5705,7 +5897,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) > if (ret < 0) > return ret; > > - if (mov->flags & FF_MOV_FLAG_FRAGMENT) { > + if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) { > int ret; > if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) { > if (mov->frag_interleave && mov->fragments > 0) { > @@ -5846,7 +6038,11 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) > avio_write(pb, reformatted_data, size); > } else { > size = ff_av1_filter_obus(pb, pkt->data, pkt->size); > + if (trk->mode == MODE_AVIF && !mov->avif_extent_length) { > + mov->avif_extent_length = size; > + } > } > + > #if CONFIG_AC3_PARSER > } else if (par->codec_id == AV_CODEC_ID_EAC3) { > size = handle_eac3(mov, pkt, trk); > @@ -6579,11 +6775,15 @@ static int mov_init(AVFormatContext *s) > else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD; > else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM; > else if (IS_MODE(f4v, F4V)) mov->mode = MODE_F4V; > + else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF; > #undef IS_MODE > > if (mov->flags & FF_MOV_FLAG_DELAY_MOOV) > mov->flags |= FF_MOV_FLAG_EMPTY_MOOV; > > + if (mov->mode == MODE_AVIF) > + mov->flags |= FF_MOV_FLAG_DELAY_MOOV; > + > /* Set the FRAGMENT flag if any of the fragmentation methods are > * enabled. */ > if (mov->max_fragment_duration || mov->max_fragment_size || > @@ -6664,11 +6864,25 @@ static int mov_init(AVFormatContext *s) > /* Non-seekable output is ok if using fragmentation. If ism_lookahead > * is enabled, we don't support non-seekable output at all. 
*/ > if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) && > - (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) { > + (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead || > + mov->mode == MODE_AVIF)) { > av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n"); > return AVERROR(EINVAL); > } > > + /* AVIF output must have exactly one video stream */ > + if (mov->mode == MODE_AVIF) { > + if (s->nb_streams > 1) { > + av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n"); > + return AVERROR(EINVAL); > + } > + if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) { > + av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n"); > + return AVERROR(EINVAL); > + } > + s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT; > + } > + > mov->nb_streams = s->nb_streams; > if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters) > mov->chapter_track = mov->nb_streams++; > @@ -6811,12 +7025,13 @@ static int mov_init(AVFormatContext *s) > pix_fmt == AV_PIX_FMT_MONOWHITE || > pix_fmt == AV_PIX_FMT_MONOBLACK; > } > - if (track->par->codec_id == AV_CODEC_ID_VP9 || > - track->par->codec_id == AV_CODEC_ID_AV1) { > - if (track->mode != MODE_MP4) { > - av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id)); > - return AVERROR(EINVAL); > - } > + if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) { > + av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id)); > + return AVERROR(EINVAL); > + } else if (track->par->codec_id == AV_CODEC_ID_AV1 && > + track->mode != MODE_MP4 && track->mode != MODE_AVIF) { > + av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id)); > + return AVERROR(EINVAL); > } else if (track->par->codec_id == AV_CODEC_ID_VP8) { > /* altref frames handling is not defined in the spec as of version v1.0, > * so just forbid muxing VP8 streams altogether until a 
new version does */ > @@ -7034,7 +7249,7 @@ static int mov_write_header(AVFormatContext *s) > FF_MOV_FLAG_FRAG_EVERY_FRAME)) && > !mov->max_fragment_duration && !mov->max_fragment_size) > mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME; > - } else { > + } else if (mov->mode != MODE_AVIF) { > if (mov->flags & FF_MOV_FLAG_FASTSTART) > mov->reserved_header_pos = avio_tell(pb); > mov_write_mdat_tag(pb, mov); > @@ -7322,6 +7537,50 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st, > return ret; > } > > +static int avif_write_trailer(AVFormatContext *s) > +{ > + AVIOContext *pb = s->pb; > + MOVMuxContext *mov = s->priv_data; > + int64_t pos_backup, mdat_pos; > + uint8_t *buf; > + int buf_size, moov_size; > + > + if (mov->moov_written) return 0; > + > + mov->is_animated_avif = s->streams[0]->nb_frames > 1; > + mov_write_identification(pb, s); > + mov_write_meta_tag(pb, mov, s); > + > + moov_size = get_moov_size(s); > + mov->tracks[0].data_offset = avio_tell(pb) + moov_size + 8; > + > + if (mov->is_animated_avif) { > + int ret; > + if ((ret = mov_write_moov_tag(pb, mov, s)) < 0) > + return ret; > + } > + > + buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf); > + avio_wb32(pb, buf_size + 8); > + ffio_wfourcc(pb, "mdat"); > + mdat_pos = avio_tell(pb); > + > + if (mdat_pos != (uint32_t)mdat_pos) { > + av_log(s, AV_LOG_ERROR, "mdat offset does not fit in 32 bits\n"); > + return AVERROR_INVALIDDATA; > + } > + > + avio_write(pb, buf, buf_size); > + > + // write extent offset. 
> + pos_backup = avio_tell(pb); > + avio_seek(pb, mov->avif_extent_pos, SEEK_SET); > + avio_wb32(pb, mdat_pos); /* rewrite offset */ > + avio_seek(pb, pos_backup, SEEK_SET); > + > + return 0; > +} > + > #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER > static const AVCodecTag codec_3gp_tags[] = { > { AV_CODEC_ID_H263, MKTAG('s','2','6','3') }, > @@ -7404,6 +7663,20 @@ static const AVCodecTag codec_f4v_tags[] = { > { AV_CODEC_ID_NONE, 0 }, > }; > > +#if CONFIG_AVIF_MUXER > +static const AVCodecTag codec_avif_tags[] = { > + { AV_CODEC_ID_AV1, MKTAG('a','v','0','1') }, > + { AV_CODEC_ID_NONE, 0 }, > +}; > +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL }; > + > +static const AVClass mov_avif_muxer_class = { > + .class_name = "avif muxer", > + .item_name = av_default_item_name, > + .version = LIBAVUTIL_VERSION_INT, > +}; > +#endif > + > #if CONFIG_MOV_MUXER > const AVOutputFormat ff_mov_muxer = { > .name = "mov", > @@ -7566,3 +7839,21 @@ const AVOutputFormat ff_f4v_muxer = { > .priv_class = &mov_isobmff_muxer_class, > }; > #endif > +#if CONFIG_AVIF_MUXER > +const AVOutputFormat ff_avif_muxer = { > + .name = "avif", > + .long_name = NULL_IF_CONFIG_SMALL("AVIF"), > + .mime_type = "image/avif", > + .extensions = "avif", > + .priv_data_size = sizeof(MOVMuxContext), > + .video_codec = AV_CODEC_ID_AV1, > + .init = mov_init, > + .write_header = mov_write_header, > + .write_packet = mov_write_packet, > + .write_trailer = avif_write_trailer, > + .deinit = mov_free, > + .flags = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH, > + .codec_tag = codec_avif_tags_list, > + .priv_class = &mov_avif_muxer_class, > +}; > +#endif > diff --git a/libavformat/movenc.h b/libavformat/movenc.h > index ca507e0e04..281576cc66 100644 > --- a/libavformat/movenc.h > +++ b/libavformat/movenc.h > @@ -43,6 +43,7 @@ > #define MODE_IPOD 0x20 > #define MODE_ISM 0x40 > #define MODE_F4V 0x80 > +#define MODE_AVIF 0x100 > > typedef struct MOVIentry { > uint64_t pos; > @@ -244,6 +245,10 @@ 
typedef struct MOVMuxContext { > MOVPrftBox write_prft; > int empty_hdlr_name; > int movie_timescale; > + > + int64_t avif_extent_pos; > + int avif_extent_length; > + int is_animated_avif; > } MOVMuxContext; > > #define FF_MOV_FLAG_RTP_HINT (1 << 0)
diff --git a/configure b/configure index 196873c4aa..2992f9760e 100755 --- a/configure +++ b/configure @@ -3404,6 +3404,7 @@ asf_stream_muxer_select="asf_muxer" av1_demuxer_select="av1_frame_merge_bsf av1_parser" avi_demuxer_select="riffdec exif" avi_muxer_select="riffenc" +avif_muxer_select="mov_muxer" caf_demuxer_select="iso_media" caf_muxer_select="iso_media" dash_muxer_select="mp4_muxer" diff --git a/libavformat/allformats.c b/libavformat/allformats.c index 63876c468f..1802536633 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -81,6 +81,7 @@ extern const AVOutputFormat ff_au_muxer; extern const AVInputFormat ff_av1_demuxer; extern const AVInputFormat ff_avi_demuxer; extern const AVOutputFormat ff_avi_muxer; +extern const AVOutputFormat ff_avif_muxer; extern const AVInputFormat ff_avisynth_demuxer; extern const AVOutputFormat ff_avm2_muxer; extern const AVInputFormat ff_avr_demuxer; diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 271db99b46..a07c0ae2b4 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -1335,7 +1335,7 @@ static int mov_write_av1c_tag(AVIOContext *pb, MOVTrack *track) avio_wb32(pb, 0); ffio_wfourcc(pb, "av1C"); - ff_isom_write_av1c(pb, track->vos_data, track->vos_len, 1); + ff_isom_write_av1c(pb, track->vos_data, track->vos_len, track->mode != MODE_AVIF); return update_size(pb, pos); } @@ -2037,12 +2037,13 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc) } } - /* We should only ever be called by MOV or MP4. */ - av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4); + /* We should only ever be called for MOV, MP4 and AVIF. 
*/ + av_assert0(track->mode == MODE_MOV || track->mode == MODE_MP4 || + track->mode == MODE_AVIF); avio_wb32(pb, 0); /* size */ ffio_wfourcc(pb, "colr"); - if (track->mode == MODE_MP4) + if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) ffio_wfourcc(pb, "nclx"); else ffio_wfourcc(pb, "nclc"); @@ -2052,7 +2053,7 @@ static int mov_write_colr_tag(AVIOContext *pb, MOVTrack *track, int prefer_icc) avio_wb16(pb, track->par->color_primaries); avio_wb16(pb, track->par->color_trc); avio_wb16(pb, track->par->color_space); - if (track->mode == MODE_MP4) { + if (track->mode == MODE_MP4 || track->mode == MODE_AVIF) { int full_range = track->par->color_range == AVCOL_RANGE_JPEG; avio_w8(pb, full_range << 7); } @@ -2118,7 +2119,7 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track) || (track->par->width == 1440 && track->par->height == 1080) || (track->par->width == 1920 && track->par->height == 1080); - if (track->mode == MODE_MOV && + if ((track->mode == MODE_AVIF || track->mode == MODE_MOV) && (encoder = av_dict_get(track->st->metadata, "encoder", NULL, 0))) { av_strlcpy(compressor_name, encoder->value, 32); } else if (track->par->codec_id == AV_CODEC_ID_MPEG2VIDEO && xdcam_res) { @@ -2139,6 +2140,25 @@ static void find_compressor(char * compressor_name, int len, MOVTrack *track) } } +static int mov_write_ccst_tag(AVIOContext *pb) +{ + int64_t pos = avio_tell(pb); + // Write sane defaults: + // all_ref_pics_intra = 0 : all samples can use any type of reference. + // intra_pred_used = 1 : intra prediction may or may not be used. + // max_ref_per_pic = 15 : reserved value to indicate that any number of + // reference images can be used. 
+ uint8_t ccstValue = (0 << 7) | /* all_ref_pics_intra */ + (1 << 6) | /* intra_pred_used */ + (15 << 2); /* max_ref_per_pic */ + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "ccst"); + avio_wb32(pb, 0); /* Version & flags */ + avio_w8(pb, ccstValue); + avio_wb24(pb, 0); /* reserved */ + return update_size(pb, pos); +} + static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track) { int ret = AVERROR_BUG; @@ -2272,7 +2292,7 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex else av_log(mov->fc, AV_LOG_WARNING, "Not writing 'gama' atom. Format is not MOV.\n"); } - if (track->mode == MODE_MOV || track->mode == MODE_MP4) { + if (track->mode == MODE_MOV || track->mode == MODE_MP4 || track->mode == MODE_AVIF) { int has_color_info = track->par->color_primaries != AVCOL_PRI_UNSPECIFIED && track->par->color_trc != AVCOL_TRC_UNSPECIFIED && track->par->color_space != AVCOL_SPC_UNSPECIFIED; @@ -2324,6 +2344,9 @@ static int mov_write_video_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContex if (avid) avio_wb32(pb, 0); + if (track->mode == MODE_AVIF) + mov_write_ccst_tag(pb); + return update_size(pb, pos); } @@ -2826,8 +2849,13 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra if (track) { hdlr = (track->mode == MODE_MOV) ? 
"mhlr" : "\0\0\0\0"; if (track->par->codec_type == AVMEDIA_TYPE_VIDEO) { - hdlr_type = "vide"; - descr = "VideoHandler"; + if (track->mode == MODE_AVIF) { + hdlr_type = "pict"; + descr = "PictureHandler"; + } else { + hdlr_type = "vide"; + descr = "VideoHandler"; + } } else if (track->par->codec_type == AVMEDIA_TYPE_AUDIO) { hdlr_type = "soun"; descr = "SoundHandler"; @@ -2892,6 +2920,128 @@ static int mov_write_hdlr_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra return update_size(pb, pos); } +static int mov_write_pitm_tag(AVIOContext *pb, int item_id) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "pitm"); + avio_wb32(pb, 0); /* Version & flags */ + avio_wb16(pb, item_id); /* item_id */ + return update_size(pb, pos); +} + +static int mov_write_iloc_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "iloc"); + avio_wb32(pb, 0); /* Version & flags */ + avio_w8(pb, (4 << 4) + 4); /* offset_size(4) and length_size(4) */ + avio_w8(pb, 0); /* base_offset_size(4) and reserved(4) */ + avio_wb16(pb, 1); /* item_count */ + + avio_wb16(pb, 1); /* item_id */ + avio_wb16(pb, 0); /* data_reference_index */ + avio_wb16(pb, 1); /* extent_count */ + mov->avif_extent_pos = avio_tell(pb); + avio_wb32(pb, 0); /* extent_offset (written later) */ + // For animated AVIF, we simply write the first packet's size. 
+ avio_wb32(pb, mov->avif_extent_length); /* extent_length */ + + return update_size(pb, pos); +} + +static int mov_write_iinf_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t infe_pos; + int64_t iinf_pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "iinf"); + avio_wb32(pb, 0); /* Version & flags */ + avio_wb16(pb, 1); /* entry_count */ + + infe_pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "infe"); + avio_w8(pb, 0x2); /* Version */ + avio_wb24(pb, 0); /* flags */ + avio_wb16(pb, 1); /* item_id */ + avio_wb16(pb, 0); /* item_protection_index */ + avio_write(pb, "av01", 4); /* item_type */ + avio_write(pb, "Color\0", 6); /* item_name */ + update_size(pb, infe_pos); + + return update_size(pb, iinf_pos); +} + +static int mov_write_ispe_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "ispe"); + avio_wb32(pb, 0); /* Version & flags */ + avio_wb32(pb, s->streams[0]->codecpar->width); /* image_width */ + avio_wb32(pb, s->streams[0]->codecpar->height); /* image_height */ + return update_size(pb, pos); +} + +static int mov_write_pixi_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->streams[0]->codecpar->format); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "pixi"); + avio_wb32(pb, 0); /* Version & flags */ + avio_w8(pb, pixdesc->nb_components); /* num_channels */ + for (int i = 0; i < pixdesc->nb_components; ++i) { + avio_w8(pb, pixdesc->comp[i].depth); /* bits_per_channel */ + } + return update_size(pb, pos); +} + +static int mov_write_ipco_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "ipco"); + mov_write_ispe_tag(pb, mov, s); + mov_write_pixi_tag(pb, mov, s); + mov_write_av1c_tag(pb, 
&mov->tracks[0]); + mov_write_colr_tag(pb, &mov->tracks[0], 0); + return update_size(pb, pos); +} + +static int mov_write_ipma_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "ipma"); + avio_wb32(pb, 0); /* Version & flags */ + avio_wb32(pb, 1); /* entry_count */ + avio_wb16(pb, 1); /* item_ID */ + avio_w8(pb, 4); /* association_count */ + + // ispe association. + avio_w8(pb, 1); /* essential and property_index */ + // pixi association. + avio_w8(pb, 2); /* essential and property_index */ + // av1C association. + avio_w8(pb, 0x80 | 3); /* essential and property_index */ + // colr association. + avio_w8(pb, 4); /* essential and property_index */ + return update_size(pb, pos); +} + +static int mov_write_iprp_tag(AVIOContext *pb, MOVMuxContext *mov, AVFormatContext *s) +{ + int64_t pos = avio_tell(pb); + avio_wb32(pb, 0); /* size */ + ffio_wfourcc(pb, "iprp"); + mov_write_ipco_tag(pb, mov, s); + mov_write_ipma_tag(pb, mov, s); + return update_size(pb, pos); +} + static int mov_write_hmhd_tag(AVIOContext *pb) { /* This atom must be present, but leaving the values at zero @@ -3137,7 +3287,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVMuxContext *mov, if (st && (track->par->codec_type == AVMEDIA_TYPE_VIDEO || track->par->codec_type == AVMEDIA_TYPE_SUBTITLE)) { int64_t track_width_1616; - if (track->mode == MODE_MOV) { + if (track->mode == MODE_MOV || track->mode == MODE_AVIF) { track_width_1616 = track->par->width * 0x10000ULL; } else { track_width_1616 = av_rescale(st->sample_aspect_ratio.num, @@ -3536,6 +3686,7 @@ static int mov_write_mvhd_tag(AVIOContext *pb, MOVMuxContext *mov) int max_track_id = 1, i; int64_t max_track_len = 0; int version; + int timescale; for (i = 0; i < mov->nb_streams; i++) { if (mov->tracks[i].entry > 0 && mov->tracks[i].timescale) { @@ -3570,7 +3721,12 @@ static int mov_write_mvhd_tag(AVIOContext *pb, MOVMuxContext *mov) avio_wb32(pb, 
mov->time); /* creation time */ avio_wb32(pb, mov->time); /* modification time */ } - avio_wb32(pb, mov->movie_timescale); + + timescale = mov->movie_timescale; + if (mov->mode == MODE_AVIF && !timescale) + timescale = mov->tracks[0].timescale; + + avio_wb32(pb, timescale); (version == 1) ? avio_wb64(pb, max_track_len) : avio_wb32(pb, max_track_len); /* duration of longest track */ avio_wb32(pb, 0x00010000); /* reserved (preferred rate) 1.0 = normal */ @@ -3947,8 +4103,15 @@ static int mov_write_meta_tag(AVIOContext *pb, MOVMuxContext *mov, mov_write_mdta_hdlr_tag(pb, mov, s); mov_write_mdta_keys_tag(pb, mov, s); mov_write_mdta_ilst_tag(pb, mov, s); - } - else { + } else if (mov->mode == MODE_AVIF) { + mov_write_hdlr_tag(s, pb, &mov->tracks[0]); + // We always write the primary item id as 1 since only one track is + // supported for AVIF. + mov_write_pitm_tag(pb, 1); + mov_write_iloc_tag(pb, mov, s); + mov_write_iinf_tag(pb, mov, s); + mov_write_iprp_tag(pb, mov, s); + } else { /* iTunes metadata tag */ mov_write_itunes_hdlr_tag(pb, mov, s); mov_write_ilst_tag(pb, mov, s); @@ -4278,10 +4441,11 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, } mov_write_mvhd_tag(pb, mov); - if (mov->mode != MODE_MOV && !mov->iods_skip) + if (mov->mode != MODE_MOV && mov->mode != MODE_AVIF && !mov->iods_skip) mov_write_iods_tag(pb, mov); for (i = 0; i < mov->nb_streams; i++) { - if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) { + if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT || + mov->mode == MODE_AVIF) { int ret = mov_write_trak_tag(s, pb, mov, &(mov->tracks[i]), i < s->nb_streams ? 
s->streams[i] : NULL); if (ret < 0) return ret; @@ -4292,7 +4456,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov, if (mov->mode == MODE_PSP) mov_write_uuidusmt_tag(pb, s); - else + else if (mov->mode != MODE_AVIF) mov_write_udta_tag(pb, mov, s); return update_size(pb, pos); @@ -5039,6 +5203,9 @@ static void mov_write_ftyp_tag_internal(AVIOContext *pb, AVFormatContext *s, else if (mov->mode == MODE_3GP) { ffio_wfourcc(pb, has_h264 ? "3gp6" : "3gp4"); minor = has_h264 ? 0x100 : 0x200; + } else if (mov->mode == MODE_AVIF) { + ffio_wfourcc(pb, mov->is_animated_avif ? "avis" : "avif"); + minor = 0; } else if (mov->mode & MODE_3G2) { ffio_wfourcc(pb, has_h264 ? "3g2b" : "3g2a"); minor = has_h264 ? 0x20000 : 0x10000; @@ -5102,6 +5269,31 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s) // compatible brand a second time. if (mov->mode == MODE_ISM) { ffio_wfourcc(pb, "piff"); + } else if (mov->mode == MODE_AVIF) { + const AVPixFmtDescriptor *pix_fmt_desc = + av_pix_fmt_desc_get(s->streams[0]->codecpar->format); + const int depth = pix_fmt_desc->comp[0].depth; + if (mov->is_animated_avif) { + // For animated AVIF, major brand is "avis". Add "avif" as a + // compatible brand. + ffio_wfourcc(pb, "avif"); + ffio_wfourcc(pb, "msf1"); + ffio_wfourcc(pb, "iso8"); + } + ffio_wfourcc(pb, "mif1"); + ffio_wfourcc(pb, "miaf"); + if (depth == 8 || depth == 10) { + // MA1B and MA1A brands are based on AV1 profile. Short hand for + // computing that is based on chroma subsampling type. 420 chroma + // subsampling is MA1B. 444 chroma subsampling is MA1A. + if (!pix_fmt_desc->log2_chroma_w && !pix_fmt_desc->log2_chroma_h) { + // 444 chroma subsampling. + ffio_wfourcc(pb, "MA1A"); + } else { + // 420 chroma subsampling. + ffio_wfourcc(pb, "MA1B"); + } + } } else if (mov->mode != MODE_MOV) { // We add tfdt atoms when fragmenting, signal this with the iso6 compatible // brand, if not already the major brand. 
This is compatible with users that @@ -5705,7 +5897,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) if (ret < 0) return ret; - if (mov->flags & FF_MOV_FLAG_FRAGMENT) { + if (mov->flags & FF_MOV_FLAG_FRAGMENT || mov->mode == MODE_AVIF) { int ret; if (mov->moov_written || mov->flags & FF_MOV_FLAG_EMPTY_MOOV) { if (mov->frag_interleave && mov->fragments > 0) { @@ -5846,7 +6038,11 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt) avio_write(pb, reformatted_data, size); } else { size = ff_av1_filter_obus(pb, pkt->data, pkt->size); + if (trk->mode == MODE_AVIF && !mov->avif_extent_length) { + mov->avif_extent_length = size; + } } + #if CONFIG_AC3_PARSER } else if (par->codec_id == AV_CODEC_ID_EAC3) { size = handle_eac3(mov, pkt, trk); @@ -6579,11 +6775,15 @@ static int mov_init(AVFormatContext *s) else if (IS_MODE(ipod, IPOD)) mov->mode = MODE_IPOD; else if (IS_MODE(ismv, ISMV)) mov->mode = MODE_ISM; else if (IS_MODE(f4v, F4V)) mov->mode = MODE_F4V; + else if (IS_MODE(avif, AVIF)) mov->mode = MODE_AVIF; #undef IS_MODE if (mov->flags & FF_MOV_FLAG_DELAY_MOOV) mov->flags |= FF_MOV_FLAG_EMPTY_MOOV; + if (mov->mode == MODE_AVIF) + mov->flags |= FF_MOV_FLAG_DELAY_MOOV; + /* Set the FRAGMENT flag if any of the fragmentation methods are * enabled. */ if (mov->max_fragment_duration || mov->max_fragment_size || @@ -6664,11 +6864,25 @@ static int mov_init(AVFormatContext *s) /* Non-seekable output is ok if using fragmentation. If ism_lookahead * is enabled, we don't support non-seekable output at all. 
*/ if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) && - (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) { + (!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead || + mov->mode == MODE_AVIF)) { av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n"); return AVERROR(EINVAL); } + /* AVIF output must have exactly one video stream */ + if (mov->mode == MODE_AVIF) { + if (s->nb_streams > 1) { + av_log(s, AV_LOG_ERROR, "AVIF output requires exactly one stream\n"); + return AVERROR(EINVAL); + } + if (s->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) { + av_log(s, AV_LOG_ERROR, "AVIF output requires one video stream\n"); + return AVERROR(EINVAL); + } + s->streams[0]->disposition |= AV_DISPOSITION_DEFAULT; + } + mov->nb_streams = s->nb_streams; if (mov->mode & (MODE_MP4|MODE_MOV|MODE_IPOD) && s->nb_chapters) mov->chapter_track = mov->nb_streams++; @@ -6811,12 +7025,13 @@ static int mov_init(AVFormatContext *s) pix_fmt == AV_PIX_FMT_MONOWHITE || pix_fmt == AV_PIX_FMT_MONOBLACK; } - if (track->par->codec_id == AV_CODEC_ID_VP9 || - track->par->codec_id == AV_CODEC_ID_AV1) { - if (track->mode != MODE_MP4) { - av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id)); - return AVERROR(EINVAL); - } + if (track->par->codec_id == AV_CODEC_ID_VP9 && track->mode != MODE_MP4) { + av_log(s, AV_LOG_ERROR, "%s only supported in MP4.\n", avcodec_get_name(track->par->codec_id)); + return AVERROR(EINVAL); + } else if (track->par->codec_id == AV_CODEC_ID_AV1 && + track->mode != MODE_MP4 && track->mode != MODE_AVIF) { + av_log(s, AV_LOG_ERROR, "%s only supported in MP4 and AVIF.\n", avcodec_get_name(track->par->codec_id)); + return AVERROR(EINVAL); } else if (track->par->codec_id == AV_CODEC_ID_VP8) { /* altref frames handling is not defined in the spec as of version v1.0, * so just forbid muxing VP8 streams altogether until a new version does */ @@ -7034,7 +7249,7 @@ static int mov_write_header(AVFormatContext 
*s) FF_MOV_FLAG_FRAG_EVERY_FRAME)) && !mov->max_fragment_duration && !mov->max_fragment_size) mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME; - } else { + } else if (mov->mode != MODE_AVIF) { if (mov->flags & FF_MOV_FLAG_FASTSTART) mov->reserved_header_pos = avio_tell(pb); mov_write_mdat_tag(pb, mov); @@ -7322,6 +7537,50 @@ static int mov_check_bitstream(AVFormatContext *s, AVStream *st, return ret; } +static int avif_write_trailer(AVFormatContext *s) +{ + AVIOContext *pb = s->pb; + MOVMuxContext *mov = s->priv_data; + int64_t pos_backup, mdat_pos; + uint8_t *buf; + int buf_size, moov_size; + + if (mov->moov_written) return 0; + + mov->is_animated_avif = s->streams[0]->nb_frames > 1; + mov_write_identification(pb, s); + mov_write_meta_tag(pb, mov, s); + + moov_size = get_moov_size(s); + mov->tracks[0].data_offset = avio_tell(pb) + moov_size + 8; + + if (mov->is_animated_avif) { + int ret; + if ((ret = mov_write_moov_tag(pb, mov, s)) < 0) + return ret; + } + + buf_size = avio_get_dyn_buf(mov->mdat_buf, &buf); + avio_wb32(pb, buf_size + 8); + ffio_wfourcc(pb, "mdat"); + mdat_pos = avio_tell(pb); + + if (mdat_pos != (uint32_t)mdat_pos) { + av_log(s, AV_LOG_ERROR, "mdat offset does not fit in 32 bits\n"); + return AVERROR_INVALIDDATA; + } + + avio_write(pb, buf, buf_size); + + // write extent offset. 
+ pos_backup = avio_tell(pb); + avio_seek(pb, mov->avif_extent_pos, SEEK_SET); + avio_wb32(pb, mdat_pos); /* rewrite offset */ + avio_seek(pb, pos_backup, SEEK_SET); + + return 0; +} + #if CONFIG_TGP_MUXER || CONFIG_TG2_MUXER static const AVCodecTag codec_3gp_tags[] = { { AV_CODEC_ID_H263, MKTAG('s','2','6','3') }, @@ -7404,6 +7663,20 @@ static const AVCodecTag codec_f4v_tags[] = { { AV_CODEC_ID_NONE, 0 }, }; +#if CONFIG_AVIF_MUXER +static const AVCodecTag codec_avif_tags[] = { + { AV_CODEC_ID_AV1, MKTAG('a','v','0','1') }, + { AV_CODEC_ID_NONE, 0 }, +}; +static const AVCodecTag *const codec_avif_tags_list[] = { codec_avif_tags, NULL }; + +static const AVClass mov_avif_muxer_class = { + .class_name = "avif muxer", + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT, +}; +#endif + #if CONFIG_MOV_MUXER const AVOutputFormat ff_mov_muxer = { .name = "mov", @@ -7566,3 +7839,21 @@ const AVOutputFormat ff_f4v_muxer = { .priv_class = &mov_isobmff_muxer_class, }; #endif +#if CONFIG_AVIF_MUXER +const AVOutputFormat ff_avif_muxer = { + .name = "avif", + .long_name = NULL_IF_CONFIG_SMALL("AVIF"), + .mime_type = "image/avif", + .extensions = "avif", + .priv_data_size = sizeof(MOVMuxContext), + .video_codec = AV_CODEC_ID_AV1, + .init = mov_init, + .write_header = mov_write_header, + .write_packet = mov_write_packet, + .write_trailer = avif_write_trailer, + .deinit = mov_free, + .flags = AVFMT_GLOBALHEADER | AVFMT_ALLOW_FLUSH, + .codec_tag = codec_avif_tags_list, + .priv_class = &mov_avif_muxer_class, +}; +#endif diff --git a/libavformat/movenc.h b/libavformat/movenc.h index ca507e0e04..281576cc66 100644 --- a/libavformat/movenc.h +++ b/libavformat/movenc.h @@ -43,6 +43,7 @@ #define MODE_IPOD 0x20 #define MODE_ISM 0x40 #define MODE_F4V 0x80 +#define MODE_AVIF 0x100 typedef struct MOVIentry { uint64_t pos; @@ -244,6 +245,10 @@ typedef struct MOVMuxContext { MOVPrftBox write_prft; int empty_hdlr_name; int movie_timescale; + + int64_t avif_extent_pos; + int 
avif_extent_length; + int is_animated_avif; } MOVMuxContext; #define FF_MOV_FLAG_RTP_HINT (1 << 0)
Add an AVIF muxer by re-using the existing mov/mp4 muxer. AVIF Specification: https://aomediacodec.github.io/av1-avif Sample usage for still image: ffmpeg -i image.png -c:v libaom-av1 -still-picture 1 image.avif Sample usage for animated AVIF image: ffmpeg -i video.mp4 animated.avif We can re-use any of the AV1 encoding options that will make sense for image encoding (like bitrate, tiles, encoding speed, etc.). The files generated by this muxer have been verified to be valid AVIF files by the following: 1) Displays on Chrome (both still and animated images). 2) Displays on Firefox (only still images, Firefox does not support animated AVIF yet). 3) Verified to be valid by Compliance Warden: https://github.com/gpac/ComplianceWarden Fixes the encoder/muxer part of Trac Ticket #7621 Signed-off-by: Vignesh Venkatasubramanian <vigneshv@google.com> --- configure | 1 + libavformat/allformats.c | 1 + libavformat/movenc.c | 341 ++++++++++++++++++++++++++++++++++++--- libavformat/movenc.h | 5 + 4 files changed, 323 insertions(+), 25 deletions(-)