[FFmpeg-devel,v7] avformat/ogg: Add support for embedding cover art in Ogg

Message ID	yX5SBw_vSCjil59H35lLeLwqCpdeY2KI2RWFtDnJuCCA7gnyRcaekNnNwk701f47aS1G_ldPQyIP-x7c80zJX4ZBY5zaUxFhzikQ03ORepQ=@protonmail.com
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Date: Thu, 07 Nov 2024 15:42:01 +0000 To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Message-ID: <yX5SBw_vSCjil59H35lLeLwqCpdeY2KI2RWFtDnJuCCA7gnyRcaekNnNwk701f47aS1G_ldPQyIP-x7c80zJX4ZBY5zaUxFhzikQ03ORepQ=@protonmail.com> Feedback-ID: 28710920:user:proton MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH v7] avformat/ogg: Add support for embedding cover art in Ogg Precedence: list From: =?utf-8?q?Zsolt_Vad=C3=A1sz_via_ffmpeg-devel?= <ffmpeg-devel@ffmpeg.org> Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: =?utf-8?q?Zsolt_Vad=C3=A1sz?= <zsolt_vadasz@protonmail.com> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel,v7] avformat/ogg: Add support for embedding cover art in Ogg

Context	Check	Description
yinshiyou/make_loongarch64	success	Make finished
yinshiyou/make_fate_loongarch64	success	Make fate finished
andriy/make_x86	success	Make finished
andriy/make_fate_x86	fail	Make fate failed

diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c index c9f3f11edd..38e49144a9 100644 --- a/libavformat/flac_picture.c +++ b/libavformat/flac_picture.c @@ -20,6 +20,10 @@ */ #include "libavutil/intreadwrite.h" +#include "libavutil/avstring.h" +#include "libavutil/base64.h" +#include "libavutil/pixdesc.h" +#include "libavutil/mem.h" #include "libavcodec/bytestream.h" #include "libavcodec/png.h" #include "avformat.h" @@ -186,3 +190,132 @@ fail: return ret; } + +int ff_flac_write_picture(struct AVFormatContext *s, + int isogg, + unsigned *attached_types, + int audio_stream_idx, // unused if !isogg + AVPacket *pkt) +{ + AVIOContext *pb = s->pb; + const AVPixFmtDescriptor *pixdesc; + const CodecMime *mime = ff_id3v2_mime_tags; + AVDictionaryEntry *e; + const char *mimetype = NULL, *desc = ""; + const AVStream *st = s->streams[pkt->stream_index]; + int i, mimelen, desclen, type = 0, blocklen; + + if (!pkt->data) + return 0; + + while (mime->id != AV_CODEC_ID_NONE) { + if (mime->id == st->codecpar->codec_id) { + mimetype = mime->str; + break; + } + mime++; + } + if (!mimetype) { + av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot " + "write an attached picture.\n", st->index); + return AVERROR(EINVAL); + } + mimelen = strlen(mimetype); + + /* get the picture type */ + e = av_dict_get(st->metadata, "comment", NULL, 0); + for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) { + if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) { + type = i; + break; + } + } + + if (((*attached_types) & (1 << type)) & 0x6) { + av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]); + return AVERROR(EINVAL); + } + + if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG || + st->codecpar->width != 32 || + st->codecpar->height != 32)) { + av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG"); + return AVERROR(EINVAL); + } + + *attached_types |= (1 << type); + + /* get the 
description */ + if ((e = av_dict_get(st->metadata, "title", NULL, 0))) + desc = e->value; + desclen = strlen(desc); + + blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size; + if (blocklen >= 1<<24) { + av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24); + return AVERROR(EINVAL); + } + + if(!isogg) { + avio_w8(pb, 0x06); + avio_wb24(pb, blocklen); + + avio_wb32(pb, type); + + avio_wb32(pb, mimelen); + avio_write(pb, mimetype, mimelen); + + avio_wb32(pb, desclen); + avio_write(pb, desc, desclen); + + avio_wb32(pb, st->codecpar->width); + avio_wb32(pb, st->codecpar->height); + if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format))) + avio_wb32(pb, av_get_bits_per_pixel(pixdesc)); + else + avio_wb32(pb, 0); + avio_wb32(pb, 0); + + avio_wb32(pb, pkt->size); + avio_write(pb, pkt->data, pkt->size); + } else { + uint8_t *metadata_block_picture, *ptr; + int encoded_len, ret; + char *encoded; + AVStream *audio_stream = s->streams[audio_stream_idx]; + + metadata_block_picture = av_mallocz(blocklen); + ptr = metadata_block_picture; + bytestream_put_be32(&ptr, type); + + bytestream_put_be32(&ptr, mimelen); + bytestream_put_buffer(&ptr, mimetype, mimelen); + + bytestream_put_be32(&ptr, desclen); + bytestream_put_buffer(&ptr, desc, desclen); + + bytestream_put_be32(&ptr, st->codecpar->width); + bytestream_put_be32(&ptr, st->codecpar->height); + if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format))) + bytestream_put_be32(&ptr, av_get_bits_per_pixel(pixdesc)); + else + bytestream_put_be32(&ptr, 0); + bytestream_put_be32(&ptr, 0); + + bytestream_put_be32(&ptr, pkt->size); + bytestream_put_buffer(&ptr, pkt->data, pkt->size); + + encoded_len = AV_BASE64_SIZE(blocklen); + encoded = av_mallocz(encoded_len); + av_base64_encode(encoded, encoded_len, metadata_block_picture, blocklen); + av_free(metadata_block_picture); + + ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE", encoded, 0); + av_free(encoded); + 
av_packet_unref(pkt); + + if (ret < 0) + return ret; + } + return 0; +} diff --git a/libavformat/flac_picture.h b/libavformat/flac_picture.h index db074e531d..efa11aee32 100644 --- a/libavformat/flac_picture.h +++ b/libavformat/flac_picture.h @@ -39,5 +39,10 @@ */ int ff_flac_parse_picture(AVFormatContext *s, uint8_t **buf, int buf_size, int truncate_workaround); +int ff_flac_write_picture(struct AVFormatContext *s, + int isogg, + unsigned *attached_types, + int audio_stream_idx, + AVPacket *pkt); #endif /* AVFORMAT_FLAC_PICTURE_H */ diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c index a8beec7750..7970c2531d 100644 --- a/libavformat/flacenc.c +++ b/libavformat/flacenc.c @@ -33,6 +33,7 @@ #include "mux.h" #include "version.h" #include "vorbiscomment.h" +#include "flac_picture.h" typedef struct FlacMuxerContext { @@ -79,94 +80,9 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m, return 0; } -static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt) -{ - FlacMuxerContext *c = s->priv_data; - AVIOContext *pb = s->pb; - const AVPixFmtDescriptor *pixdesc; - const CodecMime *mime = ff_id3v2_mime_tags; - AVDictionaryEntry *e; - const char *mimetype = NULL, *desc = ""; - const AVStream *st = s->streams[pkt->stream_index]; - int i, mimelen, desclen, type = 0, blocklen; - - if (!pkt->data) - return 0; - - while (mime->id != AV_CODEC_ID_NONE) { - if (mime->id == st->codecpar->codec_id) { - mimetype = mime->str; - break; - } - mime++; - } - if (!mimetype) { - av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot " - "write an attached picture.\n", st->index); - return AVERROR(EINVAL); - } - mimelen = strlen(mimetype); - - /* get the picture type */ - e = av_dict_get(st->metadata, "comment", NULL, 0); - for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) { - if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) { - type = i; - break; - } - } - - if ((c->attached_types & (1 << type)) & 0x6) { - 
av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]); - return AVERROR(EINVAL); - } - - if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG || - st->codecpar->width != 32 || - st->codecpar->height != 32)) { - av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG"); - return AVERROR(EINVAL); - } - - c->attached_types |= (1 << type); - - /* get the description */ - if ((e = av_dict_get(st->metadata, "title", NULL, 0))) - desc = e->value; - desclen = strlen(desc); - - blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size; - if (blocklen >= 1<<24) { - av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24); - return AVERROR(EINVAL); - } - - avio_w8(pb, 0x06); - avio_wb24(pb, blocklen); - - avio_wb32(pb, type); - - avio_wb32(pb, mimelen); - avio_write(pb, mimetype, mimelen); - - avio_wb32(pb, desclen); - avio_write(pb, desc, desclen); - - avio_wb32(pb, st->codecpar->width); - avio_wb32(pb, st->codecpar->height); - if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format))) - avio_wb32(pb, av_get_bits_per_pixel(pixdesc)); - else - avio_wb32(pb, 0); - avio_wb32(pb, 0); - - avio_wb32(pb, pkt->size); - avio_write(pb, pkt->data, pkt->size); - return 0; -} - static int flac_finish_header(struct AVFormatContext *s) { + FlacMuxerContext *c = s->priv_data; int i, ret, padding = s->metadata_header_padding; if (padding < 0) padding = 8192; @@ -179,7 +95,7 @@ static int flac_finish_header(struct AVFormatContext *s) AVPacket *pkt = st->priv_data; if (!pkt) continue; - ret = flac_write_picture(s, pkt); + ret = ff_flac_write_picture(s, 0, &c->attached_types, -1, pkt); av_packet_unref(pkt); if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE)) return ret; diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c index 224519a4da..1d3cfcd2be 100644 --- a/libavformat/oggenc.c +++ b/libavformat/oggenc.c @@ -23,7 +23,10 @@ #include <stdint.h> +#include "libavcodec/codec_id.h" +#include 
"libavutil/avutil.h" #include "libavutil/crc.h" +#include "libavutil/log.h" #include "libavutil/mathematics.h" #include "libavutil/mem.h" #include "libavutil/opt.h" @@ -37,6 +40,7 @@ #include "mux.h" #include "version.h" #include "vorbiscomment.h" +#include "flac_picture.h" #define MAX_PAGE_SIZE 65025 @@ -79,6 +83,11 @@ typedef struct OGGContext { int pref_size; ///< preferred page size (0 => fill all segments) int64_t pref_duration; ///< preferred page duration (0 => fill all segments) int serial_offset; + + PacketList queue; + int audio_stream_idx; + int waiting_pics; + unsigned attached_types; } OGGContext; #define OFFSET(x) offsetof(OGGContext, x) @@ -470,12 +479,14 @@ static void ogg_write_pages(AVFormatContext *s, int flush) ogg->page_list = p; } -static int ogg_init(AVFormatContext *s) +static int ogg_finish_init(AVFormatContext *s) { OGGContext *ogg = s->priv_data; OGGStreamContext *oggstream = NULL; int i, j; + ogg->waiting_pics = 0; + if (ogg->pref_size) av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n"); @@ -483,29 +494,10 @@ static int ogg_init(AVFormatContext *s) AVStream *st = s->streams[i]; unsigned serial_num = i + ogg->serial_offset; - if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - if (st->codecpar->codec_id == AV_CODEC_ID_OPUS) - /* Opus requires a fixed 48kHz clock */ - avpriv_set_pts_info(st, 64, 1, 48000); - else - avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); - } - - if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS && - st->codecpar->codec_id != AV_CODEC_ID_THEORA && - st->codecpar->codec_id != AV_CODEC_ID_SPEEX && - st->codecpar->codec_id != AV_CODEC_ID_FLAC && - st->codecpar->codec_id != AV_CODEC_ID_OPUS && - st->codecpar->codec_id != AV_CODEC_ID_VP8) { - av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i); - return AVERROR(EINVAL); - } + if(st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && + (st->disposition & AV_DISPOSITION_ATTACHED_PIC)) + continue; - if ((!st->codecpar->extradata || 
!st->codecpar->extradata_size) && - st->codecpar->codec_id != AV_CODEC_ID_VP8) { - av_log(s, AV_LOG_ERROR, "No extradata present\n"); - return AVERROR_INVALIDDATA; - } oggstream = av_mallocz(sizeof(*oggstream)); if (!oggstream) return AVERROR(ENOMEM); @@ -516,8 +508,11 @@ static int ogg_init(AVFormatContext *s) do { serial_num = av_get_random_seed(); for (j = 0; j < i; j++) { + // NULL for attached_pic OGGStreamContext *sc = s->streams[j]->priv_data; - if (serial_num == sc->serial_num) + if(!sc) + continue; + else if (serial_num == sc->serial_num) break; } } while (j < i); @@ -564,9 +559,9 @@ static int ogg_init(AVFormatContext *s) int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0; if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size, - st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42, - (const uint8_t**)oggstream->header, oggstream->header_len) < 0) { - av_log(s, AV_LOG_ERROR, "Extradata corrupted\n"); + st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 
30 : 42, + (const uint8_t**)oggstream->header, oggstream->header_len) < 0) { + av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i); oggstream->header[1] = NULL; return AVERROR_INVALIDDATA; } @@ -603,13 +598,67 @@ static int ogg_init(AVFormatContext *s) return 0; } -static int ogg_write_header(AVFormatContext *s) +static int ogg_init(AVFormatContext *s) +{ + OGGContext *ogg = s->priv_data; + int i; + + ogg->waiting_pics = 0; + ogg->attached_types = 0; + + if (ogg->pref_size) + av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n"); + + for (i = 0; i < s->nb_streams; i++) { + AVStream *st = s->streams[i]; + + if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + ogg->audio_stream_idx = i; + if (st->codecpar->codec_id == AV_CODEC_ID_OPUS) + /* Opus requires a fixed 48kHz clock */ + avpriv_set_pts_info(st, 64, 1, 48000); + else + avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); + } + + if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS && + st->codecpar->codec_id != AV_CODEC_ID_THEORA && + st->codecpar->codec_id != AV_CODEC_ID_SPEEX && + st->codecpar->codec_id != AV_CODEC_ID_FLAC && + st->codecpar->codec_id != AV_CODEC_ID_OPUS && + st->codecpar->codec_id != AV_CODEC_ID_VP8 && + st->codecpar->codec_id != AV_CODEC_ID_PNG && + st->codecpar->codec_id != AV_CODEC_ID_MJPEG) { + av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i); + return AVERROR(EINVAL); + } + + if ((!st->codecpar->extradata || !st->codecpar->extradata_size) && + st->codecpar->codec_id != AV_CODEC_ID_VP8 && + st->codecpar->codec_id != AV_CODEC_ID_PNG && + st->codecpar->codec_id != AV_CODEC_ID_MJPEG) { + av_log(s, AV_LOG_ERROR, "No extradata present\n"); + return AVERROR_INVALIDDATA; + } + if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && + (st->disposition & AV_DISPOSITION_ATTACHED_PIC)) + ogg->waiting_pics++; + } + + if (!ogg->waiting_pics) + return ogg_finish_init(s); + return 0; +} + +static int ogg_finish_header(AVFormatContext *s) { OGGStreamContext 
*oggstream = NULL; int i, j; for (j = 0; j < s->nb_streams; j++) { oggstream = s->streams[j]->priv_data; + if(!oggstream) + continue; ogg_buffer_data(s, s->streams[j], oggstream->header[0], oggstream->header_len[0], 0, 1); oggstream->page.flags |= 2; // bos @@ -618,6 +667,8 @@ static int ogg_write_header(AVFormatContext *s) for (j = 0; j < s->nb_streams; j++) { AVStream *st = s->streams[j]; oggstream = st->priv_data; + if(!oggstream) + continue; for (i = 1; i < 3; i++) { if (oggstream->header_len[i]) ogg_buffer_data(s, st, oggstream->header[i], @@ -626,13 +677,22 @@ static int ogg_write_header(AVFormatContext *s) ogg_buffer_page(s, oggstream); } - oggstream->page.start_granule = AV_NOPTS_VALUE; + if (oggstream) + oggstream->page.start_granule = AV_NOPTS_VALUE; ogg_write_pages(s, 2); return 0; } +static int ogg_write_header(AVFormatContext *s) +{ + OGGContext *ogg = s->priv_data; + if (!ogg->waiting_pics) + return ogg_finish_header(s); + return 0; +} + static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt) { AVStream *st = s->streams[pkt->stream_index]; @@ -685,20 +745,86 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt) return 0; } +static int ogg_queue_flush(AVFormatContext *s) +{ + OGGContext *c = s->priv_data; + AVPacket *const pkt = ffformatcontext(s)->pkt; + int ret, write = 1; + ret = ogg_finish_init(s); + if (ret < 0) + write = 0; + ret = ogg_finish_header(s); + if (ret < 0) + write = 0; + + while (c->queue.head) { + avpriv_packet_list_get(&c->queue, pkt); + if (write && (ret = ogg_write_packet_internal(s, pkt)) < 0) + write = 0; + av_packet_unref(pkt); + } + return ret; +} + static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt) { - int i; + OGGContext *c = s->priv_data; + int i, ret; + AVStream *st = s->streams[pkt->stream_index]; - if (pkt) - return pkt->size ? 
ogg_write_packet_internal(s, pkt) : 0; + if (pkt) { + if (pkt->stream_index == c->audio_stream_idx) { + if (c->waiting_pics) { + /* buffer audio packets until we get all the pictures */ + ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n"); + c->waiting_pics = 0; + ret = ogg_queue_flush(s); + if (ret < 0) + return ret; + return pkt->size ? ogg_write_packet_internal(s, pkt) : 0; + } + } else + return pkt->size ? ogg_write_packet_internal(s, pkt) : 0; + } else if(c->waiting_pics && + (st->disposition & AV_DISPOSITION_ATTACHED_PIC)) { + /* warn only once for each stream */ + if (st->nb_frames == 1) { + av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d," + " ignoring.\n", pkt->stream_index); + } + if (st->nb_frames >= 1) { + av_log(s, AV_LOG_WARNING, "Attached picture must not have more than one frame.\n"); + return 0; + } - for (i = 0; i < s->nb_streams; i++) { - OGGStreamContext *oggstream = s->streams[i]->priv_data; - if (oggstream->page.segments_count) - ogg_buffer_page(s, oggstream); - } + ret = ff_flac_write_picture(s, + 1, + &c->attached_types, + c->audio_stream_idx, + pkt); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n"); + return ret; + } + c->waiting_pics--; + + /* flush the buffered audio packets */ + if (!c->waiting_pics && + (ret = ogg_queue_flush(s)) < 0) + return ret; + } else + return pkt->size ? 
ogg_write_packet_internal(s, pkt) : 0; + } else { + for (i = 0; i < s->nb_streams; i++) { + OGGStreamContext *oggstream = s->streams[i]->priv_data; + if (oggstream->page.segments_count) + ogg_buffer_page(s, oggstream); + } - ogg_write_pages(s, 2); + ogg_write_pages(s, 2); + } return 1; } @@ -709,6 +835,8 @@ static int ogg_write_trailer(AVFormatContext *s) /* flush current page if needed */ for (i = 0; i < s->nb_streams; i++) { OGGStreamContext *oggstream = s->streams[i]->priv_data; + if(!oggstream) + continue; if (oggstream->page.size > 0) ogg_buffer_page(s, oggstream); @@ -736,7 +864,9 @@ static void ogg_free(AVFormatContext *s) st->codecpar->codec_id == AV_CODEC_ID_VP8) { av_freep(&oggstream->header[0]); } - av_freep(&oggstream->header[1]); + if (st->codecpar->codec_id != AV_CODEC_ID_PNG && + st->codecpar->codec_id != AV_CODEC_ID_MJPEG) + av_freep(&oggstream->header[1]); } while (p) { @@ -862,6 +992,7 @@ const FFOutputFormat ff_opus_muxer = { .p.extensions = "opus", .priv_data_size = sizeof(OGGContext), .p.audio_codec = AV_CODEC_ID_OPUS, + .p.video_codec = AV_CODEC_ID_PNG, .init = ogg_init, .write_header = ogg_write_header, .write_packet = ogg_write_packet,

[FFmpeg-devel,v7] avformat/ogg: Add support for embedding cover art in Ogg

Checks

Commit Message

Patch