diff mbox series

[FFmpeg-devel] Subject: [PATCH v4] avformat: Add support for embedding cover art in Ogg files

Message ID _QxagD9_kAfB0Pl4W8zwTIf_YA0Ys4JPYP6LEJ2XV-U1bDi9d1qamUyperdrJrkSgVd5t5OiEGeoV-iaNTaaA7XbRtC8Q9a0OTGq6-EP_WQ=@protonmail.com
State New
Headers show
Series [FFmpeg-devel] Subject: [PATCH v4] avformat: Add support for embedding cover art in Ogg files | expand

Commit Message

Zsolt Vadász March 4, 2023, 4:01 p.m. UTC
Signed-off-by: Zsolt Vadasz <zsolt_vadasz@protonmail.com>
---
 libavformat/flac_picture.c | 132 ++++++++++++++++++++++
 libavformat/flac_picture.h |   5 +
 libavformat/flacenc.c      |  90 +--------------
 libavformat/oggenc.c       | 217 ++++++++++++++++++++++++++++++-------
 4 files changed, 319 insertions(+), 125 deletions(-)

Comments

Marton Balint March 5, 2023, 7:25 p.m. UTC | #1
On Sat, 4 Mar 2023, Zsolt Vadász wrote:

> Signed-off-by: Zsolt Vadasz <zsolt_vadasz@protonmail.com>
> ---
> libavformat/flac_picture.c | 132 ++++++++++++++++++++++
> libavformat/flac_picture.h |   5 +
> libavformat/flacenc.c      |  90 +--------------
> libavformat/oggenc.c       | 217 ++++++++++++++++++++++++++++++-------
> 4 files changed, 319 insertions(+), 125 deletions(-)

Can you split this into two patches? The first should only factor out 
flac_write_picture without changing any functionality; the second should 
add the actual cover art support to the Ogg muxer.

Thanks,
Marton

>
> diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
> index b33fee75b4..30152a2ba9 100644
> --- a/libavformat/flac_picture.c
> +++ b/libavformat/flac_picture.c
> @@ -20,6 +20,9 @@
>  */
>
> #include "libavutil/intreadwrite.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/base64.h"
> +#include "libavutil/pixdesc.h"
> #include "libavcodec/bytestream.h"
> #include "libavcodec/png.h"
> #include "avformat.h"
> @@ -188,3 +191,132 @@ fail:
>
>     return ret;
> }
> +
> +int ff_flac_write_picture(struct AVFormatContext *s,
> +                          int isogg,
> +                          unsigned *attached_types,
> +                          int audio_stream_idx, // unused if !isogg
> +                          AVPacket *pkt)
> +{
> +    AVIOContext *pb = s->pb;
> +    const AVPixFmtDescriptor *pixdesc;
> +    const CodecMime *mime = ff_id3v2_mime_tags;
> +    AVDictionaryEntry *e;
> +    const char *mimetype = NULL, *desc = "";
> +    const AVStream *st = s->streams[pkt->stream_index];
> +    int i, mimelen, desclen, type = 0, blocklen;
> +
> +    if (!pkt->data)
> +        return 0;
> +
> +    while (mime->id != AV_CODEC_ID_NONE) {
> +        if (mime->id == st->codecpar->codec_id) {
> +            mimetype = mime->str;
> +            break;
> +        }
> +        mime++;
> +    }
> +    if (!mimetype) {
> +        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
> +               "write an attached picture.\n", st->index);
> +        return AVERROR(EINVAL);
> +    }
> +    mimelen = strlen(mimetype);
> +
> +    /* get the picture type */
> +    e = av_dict_get(st->metadata, "comment", NULL, 0);
> +    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
> +        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
> +            type = i;
> +            break;
> +        }
> +    }
> +
> +    if (((*attached_types) & (1 << type)) & 0x6) {
> +        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
> +                      st->codecpar->width != 32 ||
> +                      st->codecpar->height != 32)) {
> +        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    *attached_types |= (1 << type);
> +
> +    /* get the description */
> +    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
> +        desc = e->value;
> +    desclen = strlen(desc);
> +
> +    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
> +    if (blocklen >= 1<<24) {
> +        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    if(!isogg) {
> +        avio_w8(pb, 0x06);
> +        avio_wb24(pb, blocklen);
> +
> +        avio_wb32(pb, type);
> +
> +        avio_wb32(pb, mimelen);
> +        avio_write(pb, mimetype, mimelen);
> +
> +        avio_wb32(pb, desclen);
> +        avio_write(pb, desc, desclen);
> +
> +        avio_wb32(pb, st->codecpar->width);
> +        avio_wb32(pb, st->codecpar->height);
> +        if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
> +            avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
> +        else
> +            avio_wb32(pb, 0);
> +        avio_wb32(pb, 0);
> +
> +        avio_wb32(pb, pkt->size);
> +        avio_write(pb, pkt->data, pkt->size);
> +    } else {
> +        uint8_t *metadata_block_picture, *ptr;
> +        int encoded_len, ret;
> +        char *encoded;
> +        AVStream *audio_stream = s->streams[audio_stream_idx];
> +
> +        metadata_block_picture = av_mallocz(blocklen);
> +        ptr = metadata_block_picture;
> +        bytestream_put_be32(&ptr, type);
> +
> +        bytestream_put_be32(&ptr, mimelen);
> +        bytestream_put_buffer(&ptr, mimetype, mimelen);
> +
> +        bytestream_put_be32(&ptr, desclen);
> +        bytestream_put_buffer(&ptr, desc, desclen);
> +
> +        bytestream_put_be32(&ptr, st->codecpar->width);
> +        bytestream_put_be32(&ptr, st->codecpar->height);
> +        if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
> +            bytestream_put_be32(&ptr, av_get_bits_per_pixel(pixdesc));
> +        else
> +            bytestream_put_be32(&ptr, 0);
> +        bytestream_put_be32(&ptr, 0);
> +
> +        bytestream_put_be32(&ptr, pkt->size);
> +        bytestream_put_buffer(&ptr, pkt->data, pkt->size);
> +
> +        encoded_len = AV_BASE64_SIZE(blocklen);
> +        encoded = av_mallocz(encoded_len);
> +        av_base64_encode(encoded, encoded_len, metadata_block_picture, blocklen);
> +        av_free(metadata_block_picture);
> +
> +        ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE", encoded, 0);
> +        av_free(encoded);
> +        av_packet_unref(pkt);
> +
> +        if (ret < 0)
> +            return ret;
> +    }
> +    return 0;
> +}
> diff --git a/libavformat/flac_picture.h b/libavformat/flac_picture.h
> index db074e531d..efa11aee32 100644
> --- a/libavformat/flac_picture.h
> +++ b/libavformat/flac_picture.h
> @@ -39,5 +39,10 @@
>  */
> int ff_flac_parse_picture(AVFormatContext *s, uint8_t **buf, int buf_size,
>                           int truncate_workaround);
> +int ff_flac_write_picture(struct AVFormatContext *s,
> +                          int isogg,
> +                          unsigned *attached_types,
> +                          int audio_stream_idx,
> +                          AVPacket *pkt);
>
> #endif /* AVFORMAT_FLAC_PICTURE_H */
> diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
> index a8beec7750..7970c2531d 100644
> --- a/libavformat/flacenc.c
> +++ b/libavformat/flacenc.c
> @@ -33,6 +33,7 @@
> #include "mux.h"
> #include "version.h"
> #include "vorbiscomment.h"
> +#include "flac_picture.h"
>
>
> typedef struct FlacMuxerContext {
> @@ -79,94 +80,9 @@ static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
>     return 0;
> }
>
> -static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt)
> -{
> -    FlacMuxerContext *c = s->priv_data;
> -    AVIOContext *pb = s->pb;
> -    const AVPixFmtDescriptor *pixdesc;
> -    const CodecMime *mime = ff_id3v2_mime_tags;
> -    AVDictionaryEntry *e;
> -    const char *mimetype = NULL, *desc = "";
> -    const AVStream *st = s->streams[pkt->stream_index];
> -    int i, mimelen, desclen, type = 0, blocklen;
> -
> -    if (!pkt->data)
> -        return 0;
> -
> -    while (mime->id != AV_CODEC_ID_NONE) {
> -        if (mime->id == st->codecpar->codec_id) {
> -            mimetype = mime->str;
> -            break;
> -        }
> -        mime++;
> -    }
> -    if (!mimetype) {
> -        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
> -               "write an attached picture.\n", st->index);
> -        return AVERROR(EINVAL);
> -    }
> -    mimelen = strlen(mimetype);
> -
> -    /* get the picture type */
> -    e = av_dict_get(st->metadata, "comment", NULL, 0);
> -    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
> -        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
> -            type = i;
> -            break;
> -        }
> -    }
> -
> -    if ((c->attached_types & (1 << type)) & 0x6) {
> -        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
> -        return AVERROR(EINVAL);
> -    }
> -
> -    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
> -                      st->codecpar->width != 32 ||
> -                      st->codecpar->height != 32)) {
> -        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
> -        return AVERROR(EINVAL);
> -    }
> -
> -    c->attached_types |= (1 << type);
> -
> -    /* get the description */
> -    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
> -        desc = e->value;
> -    desclen = strlen(desc);
> -
> -    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
> -    if (blocklen >= 1<<24) {
> -        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
> -        return AVERROR(EINVAL);
> -    }
> -
> -    avio_w8(pb, 0x06);
> -    avio_wb24(pb, blocklen);
> -
> -    avio_wb32(pb, type);
> -
> -    avio_wb32(pb, mimelen);
> -    avio_write(pb, mimetype, mimelen);
> -
> -    avio_wb32(pb, desclen);
> -    avio_write(pb, desc, desclen);
> -
> -    avio_wb32(pb, st->codecpar->width);
> -    avio_wb32(pb, st->codecpar->height);
> -    if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
> -        avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
> -    else
> -        avio_wb32(pb, 0);
> -    avio_wb32(pb, 0);
> -
> -    avio_wb32(pb, pkt->size);
> -    avio_write(pb, pkt->data, pkt->size);
> -    return 0;
> -}
> -
> static int flac_finish_header(struct AVFormatContext *s)
> {
> +    FlacMuxerContext *c = s->priv_data;
>     int i, ret, padding = s->metadata_header_padding;
>     if (padding < 0)
>         padding = 8192;
> @@ -179,7 +95,7 @@ static int flac_finish_header(struct AVFormatContext *s)
>         AVPacket *pkt = st->priv_data;
>         if (!pkt)
>             continue;
> -        ret = flac_write_picture(s, pkt);
> +        ret = ff_flac_write_picture(s, 0, &c->attached_types, -1, pkt);
>         av_packet_unref(pkt);
>         if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE))
>             return ret;
> diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
> index 2e582d0754..6804c3f1dd 100644
> --- a/libavformat/oggenc.c
> +++ b/libavformat/oggenc.c
> @@ -23,19 +23,28 @@
>
> #include <stdint.h>
>
> +#include "libavcodec/codec_id.h"
> +#include "libavutil/avutil.h"
> #include "libavutil/crc.h"
> +#include "libavutil/log.h"
> #include "libavutil/mathematics.h"
> #include "libavutil/opt.h"
> #include "libavutil/random_seed.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/avstring.h"
> +#include "libavutil/base64.h"
> +#include "libavutil/bswap.h"
> #include "libavcodec/xiph.h"
> #include "libavcodec/bytestream.h"
> #include "libavcodec/flac.h"
> #include "avformat.h"
> +#include "id3v2.h"
> #include "avio_internal.h"
> #include "internal.h"
> #include "mux.h"
> #include "version.h"
> #include "vorbiscomment.h"
> +#include "flac_picture.h"
>
> #define MAX_PAGE_SIZE 65025
>
> @@ -78,6 +87,11 @@ typedef struct OGGContext {
>     int pref_size; ///< preferred page size (0 => fill all segments)
>     int64_t pref_duration;      ///< preferred page duration (0 => fill all segments)
>     int serial_offset;
> +
> +    PacketList queue;
> +    int audio_stream_idx;
> +    int waiting_pics;
> +    unsigned attached_types;
> } OGGContext;
>
> #define OFFSET(x) offsetof(OGGContext, x)
> @@ -469,12 +483,14 @@ static void ogg_write_pages(AVFormatContext *s, int flush)
>     ogg->page_list = p;
> }
>
> -static int ogg_init(AVFormatContext *s)
> +static int ogg_finish_init(AVFormatContext *s)
> {
>     OGGContext *ogg = s->priv_data;
>     OGGStreamContext *oggstream = NULL;
>     int i, j;
>
> +    ogg->waiting_pics = 0;
> +
>     if (ogg->pref_size)
>         av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
>
> @@ -482,29 +498,10 @@ static int ogg_init(AVFormatContext *s)
>         AVStream *st = s->streams[i];
>         unsigned serial_num = i + ogg->serial_offset;
>
> -        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
> -            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
> -                /* Opus requires a fixed 48kHz clock */
> -                avpriv_set_pts_info(st, 64, 1, 48000);
> -            else
> -                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> -        }
> -
> -        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
> -            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
> -            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
> -            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
> -            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
> -            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
> -            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
> -            return AVERROR(EINVAL);
> -        }
> +        if(st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
> +           (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
> +            continue;
>
> -        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
> -            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
> -            av_log(s, AV_LOG_ERROR, "No extradata present\n");
> -            return AVERROR_INVALIDDATA;
> -        }
>         oggstream = av_mallocz(sizeof(*oggstream));
>         if (!oggstream)
>             return AVERROR(ENOMEM);
> @@ -515,8 +512,11 @@ static int ogg_init(AVFormatContext *s)
>             do {
>                 serial_num = av_get_random_seed();
>                 for (j = 0; j < i; j++) {
> +                    // NULL for attached_pic
>                     OGGStreamContext *sc = s->streams[j]->priv_data;
> -                    if (serial_num == sc->serial_num)
> +                    if(!sc)
> +                        continue;
> +                    else if (serial_num == sc->serial_num)
>                         break;
>                 }
>             } while (j < i);
> @@ -563,9 +563,9 @@ static int ogg_init(AVFormatContext *s)
>             int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
>
>             if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
> -                                      st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
> -                                      (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
> -                av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
> +                                          st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
> +                                          (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
> +                av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
>                 oggstream->header[1] = NULL;
>                 return AVERROR_INVALIDDATA;
>             }
> @@ -602,13 +602,67 @@ static int ogg_init(AVFormatContext *s)
>     return 0;
> }
>
> -static int ogg_write_header(AVFormatContext *s)
> +static int ogg_init(AVFormatContext *s)
> +{
> +    OGGContext *ogg = s->priv_data;
> +    int i;
> +
> +    ogg->waiting_pics = 0;
> +    ogg->attached_types = 0;
> +
> +    if (ogg->pref_size)
> +        av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
> +
> +    for (i = 0; i < s->nb_streams; i++) {
> +        AVStream *st = s->streams[i];
> +
> +        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
> +            ogg->audio_stream_idx = i;
> +            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
> +                /* Opus requires a fixed 48kHz clock */
> +                avpriv_set_pts_info(st, 64, 1, 48000);
> +            else
> +                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
> +        }
> +
> +        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
> +            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
> +            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
> +            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
> +            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
> +            st->codecpar->codec_id != AV_CODEC_ID_VP8    &&
> +            st->codecpar->codec_id != AV_CODEC_ID_PNG    &&
> +            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
> +            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
> +            return AVERROR(EINVAL);
> +        }
> +
> +        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
> +            st->codecpar->codec_id != AV_CODEC_ID_VP8 &&
> +            st->codecpar->codec_id != AV_CODEC_ID_PNG &&
> +            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
> +            av_log(s, AV_LOG_ERROR, "No extradata present\n");
> +            return AVERROR_INVALIDDATA;
> +        }
> +        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
> +            (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
> +            ogg->waiting_pics++;
> +    }
> +
> +    if (!ogg->waiting_pics)
> +        return ogg_finish_init(s);
> +    return 0;
> +}
> +
> +static int ogg_finish_header(AVFormatContext *s)
> {
>     OGGStreamContext *oggstream = NULL;
>     int i, j;
>
>     for (j = 0; j < s->nb_streams; j++) {
>         oggstream = s->streams[j]->priv_data;
> +        if(!oggstream)
> +            continue;
>         ogg_buffer_data(s, s->streams[j], oggstream->header[0],
>                         oggstream->header_len[0], 0, 1);
>         oggstream->page.flags |= 2; // bos
> @@ -617,6 +671,8 @@ static int ogg_write_header(AVFormatContext *s)
>     for (j = 0; j < s->nb_streams; j++) {
>         AVStream *st = s->streams[j];
>         oggstream = st->priv_data;
> +        if(!oggstream)
> +            continue;
>         for (i = 1; i < 3; i++) {
>             if (oggstream->header_len[i])
>                 ogg_buffer_data(s, st, oggstream->header[i],
> @@ -632,6 +688,14 @@ static int ogg_write_header(AVFormatContext *s)
>     return 0;
> }
>
> +static int ogg_write_header(AVFormatContext *s)
> +{
> +    OGGContext *ogg = s->priv_data;
> +    if (!ogg->waiting_pics)
> +        return ogg_finish_header(s);
> +    return 0;
> +}
> +
> static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
> {
>     AVStream *st = s->streams[pkt->stream_index];
> @@ -684,20 +748,92 @@ static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>     return 0;
> }
>
> +static int ogg_queue_flush(AVFormatContext *s)
> +{
> +    OGGContext *c = s->priv_data;
> +    AVPacket *const pkt = ffformatcontext(s)->pkt;
> +    int ret, write = 1;
> +    ret = ogg_finish_init(s);
> +    if (ret < 0)
> +        write = 0;
> +    ret = ogg_finish_header(s);
> +    if (ret < 0)
> +        write = 0;
> +
> +    while (c->queue.head) {
> +        avpriv_packet_list_get(&c->queue, pkt);
> +        if (write && (ret = ogg_write_packet_internal(s, pkt)) < 0)
> +            write = 0;
> +        av_packet_unref(pkt);
> +    }
> +    return ret;
> +}
> +
> static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
> {
> -    int i;
> +    OGGContext *c = s->priv_data;
> +    int i, ret;
> +
> +    if (pkt) {
> +        if (pkt->stream_index == c->audio_stream_idx) {
> +            if (c->waiting_pics) {
> +                /* buffer audio packets until we get all the pictures */
> +                ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
> +                if (ret < 0) {
> +                    av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
> +                    c->waiting_pics = 0;
> +                    ret = ogg_queue_flush(s);
> +                    if (ret < 0)
> +                        return ret;
> +                    return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
> +                }
> +            } else
> +                return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
> +        } else {
> +            AVStream *st = s->streams[pkt->stream_index];
>
> -    if (pkt)
> -        return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
> +            if (!c->waiting_pics ||
> +                !(st->disposition & AV_DISPOSITION_ATTACHED_PIC))
> +                return 0;
>
> -    for (i = 0; i < s->nb_streams; i++) {
> -        OGGStreamContext *oggstream = s->streams[i]->priv_data;
> -        if (oggstream->page.segments_count)
> -            ogg_buffer_page(s, oggstream);
> -    }
> +            /* warn only once for each stream */
> +            if (st->nb_frames == 1) {
> +                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
> +                       " ignoring.\n", pkt->stream_index);
> +            }
> +            if (st->nb_frames >= 1) {
> +                av_log(s, AV_LOG_WARNING, "Attached picture must not have more than one frame.\n");
> +                return 0;
> +            }
>
> -    ogg_write_pages(s, 2);
> +            //st->priv_data = av_packet_clone(pkt);
> +            //if (!st->priv_data)
> +            //    av_log(s, AV_LOG_ERROR, "Out of memory queueing an attached picture; skipping\n");
> +            ret = ff_flac_write_picture(s,
> +                                        1,
> +                                        &c->attached_types,
> +                                        c->audio_stream_idx,
> +                                        pkt);
> +            if (ret < 0) {
> +                av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
> +                return ret;
> +            }
> +            c->waiting_pics--;
> +
> +            /* flush the buffered audio packets */
> +            if (!c->waiting_pics &&
> +                (ret = ogg_queue_flush(s)) < 0)
> +                return ret;
> +        }
> +    } else {
> +        for (i = 0; i < s->nb_streams; i++) {
> +            OGGStreamContext *oggstream = s->streams[i]->priv_data;
> +            if (oggstream->page.segments_count)
> +                ogg_buffer_page(s, oggstream);
> +        }
> +
> +        ogg_write_pages(s, 2);
> +    }
>     return 1;
> }
>
> @@ -708,6 +844,8 @@ static int ogg_write_trailer(AVFormatContext *s)
>     /* flush current page if needed */
>     for (i = 0; i < s->nb_streams; i++) {
>         OGGStreamContext *oggstream = s->streams[i]->priv_data;
> +        if(!oggstream)
> +            continue;
>
>         if (oggstream->page.size > 0)
>             ogg_buffer_page(s, oggstream);
> @@ -735,7 +873,9 @@ static void ogg_free(AVFormatContext *s)
>             st->codecpar->codec_id == AV_CODEC_ID_VP8) {
>             av_freep(&oggstream->header[0]);
>         }
> -        av_freep(&oggstream->header[1]);
> +        if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
> +            st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
> +            av_freep(&oggstream->header[1]);
>     }
>
>     while (p) {
> @@ -841,6 +981,7 @@ const FFOutputFormat ff_opus_muxer = {
>     .p.extensions      = "opus",
>     .priv_data_size    = sizeof(OGGContext),
>     .p.audio_codec     = AV_CODEC_ID_OPUS,
> +    .p.video_codec       = AV_CODEC_ID_PNG,
>     .init              = ogg_init,
>     .write_header      = ogg_write_header,
>     .write_packet      = ogg_write_packet,
> -- 
> 2.34.1
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
index b33fee75b4..30152a2ba9 100644
--- a/libavformat/flac_picture.c
+++ b/libavformat/flac_picture.c
@@ -20,6 +20,9 @@ 
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/pixdesc.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/png.h"
 #include "avformat.h"
@@ -188,3 +191,159 @@  fail:
 
     return ret;
 }
+
+/**
+ * Serialize an attached picture as a FLAC PICTURE metadata block.
+ *
+ * If isogg is zero, the block is written to s->pb preceded by its FLAC
+ * metadata block header (block type 6 and a 24-bit length). Otherwise the
+ * block body is base64-encoded and added as a METADATA_BLOCK_PICTURE entry
+ * to the metadata of the stream at audio_stream_idx.
+ *
+ * @param s                muxer context
+ * @param isogg            nonzero to store the picture as a base64 tag
+ * @param attached_types   bitmask of picture types seen so far; the bit of
+ *                         the type handled here is set
+ * @param audio_stream_idx index of the stream receiving the tag
+ * @param pkt              packet holding the picture; it stays owned by the
+ *                         caller and is never unreferenced here
+ * @return 0 on success or if pkt carries no data, a negative AVERROR code
+ *         otherwise
+ */
+int ff_flac_write_picture(struct AVFormatContext *s,
+                          int isogg,
+                          unsigned *attached_types,
+                          int audio_stream_idx, // unused if !isogg
+                          AVPacket *pkt)
+{
+    AVIOContext *pb = s->pb;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    const AVStream *st = s->streams[pkt->stream_index];
+    int i, mimelen, desclen, type = 0, blocklen;
+
+    if (!pkt->data)
+        return 0;
+
+    while (mime->id != AV_CODEC_ID_NONE) {
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+        mime++;
+    }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
+        return AVERROR(EINVAL);
+    }
+    mimelen = strlen(mimetype);
+
+    /* get the picture type */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    /* mask 0x6: only picture types 1 and 2 may appear at most once */
+    if ((*attached_types & (1U << type)) & 0x6) {
+        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
+        return AVERROR(EINVAL);
+    }
+
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width != 32 ||
+                      st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG\n");
+        return AVERROR(EINVAL);
+    }
+
+    *attached_types |= 1U << type;
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+    desclen = strlen(desc);
+
+    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
+    if (blocklen >= 1<<24) {
+        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
+        return AVERROR(EINVAL);
+    }
+
+    if (!isogg) {
+        avio_w8(pb, 0x06);
+        avio_wb24(pb, blocklen);
+
+        avio_wb32(pb, type);
+
+        avio_wb32(pb, mimelen);
+        avio_write(pb, mimetype, mimelen);
+
+        avio_wb32(pb, desclen);
+        avio_write(pb, desc, desclen);
+
+        avio_wb32(pb, st->codecpar->width);
+        avio_wb32(pb, st->codecpar->height);
+        if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
+            avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
+        else
+            avio_wb32(pb, 0);
+        avio_wb32(pb, 0);
+
+        avio_wb32(pb, pkt->size);
+        avio_write(pb, pkt->data, pkt->size);
+    } else {
+        uint8_t *metadata_block_picture, *ptr;
+        int encoded_len, ret;
+        char *encoded;
+        AVStream *audio_stream = s->streams[audio_stream_idx];
+
+        metadata_block_picture = av_mallocz(blocklen);
+        if (!metadata_block_picture)
+            return AVERROR(ENOMEM);
+        ptr = metadata_block_picture;
+        bytestream_put_be32(&ptr, type);
+
+        bytestream_put_be32(&ptr, mimelen);
+        bytestream_put_buffer(&ptr, mimetype, mimelen);
+
+        bytestream_put_be32(&ptr, desclen);
+        bytestream_put_buffer(&ptr, desc, desclen);
+
+        bytestream_put_be32(&ptr, st->codecpar->width);
+        bytestream_put_be32(&ptr, st->codecpar->height);
+        if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
+            bytestream_put_be32(&ptr, av_get_bits_per_pixel(pixdesc));
+        else
+            bytestream_put_be32(&ptr, 0);
+        bytestream_put_be32(&ptr, 0);
+
+        bytestream_put_be32(&ptr, pkt->size);
+        bytestream_put_buffer(&ptr, pkt->data, pkt->size);
+
+        encoded_len = AV_BASE64_SIZE(blocklen);
+        encoded = av_mallocz(encoded_len);
+        if (!encoded) {
+            av_free(metadata_block_picture);
+            return AVERROR(ENOMEM);
+        }
+        av_base64_encode(encoded, encoded_len, metadata_block_picture, blocklen);
+        av_free(metadata_block_picture);
+
+        /* Each picture gets its own entry, so append instead of replacing
+         * a picture stored by an earlier call. */
+        ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE",
+                          encoded, AV_DICT_MULTIKEY);
+        av_free(encoded);
+
+        if (ret < 0)
+            return ret;
+    }
+    return 0;
+}
diff --git a/libavformat/flac_picture.h b/libavformat/flac_picture.h
index db074e531d..efa11aee32 100644
--- a/libavformat/flac_picture.h
+++ b/libavformat/flac_picture.h
@@ -39,5 +39,26 @@ 
  */
 int ff_flac_parse_picture(AVFormatContext *s, uint8_t **buf, int buf_size,
                           int truncate_workaround);
+
+/**
+ * Write an attached picture as a FLAC PICTURE metadata block.
+ *
+ * @param s                muxer context
+ * @param isogg            if zero, the block is written to s->pb with its
+ *                         FLAC metadata block header; otherwise it is
+ *                         base64-encoded and stored as a
+ *                         METADATA_BLOCK_PICTURE metadata entry
+ * @param attached_types   bitmask of the picture types attached so far,
+ *                         updated with the type of this picture
+ * @param audio_stream_idx index of the stream whose metadata receives the
+ *                         entry; unused if isogg is zero
+ * @param pkt              packet containing the picture data
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_flac_write_picture(struct AVFormatContext *s,
+                          int isogg,
+                          unsigned *attached_types,
+                          int audio_stream_idx,
+                          AVPacket *pkt);
 
 #endif /* AVFORMAT_FLAC_PICTURE_H */
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index a8beec7750..7970c2531d 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -33,6 +33,7 @@ 
 #include "mux.h"
 #include "version.h"
 #include "vorbiscomment.h"
+#include "flac_picture.h"
 
 
 typedef struct FlacMuxerContext {
@@ -79,94 +80,9 @@  static int flac_write_block_comment(AVIOContext *pb, AVDictionary **m,
     return 0;
 }
 
-static int flac_write_picture(struct AVFormatContext *s, AVPacket *pkt)
-{
-    FlacMuxerContext *c = s->priv_data;
-    AVIOContext *pb = s->pb;
-    const AVPixFmtDescriptor *pixdesc;
-    const CodecMime *mime = ff_id3v2_mime_tags;
-    AVDictionaryEntry *e;
-    const char *mimetype = NULL, *desc = "";
-    const AVStream *st = s->streams[pkt->stream_index];
-    int i, mimelen, desclen, type = 0, blocklen;
-
-    if (!pkt->data)
-        return 0;
-
-    while (mime->id != AV_CODEC_ID_NONE) {
-        if (mime->id == st->codecpar->codec_id) {
-            mimetype = mime->str;
-            break;
-        }
-        mime++;
-    }
-    if (!mimetype) {
-        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
-               "write an attached picture.\n", st->index);
-        return AVERROR(EINVAL);
-    }
-    mimelen = strlen(mimetype);
-
-    /* get the picture type */
-    e = av_dict_get(st->metadata, "comment", NULL, 0);
-    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
-        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
-            type = i;
-            break;
-        }
-    }
-
-    if ((c->attached_types & (1 << type)) & 0x6) {
-        av_log(s, AV_LOG_ERROR, "Duplicate attachment for type '%s'\n", ff_id3v2_picture_types[type]);
-        return AVERROR(EINVAL);
-    }
-
-    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
-                      st->codecpar->width != 32 ||
-                      st->codecpar->height != 32)) {
-        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG");
-        return AVERROR(EINVAL);
-    }
-
-    c->attached_types |= (1 << type);
-
-    /* get the description */
-    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
-        desc = e->value;
-    desclen = strlen(desc);
-
-    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
-    if (blocklen >= 1<<24) {
-        av_log(s, AV_LOG_ERROR, "Picture block too big %d >= %d\n", blocklen, 1<<24);
-        return AVERROR(EINVAL);
-    }
-
-    avio_w8(pb, 0x06);
-    avio_wb24(pb, blocklen);
-
-    avio_wb32(pb, type);
-
-    avio_wb32(pb, mimelen);
-    avio_write(pb, mimetype, mimelen);
-
-    avio_wb32(pb, desclen);
-    avio_write(pb, desc, desclen);
-
-    avio_wb32(pb, st->codecpar->width);
-    avio_wb32(pb, st->codecpar->height);
-    if ((pixdesc = av_pix_fmt_desc_get(st->codecpar->format)))
-        avio_wb32(pb, av_get_bits_per_pixel(pixdesc));
-    else
-        avio_wb32(pb, 0);
-    avio_wb32(pb, 0);
-
-    avio_wb32(pb, pkt->size);
-    avio_write(pb, pkt->data, pkt->size);
-    return 0;
-}
-
 static int flac_finish_header(struct AVFormatContext *s)
 {
+    FlacMuxerContext *c = s->priv_data;
     int i, ret, padding = s->metadata_header_padding;
     if (padding < 0)
         padding = 8192;
@@ -179,7 +95,7 @@  static int flac_finish_header(struct AVFormatContext *s)
         AVPacket *pkt = st->priv_data;
         if (!pkt)
             continue;
-        ret = flac_write_picture(s, pkt);
+        ret = ff_flac_write_picture(s, 0, &c->attached_types, -1, pkt);
         av_packet_unref(pkt);
         if (ret < 0 && (s->error_recognition & AV_EF_EXPLODE))
             return ret;
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 2e582d0754..6804c3f1dd 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -23,19 +23,28 @@ 
 
 #include <stdint.h>
 
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
 #include "libavutil/crc.h"
+#include "libavutil/log.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/flac.h"
 #include "avformat.h"
+#include "id3v2.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "mux.h"
 #include "version.h"
 #include "vorbiscomment.h"
+#include "flac_picture.h"
 
 #define MAX_PAGE_SIZE 65025
 
@@ -78,6 +87,11 @@  typedef struct OGGContext {
     int pref_size; ///< preferred page size (0 => fill all segments)
     int64_t pref_duration;      ///< preferred page duration (0 => fill all segments)
     int serial_offset;
+
+    PacketList queue;
+    int audio_stream_idx;
+    int waiting_pics;
+    unsigned attached_types;
 } OGGContext;
 
 #define OFFSET(x) offsetof(OGGContext, x)
@@ -469,12 +483,14 @@  static void ogg_write_pages(AVFormatContext *s, int flush)
     ogg->page_list = p;
 }
 
-static int ogg_init(AVFormatContext *s)
+static int ogg_finish_init(AVFormatContext *s)
 {
     OGGContext *ogg = s->priv_data;
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
+    ogg->waiting_pics = 0;
+
     if (ogg->pref_size)
         av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
 
@@ -482,29 +498,10 @@  static int ogg_init(AVFormatContext *s)
         AVStream *st = s->streams[i];
         unsigned serial_num = i + ogg->serial_offset;
 
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
-                /* Opus requires a fixed 48kHz clock */
-                avpriv_set_pts_info(st, 64, 1, 48000);
-            else
-                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
-        }
-
-        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
-            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
-            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
-            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
-            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
-            return AVERROR(EINVAL);
-        }
+        if(st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+           (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            continue;
 
-        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "No extradata present\n");
-            return AVERROR_INVALIDDATA;
-        }
         oggstream = av_mallocz(sizeof(*oggstream));
         if (!oggstream)
             return AVERROR(ENOMEM);
@@ -515,8 +512,11 @@  static int ogg_init(AVFormatContext *s)
             do {
                 serial_num = av_get_random_seed();
                 for (j = 0; j < i; j++) {
+                    // NULL for attached_pic
                     OGGStreamContext *sc = s->streams[j]->priv_data;
-                    if (serial_num == sc->serial_num)
+                    if(!sc)
+                        continue;
+                    else if (serial_num == sc->serial_num)
                         break;
                 }
             } while (j < i);
@@ -563,9 +563,9 @@  static int ogg_init(AVFormatContext *s)
             int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
 
             if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
-                                      st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
-                                      (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
-                av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+                                          st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+                                          (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+                av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
                 oggstream->header[1] = NULL;
                 return AVERROR_INVALIDDATA;
             }
@@ -602,13 +602,67 @@  static int ogg_init(AVFormatContext *s)
     return 0;
 }
 
-static int ogg_write_header(AVFormatContext *s)
+/* Validate all streams and count the attached pictures still expected;
+ * the pagesize deprecation warning is left to ogg_finish_init() alone,
+ * since logging it here as well printed it twice. */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i;
+
+    ogg->waiting_pics = 0;
+    ogg->attached_types = 0;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ogg->audio_stream_idx = i;
+            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
+                /* Opus requires a fixed 48kHz clock */
+                avpriv_set_pts_info(st, 64, 1, 48000);
+            else
+                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+
+        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
+            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
+            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
+            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
+            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8    &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG    &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8 &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+            (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            ogg->waiting_pics++;
+    }
+
+    if (!ogg->waiting_pics)
+        return ogg_finish_init(s);
+    return 0;
+}
+
+static int ogg_finish_header(AVFormatContext *s)
 {
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
     for (j = 0; j < s->nb_streams; j++) {
         oggstream = s->streams[j]->priv_data;
+        if(!oggstream)
+            continue;
         ogg_buffer_data(s, s->streams[j], oggstream->header[0],
                         oggstream->header_len[0], 0, 1);
         oggstream->page.flags |= 2; // bos
@@ -617,6 +671,8 @@  static int ogg_write_header(AVFormatContext *s)
     for (j = 0; j < s->nb_streams; j++) {
         AVStream *st = s->streams[j];
         oggstream = st->priv_data;
+        if(!oggstream)
+            continue;
         for (i = 1; i < 3; i++) {
             if (oggstream->header_len[i])
                 ogg_buffer_data(s, st, oggstream->header[i],
@@ -632,6 +688,14 @@  static int ogg_write_header(AVFormatContext *s)
     return 0;
 }
 
+/* Emit the stream headers now, unless attached pictures are still pending. */
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *ctx = s->priv_data;
+
+    return ctx->waiting_pics ? 0 : ogg_finish_header(s);
+}
+
 static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -684,20 +748,92 @@  static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     return 0;
 }
 
+/* Finish the deferred init/header, then drain the queued packets; the queue
+ * is always emptied, and the first error encountered is the one returned. */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    int ret = ogg_finish_init(s);
+
+    /* never emit headers for streams whose setup failed */
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    while (c->queue.head) {
+        avpriv_packet_list_get(&c->queue, pkt);
+        if (ret >= 0)
+            ret = ogg_write_packet_internal(s, pkt);
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
 static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int i;
+    OGGContext *c = s->priv_data;
+    int i, ret;
+
+    if (pkt) {
+        if (pkt->stream_index == c->audio_stream_idx) {
+            if (c->waiting_pics) {
+                /* buffer audio packets until we get all the pictures */
+                ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+                if (ret < 0) {
+                    av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+                    c->waiting_pics = 0;
+                    ret = ogg_queue_flush(s);
+                    if (ret < 0)
+                        return ret;
+                    return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
+                }
+            } else
+                return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
+        } else {
+            AVStream *st = s->streams[pkt->stream_index];
 
-    if (pkt)
-        return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
+            if (!c->waiting_pics ||
+                !(st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+                return 0;
 
-    for (i = 0; i < s->nb_streams; i++) {
-        OGGStreamContext *oggstream = s->streams[i]->priv_data;
-        if (oggstream->page.segments_count)
-            ogg_buffer_page(s, oggstream);
-    }
+            /* warn only once for each stream */
+            if (st->nb_frames == 1) {
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                       " ignoring.\n", pkt->stream_index);
+            }
+            if (st->nb_frames >= 1) {
+                av_log(s, AV_LOG_WARNING, "Attached picture must not have more than one frame.\n");
+                return 0;
+            }
 
-    ogg_write_pages(s, 2);
+            //st->priv_data = av_packet_clone(pkt);
+            //if (!st->priv_data)
+            //    av_log(s, AV_LOG_ERROR, "Out of memory queueing an attached picture; skipping\n");
+            ret = ff_flac_write_picture(s,
+                                        1,
+                                        &c->attached_types,
+                                        c->audio_stream_idx,
+                                        pkt);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
+                return ret;
+            }
+            c->waiting_pics--;
+
+            /* flush the buffered audio packets */
+            if (!c->waiting_pics &&
+                (ret = ogg_queue_flush(s)) < 0)
+                return ret;
+        }
+    } else {
+        for (i = 0; i < s->nb_streams; i++) {
+            OGGStreamContext *oggstream = s->streams[i]->priv_data;
+            if (oggstream->page.segments_count)
+                ogg_buffer_page(s, oggstream);
+        }
+
+        ogg_write_pages(s, 2);
+    }
     return 1;
 }
 
@@ -708,6 +844,8 @@  static int ogg_write_trailer(AVFormatContext *s)
     /* flush current page if needed */
     for (i = 0; i < s->nb_streams; i++) {
         OGGStreamContext *oggstream = s->streams[i]->priv_data;
+        if(!oggstream)
+            continue;
 
         if (oggstream->page.size > 0)
             ogg_buffer_page(s, oggstream);
@@ -735,7 +873,9 @@  static void ogg_free(AVFormatContext *s)
             st->codecpar->codec_id == AV_CODEC_ID_VP8) {
             av_freep(&oggstream->header[0]);
         }
-        av_freep(&oggstream->header[1]);
+        if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+            av_freep(&oggstream->header[1]);
     }
 
     while (p) {
@@ -841,6 +981,7 @@  const FFOutputFormat ff_opus_muxer = {
     .p.extensions      = "opus",
     .priv_data_size    = sizeof(OGGContext),
     .p.audio_codec     = AV_CODEC_ID_OPUS,
+    .p.video_codec       = AV_CODEC_ID_PNG,
     .init              = ogg_init,
     .write_header      = ogg_write_header,
     .write_packet      = ogg_write_packet,