diff mbox series

[FFmpeg-devel] libavformat: Add support for embedding cover art in Ogg files

Message ID Cjh45MY4Dq9FpIZN22MHoEXu2o0B88-HoIauO_utbjTjOI4JSM-qQLk2uvZp7r1FJiH-Fcs91Fkf9TAAKEmyVEHmlS8mo9hwWezz_UdTm8U=@protonmail.com
State New
Headers show
Series [FFmpeg-devel] libavformat: Add support for embedding cover art in Ogg files | expand

Commit Message

Zsolt Vadász Aug. 27, 2022, 10:26 a.m. UTC
This is implemented the same way as in the FLAC muxer, so most of that code was reused and adapted.
Currently, only a single attached picture is supported.

Signed-off-by: Zsolt Vadasz <zsolt_vadasz@protonmail.com>
---
 libavformat/oggenc.c | 299 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 260 insertions(+), 39 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index ae0705ba54..1bd95b97ce 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -22,15 +22,24 @@ 
 #include "config_components.h"
 
 #include <stdint.h>
+#include <stdbool.h>
 
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
 #include "libavutil/crc.h"
+#include "libavutil/log.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/flac.h"
 #include "avformat.h"
+#include "id3v2.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "version.h"
@@ -77,6 +86,11 @@  typedef struct OGGContext {
     int pref_size; ///< preferred page size (0 => fill all segments)
     int64_t pref_duration;      ///< preferred page duration (0 => fill all segments)
     int serial_offset;
+
+    PacketList queue;      ///< packets held back until the cover art packet arrives
+    int audio_stream_idx;  ///< index of the last audio stream found by ogg_init()
+    int attached_pic_idx;  ///< index of the first attached-picture stream
+    bool has_attached_pic; ///< true while a cover art packet is still awaited
 } OGGContext;
 
 #define OFFSET(x) offsetof(OGGContext, x)
@@ -468,12 +482,14 @@  static void ogg_write_pages(AVFormatContext *s, int flush)
     ogg->page_list = p;
 }
 
-static int ogg_init(AVFormatContext *s)
+static int ogg_finish_init(AVFormatContext *s)
 {
     OGGContext *ogg = s->priv_data;
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
+    ogg->has_attached_pic = false;
+
     if (ogg->pref_size)
         av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
 
@@ -481,29 +497,6 @@  static int ogg_init(AVFormatContext *s)
         AVStream *st = s->streams[i];
         unsigned serial_num = i + ogg->serial_offset;
 
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
-                /* Opus requires a fixed 48kHz clock */
-                avpriv_set_pts_info(st, 64, 1, 48000);
-            else
-                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
-        }
-
-        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
-            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
-            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
-            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
-            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
-            return AVERROR(EINVAL);
-        }
-
-        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "No extradata present\n");
-            return AVERROR_INVALIDDATA;
-        }
         oggstream = av_mallocz(sizeof(*oggstream));
         if (!oggstream)
             return AVERROR(ENOMEM);
@@ -561,10 +554,11 @@  static int ogg_init(AVFormatContext *s)
             int header_type = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 3 : 0x81;
             int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
 
-            if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
-                                      st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
-                                      (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
-                av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+            if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) &&
+                avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
+                                          st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+                                          (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+                av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
                 oggstream->header[1] = NULL;
                 return AVERROR_INVALIDDATA;
             }
@@ -601,7 +595,65 @@  static int ogg_init(AVFormatContext *s)
     return 0;
 }
 
-static int ogg_write_header(AVFormatContext *s)
+/* Validate every stream and pick the audio stream and the cover art stream.
+ * With cover art present, stream setup is deferred until its packet arrives. */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i;
+
+    ogg->has_attached_pic = false;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        enum AVCodecID id = st->codecpar->codec_id;
+        int is_image = id == AV_CODEC_ID_PNG || id == AV_CODEC_ID_MJPEG;
+        int is_pic   = st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+                       (st->disposition & AV_DISPOSITION_ATTACHED_PIC);
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ogg->audio_stream_idx = i;
+            /* Opus requires a fixed 48kHz clock */
+            avpriv_set_pts_info(st, 64, 1, id == AV_CODEC_ID_OPUS ? 48000 :
+                                           st->codecpar->sample_rate);
+        }
+
+        /* still images are only accepted as cover art, never as a stream */
+        if ((is_image && !is_pic) ||
+            (!is_image &&
+             id != AV_CODEC_ID_VORBIS && id != AV_CODEC_ID_THEORA &&
+             id != AV_CODEC_ID_SPEEX  && id != AV_CODEC_ID_FLAC   &&
+             id != AV_CODEC_ID_OPUS   && id != AV_CODEC_ID_VP8)) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
+            id != AV_CODEC_ID_VP8 && !is_image) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (!is_pic)
+            continue;
+        if (ogg->has_attached_pic) {
+            // FIXME add support for embedding multiple pictures in ogg
+            av_log(s, AV_LOG_WARNING, "The implementation currently only supports a single attached_pic.\n");
+            continue;
+        }
+        ogg->has_attached_pic = true;
+        ogg->attached_pic_idx = i;
+    }
+
+    /* the picture ends up in another stream's metadata, so one must exist */
+    if (ogg->has_attached_pic && ogg->audio_stream_idx == ogg->attached_pic_idx) {
+        av_log(s, AV_LOG_ERROR, "No stream available to carry the attached picture\n");
+        return AVERROR(EINVAL);
+    }
+
+    return ogg->has_attached_pic ? 0 : ogg_finish_init(s);
+}
+
+static int ogg_finish_header(AVFormatContext *s)
 {
     OGGStreamContext *oggstream = NULL;
     int i, j;
@@ -631,6 +683,14 @@  static int ogg_write_header(AVFormatContext *s)
     return 0;
 }
 
+/* Write the stream headers right away, unless cover art is still awaited:
+ * in that case ogg_queue_flush() calls ogg_finish_header() later on. */
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *ctx = s->priv_data;
+    return ctx->has_attached_pic ? 0 : ogg_finish_header(s);
+}
+
 static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -683,20 +743,178 @@  static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     return 0;
 }
 
-static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
+/* Store the queued cover art as a base64 METADATA_BLOCK_PICTURE tag on the
+ * audio stream and release the packet parked in the picture's priv_data. */
+static int ogg_attach_pic_to_audio_metadata(AVFormatContext *s)
 {
-    int i;
+    OGGContext *c = s->priv_data;
+    AVStream *st = s->streams[c->attached_pic_idx];
+    AVStream *audio_stream = s->streams[c->audio_stream_idx];
+    AVPacket *pkt = st->priv_data;
+    const AVPixFmtDescriptor *pixdesc;
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    AVDictionaryEntry *e;
+    const char *mimetype = NULL, *desc = "";
+    unsigned int i, mimelen, desclen, type = 0, blocklen;
+    uint8_t *ptr, *metadata_block_picture = NULL;
+    char *encoded = NULL;
+    int ret = 0;
 
-    if (pkt)
-        return ogg_write_packet_internal(s, pkt);
+    /* nothing was stored, e.g. because cloning the picture packet failed */
+    if (!pkt || !pkt->data)
+        goto end;
 
-    for (i = 0; i < s->nb_streams; i++) {
-        OGGStreamContext *oggstream = s->streams[i]->priv_data;
-        if (oggstream->page.segments_count)
-            ogg_buffer_page(s, oggstream);
+    while (mime->id != AV_CODEC_ID_NONE) {
+        if (mime->id == st->codecpar->codec_id) {
+            mimetype = mime->str;
+            break;
+        }
+        mime++;
     }
+    if (!mimetype) {
+        av_log(s, AV_LOG_ERROR, "No mimetype is known for stream %d, cannot "
+               "write an attached picture.\n", st->index);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+    mimelen = strlen(mimetype);
 
-    ogg_write_pages(s, 2);
+    /* get the picture type */
+    e = av_dict_get(st->metadata, "comment", NULL, 0);
+    for (i = 0; e && i < FF_ARRAY_ELEMS(ff_id3v2_picture_types); i++) {
+        if (!av_strcasecmp(e->value, ff_id3v2_picture_types[i])) {
+            type = i;
+            break;
+        }
+    }
+
+    if (type == 1 && (st->codecpar->codec_id != AV_CODEC_ID_PNG ||
+                      st->codecpar->width != 32 || st->codecpar->height != 32)) {
+        av_log(s, AV_LOG_ERROR, "File icon attachment must be a 32x32 PNG\n");
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    /* get the description */
+    if ((e = av_dict_get(st->metadata, "title", NULL, 0)))
+        desc = e->value;
+    desclen = strlen(desc);
+
+    blocklen = 4 + 4 + mimelen + 4 + desclen + 4 + 4 + 4 + 4 + 4 + pkt->size;
+    if (blocklen >= 1<<24) {
+        av_log(s, AV_LOG_ERROR, "Picture block too big %u >= %d\n", blocklen, 1<<24);
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    metadata_block_picture = av_mallocz(blocklen);
+    encoded = av_mallocz(AV_BASE64_SIZE(blocklen));
+    if (!metadata_block_picture || !encoded) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    ptr = metadata_block_picture;
+    bytestream_put_be32(&ptr, type);
+    bytestream_put_be32(&ptr, mimelen);
+    bytestream_put_buffer(&ptr, mimetype, mimelen);
+    bytestream_put_be32(&ptr, desclen);
+    bytestream_put_buffer(&ptr, desc, desclen);
+    bytestream_put_be32(&ptr, st->codecpar->width);
+    bytestream_put_be32(&ptr, st->codecpar->height);
+    pixdesc = av_pix_fmt_desc_get(st->codecpar->format);
+    bytestream_put_be32(&ptr, pixdesc ? av_get_bits_per_pixel(pixdesc) : 0);
+    bytestream_put_be32(&ptr, 0);
+    bytestream_put_be32(&ptr, pkt->size);
+    bytestream_put_buffer(&ptr, pkt->data, pkt->size);
+    av_base64_encode(encoded, AV_BASE64_SIZE(blocklen), metadata_block_picture, blocklen);
+    ret = av_dict_set(&audio_stream->metadata, "METADATA_BLOCK_PICTURE", encoded, 0);
+end:
+    av_free(metadata_block_picture);
+    av_free(encoded);
+    /* priv_data is reused for the OGGStreamContext, so release the packet */
+    av_packet_free(&pkt);
+    st->priv_data = NULL;
+    return ret < 0 ? ret : 0;
+}
+
+/* Attach the cover art (if any), finish stream setup, then drain the queue. */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    /* no picture is stored when cloning failed or the queue ran out of memory */
+    int ret = s->streams[c->attached_pic_idx]->priv_data ?
+              ogg_attach_pic_to_audio_metadata(s) : 0;
+
+    if (ret >= 0)
+        ret = ogg_finish_init(s);
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    /* always empty the queue, but stop writing after the first error */
+    while (c->queue.head) {
+        avpriv_packet_list_get(&c->queue, pkt);
+        if (ret >= 0)
+            ret = ogg_write_packet_internal(s, pkt);
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
+/* Mux one packet, or flush buffered pages when pkt is NULL. While cover art is
+ * pending, regular packets are queued until the picture has been stored. */
+static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    OGGContext *c = s->priv_data;
+    int i, ret;
+
+    if (pkt) {
+        AVStream *st = s->streams[pkt->stream_index];
+
+        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
+            if (!c->has_attached_pic)
+                return ogg_write_packet_internal(s, pkt);
+
+            /* buffer every regular stream until we get the picture */
+            ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+            if (ret >= 0)
+                return 0;
+            av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+            c->has_attached_pic = false;
+            ret = ogg_queue_flush(s);
+            return ret < 0 ? ret : ogg_write_packet_internal(s, pkt);
+        }
+
+        /* pictures are dropped unless this is the awaited cover art */
+        if (!c->has_attached_pic || pkt->stream_index != c->attached_pic_idx) {
+            /* warn only once for each stream */
+            if (st->nb_frames == 1)
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                       " ignoring.\n", pkt->stream_index);
+            return 0;
+        }
+
+        st->priv_data = av_packet_clone(pkt);
+        if (!st->priv_data)
+            av_log(s, AV_LOG_ERROR, "Out of memory queueing an attached picture; skipping\n");
+        c->has_attached_pic = false;
+
+        /* flush the buffered packets; 0 (not 1) as this is no flush request */
+        ret = ogg_queue_flush(s);
+        return ret < 0 ? ret : 0;
+    }
+
+    /* stream contexts do not exist before the deferred setup has run */
+    if (c->has_attached_pic)
+        return 0;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        OGGStreamContext *oggstream = s->streams[i]->priv_data;
+        if (oggstream->page.segments_count)
+            ogg_buffer_page(s, oggstream);
+    }
+
+    ogg_write_pages(s, 2);
     return 1;
 }
 
@@ -734,7 +952,9 @@  static void ogg_free(AVFormatContext *s)
             st->codecpar->codec_id == AV_CODEC_ID_VP8) {
             av_freep(&oggstream->header[0]);
         }
-        av_freep(&oggstream->header[1]);
+        if(st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+           st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+            av_freep(&oggstream->header[1]);
     }
 
     while (p) {
@@ -840,6 +1060,7 @@  const AVOutputFormat ff_opus_muxer = {
     .extensions        = "opus",
     .priv_data_size    = sizeof(OGGContext),
     .audio_codec       = AV_CODEC_ID_OPUS,
+    .video_codec       = AV_CODEC_ID_PNG,
     .init              = ogg_init,
     .write_header      = ogg_write_header,
     .write_packet      = ogg_write_packet,