diff mbox series

[FFmpeg-devel,v5,2/2] avformat/oggenc: Add support for embedding cover art

Message ID 2prhXCcpV70CstHKWSS-wQ489jmNJ_qmpT7duv2eVrM4ApWiybsNv3DU8WzJwpTN6PPf2Iu8AeDJ6Djk9_so8sMjEdSqNmjinYBFxEXzfoA=@protonmail.com
State New
Headers show
Series [FFmpeg-devel,v5,1/2] avformat/flac_picture: Add ff_flac_write_picture | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Zsolt Vadász March 10, 2023, 11:26 a.m. UTC
Fixes ticket #4448. A video stream is embedded as cover art only if it has AV_DISPOSITION_ATTACHED_PIC set.

Signed-off-by: Zsolt Vadasz <zsolt_vadasz@protonmail.com>
---
 libavformat/oggenc.c | 214 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 176 insertions(+), 38 deletions(-)
diff mbox series

Patch

diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 2e582d0754..ace2ef840b 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -23,19 +23,28 @@ 
 
 #include <stdint.h>
 
+#include "libavcodec/codec_id.h"
+#include "libavutil/avutil.h"
 #include "libavutil/crc.h"
+#include "libavutil/log.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+#include "libavutil/base64.h"
+#include "libavutil/bswap.h"
 #include "libavcodec/xiph.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/flac.h"
 #include "avformat.h"
+#include "id3v2.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "mux.h"
 #include "version.h"
 #include "vorbiscomment.h"
+#include "flac_picture.h"
 
 #define MAX_PAGE_SIZE 65025
 
@@ -78,6 +87,11 @@  typedef struct OGGContext {
     int pref_size; ///< preferred page size (0 => fill all segments)
     int64_t pref_duration;      ///< preferred page duration (0 => fill all segments)
     int serial_offset;
+
+    PacketList queue;
+    int audio_stream_idx;
+    int waiting_pics;
+    unsigned attached_types;
 } OGGContext;
 
 #define OFFSET(x) offsetof(OGGContext, x)
@@ -469,12 +483,14 @@  static void ogg_write_pages(AVFormatContext *s, int flush)
     ogg->page_list = p;
 }
 
-static int ogg_init(AVFormatContext *s)
+static int ogg_finish_init(AVFormatContext *s)
 {
     OGGContext *ogg = s->priv_data;
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
+    ogg->waiting_pics = 0;
+
     if (ogg->pref_size)
         av_log(s, AV_LOG_WARNING, "The pagesize option is deprecated\n");
 
@@ -482,29 +498,10 @@  static int ogg_init(AVFormatContext *s)
         AVStream *st = s->streams[i];
         unsigned serial_num = i + ogg->serial_offset;
 
-        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
-                /* Opus requires a fixed 48kHz clock */
-                avpriv_set_pts_info(st, 64, 1, 48000);
-            else
-                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
-        }
-
-        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
-            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
-            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
-            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
-            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
-            return AVERROR(EINVAL);
-        }
+        if(st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+           (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            continue;
 
-        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
-            st->codecpar->codec_id != AV_CODEC_ID_VP8) {
-            av_log(s, AV_LOG_ERROR, "No extradata present\n");
-            return AVERROR_INVALIDDATA;
-        }
         oggstream = av_mallocz(sizeof(*oggstream));
         if (!oggstream)
             return AVERROR(ENOMEM);
@@ -515,8 +512,11 @@  static int ogg_init(AVFormatContext *s)
             do {
                 serial_num = av_get_random_seed();
                 for (j = 0; j < i; j++) {
+                    // NULL for attached_pic
                     OGGStreamContext *sc = s->streams[j]->priv_data;
-                    if (serial_num == sc->serial_num)
+                    if(!sc)
+                        continue;
+                    else if (serial_num == sc->serial_num)
                         break;
                 }
             } while (j < i);
@@ -563,9 +563,9 @@  static int ogg_init(AVFormatContext *s)
             int framing_bit = st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 1 : 0;
 
             if (avpriv_split_xiph_headers(st->codecpar->extradata, st->codecpar->extradata_size,
-                                      st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
-                                      (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
-                av_log(s, AV_LOG_ERROR, "Extradata corrupted\n");
+                                          st->codecpar->codec_id == AV_CODEC_ID_VORBIS ? 30 : 42,
+                                          (const uint8_t**)oggstream->header, oggstream->header_len) < 0) {
+                av_log(s, AV_LOG_ERROR, "Extradata corrupted for stream #%d\n", i);
                 oggstream->header[1] = NULL;
                 return AVERROR_INVALIDDATA;
             }
@@ -602,13 +602,67 @@  static int ogg_init(AVFormatContext *s)
     return 0;
 }
 
-static int ogg_write_header(AVFormatContext *s)
+/* Validate the streams and count the attached pictures to wait for.
+ * The pagesize deprecation warning is left to ogg_finish_init(), which
+ * this function calls right away when no picture is expected. */
+static int ogg_init(AVFormatContext *s)
+{
+    OGGContext *ogg = s->priv_data;
+    int i;
+
+    ogg->waiting_pics = 0;
+    ogg->attached_types = 0;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+            ogg->audio_stream_idx = i;
+            if (st->codecpar->codec_id == AV_CODEC_ID_OPUS)
+                /* Opus requires a fixed 48kHz clock */
+                avpriv_set_pts_info(st, 64, 1, 48000);
+            else
+                avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
+        }
+
+        if (st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
+            st->codecpar->codec_id != AV_CODEC_ID_THEORA &&
+            st->codecpar->codec_id != AV_CODEC_ID_SPEEX  &&
+            st->codecpar->codec_id != AV_CODEC_ID_FLAC   &&
+            st->codecpar->codec_id != AV_CODEC_ID_OPUS   &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8    &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG    &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "Unsupported codec id in stream %d\n", i);
+            return AVERROR(EINVAL);
+        }
+
+        if ((!st->codecpar->extradata || !st->codecpar->extradata_size) &&
+            st->codecpar->codec_id != AV_CODEC_ID_VP8 &&
+            st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG) {
+            av_log(s, AV_LOG_ERROR, "No extradata present\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
+            (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            ogg->waiting_pics++;
+    }
+
+    if (!ogg->waiting_pics)
+        return ogg_finish_init(s);
+    return 0;
+}
+
+static int ogg_finish_header(AVFormatContext *s)
 {
     OGGStreamContext *oggstream = NULL;
     int i, j;
 
     for (j = 0; j < s->nb_streams; j++) {
         oggstream = s->streams[j]->priv_data;
+        if(!oggstream)
+            continue;
         ogg_buffer_data(s, s->streams[j], oggstream->header[0],
                         oggstream->header_len[0], 0, 1);
         oggstream->page.flags |= 2; // bos
@@ -617,6 +671,8 @@  static int ogg_write_header(AVFormatContext *s)
     for (j = 0; j < s->nb_streams; j++) {
         AVStream *st = s->streams[j];
         oggstream = st->priv_data;
+        if(!oggstream)
+            continue;
         for (i = 1; i < 3; i++) {
             if (oggstream->header_len[i])
                 ogg_buffer_data(s, st, oggstream->header[i],
@@ -632,6 +688,14 @@  static int ogg_write_header(AVFormatContext *s)
     return 0;
 }
 
+/* Emit the stream headers now unless attached pictures are still pending. */
+static int ogg_write_header(AVFormatContext *s)
+{
+    const OGGContext *ogg = s->priv_data;
+
+    return ogg->waiting_pics ? 0 : ogg_finish_header(s);
+}
+
 static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -684,20 +748,89 @@  static int ogg_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     return 0;
 }
 
+/* Finish the deferred init, write the headers, then drain the packet queue. */
+static int ogg_queue_flush(AVFormatContext *s)
+{
+    OGGContext *c = s->priv_data;
+    AVPacket *const pkt = ffformatcontext(s)->pkt;
+    int err, ret = ogg_finish_init(s);
+
+    /* Do not write headers when the stream setup failed; keep that error. */
+    if (ret >= 0)
+        ret = ogg_finish_header(s);
+
+    /* Always empty the queue, but stop writing after the first error. */
+    while (c->queue.head) {
+        avpriv_packet_list_get(&c->queue, pkt);
+        if (ret >= 0 && (err = ogg_write_packet_internal(s, pkt)) < 0)
+            ret = err;
+        av_packet_unref(pkt);
+    }
+    return ret;
+}
+
+/**
+ * Write one packet, or flush the buffered pages when pkt is NULL.
+ * While attached pictures are pending, all other packets are queued.
+ * @return 0 on success, 1 after a flush, a negative error code on failure
+ */
 static int ogg_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int i;
+    OGGContext *c = s->priv_data;
+    AVStream *st;
+    int i, ret;
 
-    if (pkt)
-        return pkt->size ? ogg_write_packet_internal(s, pkt) : 0;
+    if (pkt) {
+        /* pkt is NULL on a flush request, so look the stream up only here */
+        st = s->streams[pkt->stream_index];
+        if (st->disposition & AV_DISPOSITION_ATTACHED_PIC) {
+            /* ogg_finish_init() sets up no Ogg stream for pictures, so one
+             * that can no longer be embedded is dropped, never muxed */
+            if (!c->waiting_pics)
+                return 0;
+            if (st->nb_frames >= 1) {
+                /* warn only once for each stream */
+                if (st->nb_frames == 1)
+                    av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                           " ignoring.\n", pkt->stream_index);
+                return 0;
+            }
+            ret = ff_flac_write_picture(s, 1, &c->attached_types,
+                                        c->audio_stream_idx, pkt);
+            if (ret < 0) {
+                av_log(s, AV_LOG_ERROR, "Failed to process attached picture.\n");
+                return ret;
+            }
+            /* the last picture releases the buffered packets */
+            if (!--c->waiting_pics &&
+                (ret = ogg_queue_flush(s)) < 0)
+                return ret;
+            return 0;
+        }
 
-    for (i = 0; i < s->nb_streams; i++) {
-        OGGStreamContext *oggstream = s->streams[i]->priv_data;
-        if (oggstream->page.segments_count)
-            ogg_buffer_page(s, oggstream);
-    }
+        if (!pkt->size)
+            return 0;
+        if (c->waiting_pics) {
+            /* the headers are not written yet: buffer the packet */
+            ret = avpriv_packet_list_put(&c->queue, pkt, NULL, 0);
+            if (ret >= 0)
+                return 0;
+            av_log(s, AV_LOG_ERROR, "Out of memory in packet queue; skipping attached pictures\n");
+            c->waiting_pics = 0;
+            if ((ret = ogg_queue_flush(s)) < 0)
+                return ret;
+        }
+        return ogg_write_packet_internal(s, pkt);
+    } else {
+        for (i = 0; i < s->nb_streams; i++) {
+            OGGStreamContext *oggstream = s->streams[i]->priv_data;
+            /* priv_data is NULL for attached-picture streams */
+            if (oggstream && oggstream->page.segments_count)
+                ogg_buffer_page(s, oggstream);
+        }
 
-    ogg_write_pages(s, 2);
+        ogg_write_pages(s, 2);
+    }
     return 1;
 }
 
@@ -708,6 +841,8 @@  static int ogg_write_trailer(AVFormatContext *s)
     /* flush current page if needed */
     for (i = 0; i < s->nb_streams; i++) {
         OGGStreamContext *oggstream = s->streams[i]->priv_data;
+        if(!oggstream)
+            continue;
 
         if (oggstream->page.size > 0)
             ogg_buffer_page(s, oggstream);
@@ -735,7 +870,9 @@  static void ogg_free(AVFormatContext *s)
             st->codecpar->codec_id == AV_CODEC_ID_VP8) {
             av_freep(&oggstream->header[0]);
         }
-        av_freep(&oggstream->header[1]);
+        if (st->codecpar->codec_id != AV_CODEC_ID_PNG &&
+            st->codecpar->codec_id != AV_CODEC_ID_MJPEG)
+            av_freep(&oggstream->header[1]);
     }
 
     while (p) {
@@ -841,6 +978,7 @@  const FFOutputFormat ff_opus_muxer = {
     .p.extensions      = "opus",
     .priv_data_size    = sizeof(OGGContext),
     .p.audio_codec     = AV_CODEC_ID_OPUS,
+    .p.video_codec       = AV_CODEC_ID_PNG,
     .init              = ogg_init,
     .write_header      = ogg_write_header,
     .write_packet      = ogg_write_packet,