diff mbox

[FFmpeg-devel,3/3] avformat/movenc: support writing iTunes cover image

Message ID 20180402083658.24460-4-timo.teras@iki.fi
State Superseded
Headers show

Commit Message

Timo Teräs April 2, 2018, 8:36 a.m. UTC
Fixes https://trac.ffmpeg.org/ticket/2798

This makes movenc handle AV_DISPOSITION_ATTACHED_PIC and write
the associated pictures in iTunes cover atom. This corresponds
to how 'mov' demuxer parses and exposes the cover images when
reading.

Most of the existing track handling loops properly ignore
these 'virtual streams' as MOVTrack->entry is never incremented
for them. However, additional tests are added as needed to ignore
them.

Tested to produce valid output with:
  ffmpeg -i movie.mp4 -i thumb.jpg -disposition:v:1 attached_pic \
         -map 0 -map 1 -c copy movie-with-cover.mp4

The cover image is also copied correctly with:
  ffmpeg -i movie-with-cover.mp4 -map 0 -c copy out.mp4

AtomicParseley says that the attached_pic stream is properly
not visible in the main tracks of the file.

Signed-off-by: Timo Teräs <timo.teras@iki.fi>
---
 libavformat/avformat.h |  2 +-
 libavformat/movenc.c   | 77 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 75 insertions(+), 4 deletions(-)

Comments

Timo Teräs April 11, 2018, 5:22 a.m. UTC | #1
Ping.

Any comments on this?

There was also recently the flacenc support for cover images, and the
question if some of this code should be moved to generic code in mux.c so
both can share it: e.g. storing the image packets to attached_pic - or if
that should be moved to the muxer private structs.

Thanks
Timo


On Mon, Apr 2, 2018 at 11:36 AM, Timo Teräs <timo.teras@iki.fi> wrote:

> Fixes https://trac.ffmpeg.org/ticket/2798
>
> This makes movenc handle AV_DISPOSITION_ATTACHED_PIC and write
> the associated pictures in iTunes cover atom. This corresponds
> to how 'mov' demuxer parses and exposes the cover images when
> reading.
>
> Most of the existing track handling loops properly ignore
> these 'virtual streams' as MOVTrack->entry is never incremented
> for them. However, additional tests are added as needed to ignore
> them.
>
> Tested to produce valid output with:
>   ffmpeg -i movie.mp4 -i thumb.jpg -disposition:v:1 attached_pic \
>          -map 0 -map 1 -c copy movie-with-cover.mp4
>
> The cover image is also copied correctly with:
>   ffmpeg -i movie-with-cover.mp4 -map 0 -c copy out.mp4
>
> AtomicParseley says that the attached_pic stream is properly
> not visible in the main tracks of the file.
>
> Signed-off-by: Timo Teräs <timo.teras@iki.fi>
> ---
>  libavformat/avformat.h |  2 +-
>  libavformat/movenc.c   | 77 ++++++++++++++++++++++++++++++
> ++++++++++++++++++--
>  2 files changed, 75 insertions(+), 4 deletions(-)
>
> diff --git a/libavformat/avformat.h b/libavformat/avformat.h
> index a2fe7c6bb2..b0e6092a35 100644
> --- a/libavformat/avformat.h
> +++ b/libavformat/avformat.h
> @@ -941,7 +941,7 @@ typedef struct AVStream {
>       * will contain the attached picture.
>       *
>       * decoding: set by libavformat, must not be modified by the caller.
> -     * encoding: unused
> +     * encoding: may be set by muxer to store the image until time of
> writing.
>       */
>      AVPacket attached_pic;
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index f6a314894c..69201c4092 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -143,7 +143,9 @@ static int co64_required(const MOVTrack *track)
>
>  static int rtp_hinting_needed(const AVStream *st)
>  {
> -    /* Add hint tracks for each audio and video stream */
> +    /* Add hint tracks for each real audio and video stream */
> +    if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
> +        return 0;
>      return st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
>             st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO;
>  }
> @@ -3421,6 +3423,49 @@ static int mov_write_int8_metadata(AVFormatContext
> *s, AVIOContext *pb,
>      return size;
>  }
>
> +static int mov_write_covr(AVIOContext *pb, AVFormatContext *s)
> +{
> +    int64_t pos = 0;
> +    int i, type;
> +
> +    for (i = 0; i < s->nb_streams; i++) {
> +        AVStream *st = s->streams[i];
> +
> +        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) ||
> +            st->attached_pic.size <= 0)
> +            continue;
> +
> +        switch (st->codecpar->codec_id) {
> +        case AV_CODEC_ID_MJPEG:
> +            type = 0xD;
> +            break;
> +        case AV_CODEC_ID_PNG:
> +            type = 0xE;
> +            break;
> +        case AV_CODEC_ID_BMP:
> +            type = 0x1B;
> +            break;
> +        default:
> +            av_log(s, AV_LOG_ERROR, "unsupported codec_id (0x%x) for
> cover",
> +                   st->codecpar->codec_id);
> +            continue;
> +        }
> +
> +        if (!pos) {
> +            pos = avio_tell(pb);
> +            avio_wb32(pb, 0);
> +            ffio_wfourcc(pb, "covr");
> +        }
> +        avio_wb32(pb, 16 + st->attached_pic.size);
> +        ffio_wfourcc(pb, "data");
> +        avio_wb32(pb, type);
> +        avio_wb32(pb , 0);
> +        avio_write(pb, st->attached_pic.data, st->attached_pic.size);
> +    }
> +
> +    return pos ? update_size(pb, pos) : 0;
> +}
> +
>  /* iTunes meta data list */
>  static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
>                                AVFormatContext *s)
> @@ -3458,6 +3503,7 @@ static int mov_write_ilst_tag(AVIOContext *pb,
> MOVMuxContext *mov,
>      mov_write_int8_metadata  (s, pb, "hdvd",    "hd_video",  1);
>      mov_write_int8_metadata  (s, pb, "pgap",    "gapless_playback",1);
>      mov_write_int8_metadata  (s, pb, "cpil",    "compilation", 1);
> +    mov_write_covr(pb, s);
>      mov_write_trkn_tag(pb, mov, s, 0); // track number
>      mov_write_trkn_tag(pb, mov, s, 1); // disc number
>      mov_write_tmpo_tag(pb, s);
> @@ -3955,6 +4001,8 @@ static int mov_write_isml_manifest(AVIOContext *pb,
> MOVMuxContext *mov, AVFormat
>          } else {
>              continue;
>          }
> +        if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
> +            continue;
>
>          props = (AVCPBProperties*)av_stream_get_side_data(track->st,
> AV_PKT_DATA_CPB_PROPERTIES, NULL);
>
> @@ -4568,6 +4616,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb,
> AVFormatContext *s)
>
>      for (i = 0; i < s->nb_streams; i++) {
>          AVStream *st = s->streams[i];
> +        if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
> +            continue;
>          if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
>              has_video = 1;
>          if (st->codecpar->codec_id == AV_CODEC_ID_H264)
> @@ -4716,6 +4766,8 @@ static int mov_write_identification(AVIOContext
> *pb, AVFormatContext *s)
>          int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb
> = 0;
>          for (i = 0; i < s->nb_streams; i++) {
>              AVStream *st = s->streams[i];
> +            if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
> +                continue;
>              if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
>                  video_streams_nb++;
>              else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
> @@ -4905,7 +4957,8 @@ static int mov_flush_fragment(AVFormatContext *s,
> int force)
>          int buf_size, moov_size;
>
>          for (i = 0; i < mov->nb_streams; i++)
> -            if (!mov->tracks[i].entry)
> +            if (!mov->tracks[i].entry &&
> +                (i >= s->nb_streams || !(s->streams[i]->disposition &
> AV_DISPOSITION_ATTACHED_PIC)))
>                  break;
>          /* Don't write the initial moov unless all tracks have data */
>          if (i < mov->nb_streams && !force)
> @@ -5487,6 +5540,23 @@ static int mov_write_packet(AVFormatContext *s,
> AVPacket *pkt)
>      if (!pkt) {
>          mov_flush_fragment(s, 1);
>          return 1;
> +    } else if (s->streams[pkt->stream_index]->disposition &
> AV_DISPOSITION_ATTACHED_PIC) {
> +        AVStream *st = s->streams[pkt->stream_index];
> +        int ret;
> +
> +        if (st->nb_frames >= 1) {
> +            if (st->nb_frames == 1)
> +                av_log(s, AV_LOG_WARNING, "Got more than one picture in
> stream %d,"
> +                       " ignoring.\n", pkt->stream_index);
> +            return 0;
> +        }
> +
> +        if ((ret = av_packet_ref(&st->attached_pic, pkt)) < 0)
> +            return ret;
> +
> +        st->attached_pic.stream_index = st->index;
> +        st->attached_pic.flags       |= AV_PKT_FLAG_KEY;
> +        return 0;
>      } else {
>          int i;
>          MOVMuxContext *mov = s->priv_data;
> @@ -5737,7 +5807,8 @@ static void enable_tracks(AVFormatContext *s)
>          AVStream *st = s->streams[i];
>
>          if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN ||
> -            st->codecpar->codec_type >= AVMEDIA_TYPE_NB)
> +            st->codecpar->codec_type >= AVMEDIA_TYPE_NB ||
> +            st->disposition & AV_DISPOSITION_ATTACHED_PIC)
>              continue;
>
>          if (first[st->codecpar->codec_type] < 0)
> --
> 2.16.2
>
>
Rostislav Pehlivanov April 11, 2018, 1:25 p.m. UTC | #2
On 11 April 2018 at 06:22, Timo Teräs <timo.teras@iki.fi> wrote:

> Ping.
>
> Any comments on this?
>
> There was also recently the flacenc support for cover images, and the
> question if some of this code should be moved to generic code in mux.c so
> both can share it: e.g. storing the image packets to attached_pic - or if
> that should be moved to the muxer private structs.
>
>
I think this could be done at a later stage as well, but if you or jamrial
think it can be reused, sure, go ahead and submit a separate patch which
puts it in some common code.
Timo Teräs April 11, 2018, 5:50 p.m. UTC | #3
On Wed, 11 Apr 2018 14:25:15 +0100
Rostislav Pehlivanov <atomnuker@gmail.com> wrote:

> On 11 April 2018 at 06:22, Timo Teräs <timo.teras@iki.fi> wrote:
> 
> > Ping.
> >
> > Any comments on this?
> >
> > There was also recently the flacenc support for cover images, and
> > the question if some of this code should be moved to generic code
> > in mux.c so both can share it: e.g. storing the image packets to
> > attached_pic - or if that should be moved to the muxer private
> > structs.
>
> I think this could be done at a later stage as well, but if you or
> jamrial think it can be reused, sure, go ahead and submit a separate
> patch which puts it in some common code.

I agree that it might be easier done later. This way we get the commits
in with proper authors and have common code base to work on. Producing
a suitable framework interface might need additional discussions too.

Would there be other concerns or comments for my patch then, or could
it be considered to be committed?

Thanks
timo
diff mbox

Patch

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index a2fe7c6bb2..b0e6092a35 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -941,7 +941,7 @@  typedef struct AVStream {
      * will contain the attached picture.
      *
      * decoding: set by libavformat, must not be modified by the caller.
-     * encoding: unused
+     * encoding: may be set by muxer to store the image until time of writing.
      */
     AVPacket attached_pic;
 
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index f6a314894c..69201c4092 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -143,7 +143,9 @@  static int co64_required(const MOVTrack *track)
 
 static int rtp_hinting_needed(const AVStream *st)
 {
-    /* Add hint tracks for each audio and video stream */
+    /* Add hint tracks for each real audio and video stream */
+    if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+        return 0;
     return st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO ||
            st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO;
 }
@@ -3421,6 +3423,49 @@  static int mov_write_int8_metadata(AVFormatContext *s, AVIOContext *pb,
     return size;
 }
 
+static int mov_write_covr(AVIOContext *pb, AVFormatContext *s)
+{
+    int64_t pos = 0;
+    int i, type;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+
+        if (!(st->disposition & AV_DISPOSITION_ATTACHED_PIC) ||
+            st->attached_pic.size <= 0)
+            continue;
+
+        switch (st->codecpar->codec_id) {
+        case AV_CODEC_ID_MJPEG:
+            type = 0xD;
+            break;
+        case AV_CODEC_ID_PNG:
+            type = 0xE;
+            break;
+        case AV_CODEC_ID_BMP:
+            type = 0x1B;
+            break;
+        default:
+            av_log(s, AV_LOG_ERROR, "unsupported codec_id (0x%x) for cover",
+                   st->codecpar->codec_id);
+            continue;
+        }
+
+        if (!pos) {
+            pos = avio_tell(pb);
+            avio_wb32(pb, 0);
+            ffio_wfourcc(pb, "covr");
+        }
+        avio_wb32(pb, 16 + st->attached_pic.size);
+        ffio_wfourcc(pb, "data");
+        avio_wb32(pb, type);
+        avio_wb32(pb , 0);
+        avio_write(pb, st->attached_pic.data, st->attached_pic.size);
+    }
+
+    return pos ? update_size(pb, pos) : 0;
+}
+
 /* iTunes meta data list */
 static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
                               AVFormatContext *s)
@@ -3458,6 +3503,7 @@  static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
     mov_write_int8_metadata  (s, pb, "hdvd",    "hd_video",  1);
     mov_write_int8_metadata  (s, pb, "pgap",    "gapless_playback",1);
     mov_write_int8_metadata  (s, pb, "cpil",    "compilation", 1);
+    mov_write_covr(pb, s);
     mov_write_trkn_tag(pb, mov, s, 0); // track number
     mov_write_trkn_tag(pb, mov, s, 1); // disc number
     mov_write_tmpo_tag(pb, s);
@@ -3955,6 +4001,8 @@  static int mov_write_isml_manifest(AVIOContext *pb, MOVMuxContext *mov, AVFormat
         } else {
             continue;
         }
+        if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+            continue;
 
         props = (AVCPBProperties*)av_stream_get_side_data(track->st, AV_PKT_DATA_CPB_PROPERTIES, NULL);
 
@@ -4568,6 +4616,8 @@  static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
+        if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+            continue;
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
             has_video = 1;
         if (st->codecpar->codec_id == AV_CODEC_ID_H264)
@@ -4716,6 +4766,8 @@  static int mov_write_identification(AVIOContext *pb, AVFormatContext *s)
         int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0;
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
+            if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+                continue;
             if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
                 video_streams_nb++;
             else if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
@@ -4905,7 +4957,8 @@  static int mov_flush_fragment(AVFormatContext *s, int force)
         int buf_size, moov_size;
 
         for (i = 0; i < mov->nb_streams; i++)
-            if (!mov->tracks[i].entry)
+            if (!mov->tracks[i].entry &&
+                (i >= s->nb_streams || !(s->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC)))
                 break;
         /* Don't write the initial moov unless all tracks have data */
         if (i < mov->nb_streams && !force)
@@ -5487,6 +5540,23 @@  static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     if (!pkt) {
         mov_flush_fragment(s, 1);
         return 1;
+    } else if (s->streams[pkt->stream_index]->disposition & AV_DISPOSITION_ATTACHED_PIC) {
+        AVStream *st = s->streams[pkt->stream_index];
+        int ret;
+
+        if (st->nb_frames >= 1) {
+            if (st->nb_frames == 1)
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                       " ignoring.\n", pkt->stream_index);
+            return 0;
+        }
+
+        if ((ret = av_packet_ref(&st->attached_pic, pkt)) < 0)
+            return ret;
+
+        st->attached_pic.stream_index = st->index;
+        st->attached_pic.flags       |= AV_PKT_FLAG_KEY;
+        return 0;
     } else {
         int i;
         MOVMuxContext *mov = s->priv_data;
@@ -5737,7 +5807,8 @@  static void enable_tracks(AVFormatContext *s)
         AVStream *st = s->streams[i];
 
         if (st->codecpar->codec_type <= AVMEDIA_TYPE_UNKNOWN ||
-            st->codecpar->codec_type >= AVMEDIA_TYPE_NB)
+            st->codecpar->codec_type >= AVMEDIA_TYPE_NB ||
+            st->disposition & AV_DISPOSITION_ATTACHED_PIC)
             continue;
 
         if (first[st->codecpar->codec_type] < 0)