diff mbox series

[FFmpeg-devel] ffmpeg: pass encoder init AVFrame side data to output AVStream

Message ID 20230305111720.6019-1-jeebjp@gmail.com
State New
Headers show
Series [FFmpeg-devel] ffmpeg: pass encoder init AVFrame side data to output AVStream | expand

Commit Message

Jan Ekström March 5, 2023, 11:17 a.m. UTC
This enables passing through various side data during encoding,
which is not yet in AVCodecContext/AVCodecParameters, but is read
from AVStream side data in muxers.

Additionally, add a FATE test that demonstrates PNG->J2K MP4
transcoding with the ICC profile getting passed through.
---
 fftools/ffmpeg.c                             |  6 ++
 fftools/ffmpeg.h                             |  1 +
 fftools/ffmpeg_mux.c                         | 47 +++++++++++++
 tests/fate/ffmpeg.mak                        |  4 ++
 tests/ref/fate/ffmpeg-side-data-to-avstreams | 72 ++++++++++++++++++++
 5 files changed, 130 insertions(+)
 create mode 100644 tests/ref/fate/ffmpeg-side-data-to-avstreams

Comments

Jan Ekström March 5, 2023, 6:16 p.m. UTC | #1
On Sun, Mar 5, 2023 at 1:17 PM Jan Ekström <jeebjp@gmail.com> wrote:
>
> This enables passing through various side data during encoding,
> which is not yet in AVCodecContext/AVCodecParameters, but is read
> from AVStream side data in muxers.
>
> Additionally, add a FATE test that demonstrates PNG->J2K MP4
> transcoding with the ICC profile getting passed through.
> ---

Just tested locally, and going PNG->MP4 (RGB H.264)->PNG actually does
work with ffmpeg.c now with regard to ICC side data :) So something
like this does actually help with round-tripping (most likely since
the side data goes from the AVStream to the first AVPacket and thus
into the first AVFrame, which then gets picked up by pngenc).

Currently this is limited to video. Audio cannot work with the
avfilter peek API: the peeked frame gets buffered, so any further
adjustment of the audio frame size will fail, because the buffered
frame gets output first (with whatever size was originally
requested).

>  fftools/ffmpeg.c                             |  6 ++
>  fftools/ffmpeg.h                             |  1 +
>  fftools/ffmpeg_mux.c                         | 47 +++++++++++++
>  tests/fate/ffmpeg.mak                        |  4 ++
>  tests/ref/fate/ffmpeg-side-data-to-avstreams | 72 ++++++++++++++++++++
>  5 files changed, 130 insertions(+)
>  create mode 100644 tests/ref/fate/ffmpeg-side-data-to-avstreams
>
> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
> index d721a5e721..134258b825 100644
> --- a/fftools/ffmpeg.c
> +++ b/fftools/ffmpeg.c
> @@ -3113,6 +3113,12 @@ static int init_output_stream_encode(OutputStream *ost, AVFrame *frame)
>                                                   av_pix_fmt_desc_get(enc_ctx->pix_fmt)->comp[0].depth);
>
>          if (frame) {
> +            if (!(ost->side_data_frame = av_frame_alloc()))
> +                return AVERROR(ENOMEM);
> +
> +            if ((ret = av_frame_copy_props(ost->side_data_frame, frame)) < 0)
> +                return ret;
> +
>              enc_ctx->color_range            = frame->color_range;
>              enc_ctx->color_primaries        = frame->color_primaries;
>              enc_ctx->color_trc              = frame->color_trc;
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index 4d4433f5ba..21dd0f3a9e 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -593,6 +593,7 @@ typedef struct OutputStream {
>      AVFrame *filtered_frame;
>      AVFrame *last_frame;
>      AVFrame *sq_frame;
> +    AVFrame *side_data_frame;
>      AVPacket *pkt;
>      int64_t last_dropped;
>      int64_t last_nb0_frames[3];
> diff --git a/fftools/ffmpeg_mux.c b/fftools/ffmpeg_mux.c
> index cf58051949..13da9ab8da 100644
> --- a/fftools/ffmpeg_mux.c
> +++ b/fftools/ffmpeg_mux.c
> @@ -580,6 +580,49 @@ static int bsf_init(MuxStream *ms)
>      return 0;
>  }
>
> +static int avframe_side_data_to_avstream(AVStream *stream, const AVFrame *frame)
> +{
> +    static const struct sd_mapping {
> +        enum AVPacketSideDataType packet;
> +        enum AVFrameSideDataType frame;
> +    } sd_list[] = {
> +        { AV_PKT_DATA_REPLAYGAIN ,                AV_FRAME_DATA_REPLAYGAIN },
> +        { AV_PKT_DATA_DISPLAYMATRIX,              AV_FRAME_DATA_DISPLAYMATRIX },
> +        { AV_PKT_DATA_SPHERICAL,                  AV_FRAME_DATA_SPHERICAL },
> +        { AV_PKT_DATA_STEREO3D,                   AV_FRAME_DATA_STEREO3D },
> +        { AV_PKT_DATA_AUDIO_SERVICE_TYPE,         AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
> +        { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
> +        { AV_PKT_DATA_CONTENT_LIGHT_LEVEL,        AV_FRAME_DATA_CONTENT_LIGHT_LEVEL },
> +        { AV_PKT_DATA_A53_CC,                     AV_FRAME_DATA_A53_CC },
> +        { AV_PKT_DATA_ICC_PROFILE,                AV_FRAME_DATA_ICC_PROFILE },
> +        { AV_PKT_DATA_S12M_TIMECODE,              AV_FRAME_DATA_S12M_TIMECODE },
> +        { AV_PKT_DATA_DYNAMIC_HDR10_PLUS,         AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
> +    };

For the record, this listing was taken from
ff_decode_frame_props_from_pkt. At the end of the day it should
probably live in avcodec, behind a "get one from the other" or
"iterate over all the mappings" sort of API.

Then this function adding stuff to AVStream could be in avformat
without the list being duplicated.

This code is here in this patch mostly to demonstrate that passing
additional side data through like this would help.

Jan
diff mbox series

Patch

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index d721a5e721..134258b825 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -3113,6 +3113,12 @@  static int init_output_stream_encode(OutputStream *ost, AVFrame *frame)
                                                  av_pix_fmt_desc_get(enc_ctx->pix_fmt)->comp[0].depth);
 
         if (frame) {
+            if (!(ost->side_data_frame = av_frame_alloc()))
+                return AVERROR(ENOMEM);
+
+            if ((ret = av_frame_copy_props(ost->side_data_frame, frame)) < 0)
+                return ret;
+
             enc_ctx->color_range            = frame->color_range;
             enc_ctx->color_primaries        = frame->color_primaries;
             enc_ctx->color_trc              = frame->color_trc;
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 4d4433f5ba..21dd0f3a9e 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -593,6 +593,7 @@  typedef struct OutputStream {
     AVFrame *filtered_frame;
     AVFrame *last_frame;
     AVFrame *sq_frame;
+    AVFrame *side_data_frame;
     AVPacket *pkt;
     int64_t last_dropped;
     int64_t last_nb0_frames[3];
diff --git a/fftools/ffmpeg_mux.c b/fftools/ffmpeg_mux.c
index cf58051949..13da9ab8da 100644
--- a/fftools/ffmpeg_mux.c
+++ b/fftools/ffmpeg_mux.c
@@ -580,6 +580,49 @@  static int bsf_init(MuxStream *ms)
     return 0;
 }
 
+static int avframe_side_data_to_avstream(AVStream *stream, const AVFrame *frame)
+{
+    static const struct sd_mapping {
+        enum AVPacketSideDataType packet;
+        enum AVFrameSideDataType frame;
+    } sd_list[] = {
+        { AV_PKT_DATA_REPLAYGAIN ,                AV_FRAME_DATA_REPLAYGAIN },
+        { AV_PKT_DATA_DISPLAYMATRIX,              AV_FRAME_DATA_DISPLAYMATRIX },
+        { AV_PKT_DATA_SPHERICAL,                  AV_FRAME_DATA_SPHERICAL },
+        { AV_PKT_DATA_STEREO3D,                   AV_FRAME_DATA_STEREO3D },
+        { AV_PKT_DATA_AUDIO_SERVICE_TYPE,         AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
+        { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
+        { AV_PKT_DATA_CONTENT_LIGHT_LEVEL,        AV_FRAME_DATA_CONTENT_LIGHT_LEVEL },
+        { AV_PKT_DATA_A53_CC,                     AV_FRAME_DATA_A53_CC },
+        { AV_PKT_DATA_ICC_PROFILE,                AV_FRAME_DATA_ICC_PROFILE },
+        { AV_PKT_DATA_S12M_TIMECODE,              AV_FRAME_DATA_S12M_TIMECODE },
+        { AV_PKT_DATA_DYNAMIC_HDR10_PLUS,         AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
+    };
+
+    if (!frame || !frame->nb_side_data)
+        return 0;
+
+    if (!stream)
+        return AVERROR(EINVAL);
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(sd_list); i++) {
+        const struct sd_mapping mapping = sd_list[i];
+        uint8_t *packet_sd  = NULL;
+        AVFrameSideData *sd = av_frame_get_side_data(frame, mapping.frame);
+        if (!sd)
+            continue;
+
+        packet_sd = av_stream_new_side_data(stream, mapping.packet,
+                                            sd->size);
+        if (!packet_sd)
+            return AVERROR(ENOMEM);
+
+        memcpy(packet_sd, sd->data, sd->size);
+    }
+
+    return 0;
+}
+
 int of_stream_init(OutputFile *of, OutputStream *ost)
 {
     Muxer *mux = mux_from_of(of);
@@ -589,6 +632,9 @@  int of_stream_init(OutputFile *of, OutputStream *ost)
     if (ost->sq_idx_mux >= 0)
         sq_set_tb(mux->sq_mux, ost->sq_idx_mux, ost->mux_timebase);
 
+    if ((ret = avframe_side_data_to_avstream(ost->st, ost->side_data_frame)) < 0)
+        return ret;
+
     /* initialize bitstream filters for the output stream
      * needs to be done here, because the codec id for streamcopy is not
      * known until now */
@@ -666,6 +712,7 @@  static void ost_free(OutputStream **post)
     av_frame_free(&ost->filtered_frame);
     av_frame_free(&ost->sq_frame);
     av_frame_free(&ost->last_frame);
+    av_frame_free(&ost->side_data_frame);
     av_packet_free(&ost->pkt);
     av_dict_free(&ost->encoder_opts);
 
diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index 0f33c2a0ed..3d6ef2a8fb 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@@ -224,3 +224,7 @@  FATE_TIME_BASE-$(call PARSERDEMDEC, MPEGVIDEO, MPEGPS, MPEG2VIDEO, MPEGVIDEO_DEM
 fate-time_base: CMD = md5 -i $(TARGET_SAMPLES)/mpeg2/dvd_single_frame.vob -an -sn -c:v copy -r 25 -time_base 1001:30000 -fflags +bitexact -f mxf
 
 FATE_SAMPLES_FFMPEG-yes += $(FATE_TIME_BASE-yes)
+
+FATE_SAMPLES_FFMPEG-$(call TRANSCODE, JPEG2000 PNG, MP4 IMAGE_PNG_PIPE) += fate-ffmpeg-side-data-to-avstreams
+fate-ffmpeg-side-data-to-avstreams: CMD = transcode png_pipe $(TARGET_SAMPLES)/png1/lena-int_rgb24.png\
+                                          mp4 "-c jpeg2000" "" "-show_streams"
diff --git a/tests/ref/fate/ffmpeg-side-data-to-avstreams b/tests/ref/fate/ffmpeg-side-data-to-avstreams
new file mode 100644
index 0000000000..7681861c94
--- /dev/null
+++ b/tests/ref/fate/ffmpeg-side-data-to-avstreams
@@ -0,0 +1,72 @@ 
+95be5b4c97ba8989fb738df35cb8b7f2 *tests/data/fate/ffmpeg-side-data-to-avstreams.mp4
+36472 tests/data/fate/ffmpeg-side-data-to-avstreams.mp4
+#tb 0: 1/25
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 128x128
+#sar 0: 1/1
+0,          0,          0,        1,    49152, 0xf6fe3b30
+[STREAM]
+index=0
+codec_name=jpeg2000
+profile=0
+codec_type=video
+codec_tag_string=mp4v
+codec_tag=0x7634706d
+width=128
+height=128
+coded_width=128
+coded_height=128
+closed_captions=0
+film_grain=0
+has_b_frames=0
+sample_aspect_ratio=1:1
+display_aspect_ratio=1:1
+pix_fmt=rgb24
+level=-99
+color_range=unknown
+color_space=unknown
+color_transfer=unknown
+color_primaries=unknown
+chroma_location=unspecified
+field_order=bt
+refs=1
+id=0x1
+r_frame_rate=25/1
+avg_frame_rate=25/1
+time_base=1/12800
+start_pts=0
+start_time=0.000000
+duration_ts=512
+duration=0.040000
+bit_rate=6503600
+max_bit_rate=N/A
+bits_per_raw_sample=8
+nb_frames=1
+nb_read_frames=N/A
+nb_read_packets=N/A
+DISPOSITION:default=1
+DISPOSITION:dub=0
+DISPOSITION:original=0
+DISPOSITION:comment=0
+DISPOSITION:lyrics=0
+DISPOSITION:karaoke=0
+DISPOSITION:forced=0
+DISPOSITION:hearing_impaired=0
+DISPOSITION:visual_impaired=0
+DISPOSITION:clean_effects=0
+DISPOSITION:attached_pic=0
+DISPOSITION:timed_thumbnails=0
+DISPOSITION:captions=0
+DISPOSITION:descriptions=0
+DISPOSITION:metadata=0
+DISPOSITION:dependent=0
+DISPOSITION:still_image=0
+TAG:language=und
+TAG:handler_name=VideoHandler
+TAG:vendor_id=[0][0][0][0]
+TAG:encoder=Lavc jpeg2000
+[SIDE_DATA]
+side_data_type=ICC Profile
+[/SIDE_DATA]
+[/STREAM]