diff mbox series

[FFmpeg-devel] Pass the HDR10+ metadata to the packet side data in VP9 encoder

Message ID 20210423155828.3433455-1-izadi@google.com
State Superseded
Headers show
Series [FFmpeg-devel] Pass the HDR10+ metadata to the packet side data in VP9 encoder
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Mohammad Izadi April 23, 2021, 3:58 p.m. UTC
HDR10+ metadata is stored in the bit stream for HEVC. The story is different for VP9, which cannot store the metadata in the bit stream. For VP9, HDR10+ should be passed to packet side data and stored in the container (mkv).

This CL is taking HDR10+ from AVFrame side data in libvpxenc and is passing it to the AVPacket side data.
---
 libavcodec/avpacket.c  |  1 +
 libavcodec/decode.c    |  1 +
 libavcodec/libvpxenc.c | 71 ++++++++++++++++++++++++++++++++++++++++++
 libavcodec/packet.h    | 10 +++++-
 4 files changed, 82 insertions(+), 1 deletion(-)

Comments

James Zern April 23, 2021, 5:56 p.m. UTC | #1
Hi,

On Fri, Apr 23, 2021 at 8:58 AM Mohammad Izadi
<izadi-at-google.com@ffmpeg.org> wrote:
>
> HDR10+ metadata is stored in the bit stream for HEVC. The story is different for VP9, which cannot store the metadata in the bit stream. For VP9, HDR10+ should be passed to packet side data and stored in the container (mkv).
>
> This CL is taking HDR10+ from AVFrame side data in libvpxenc and is passing it to the AVPacket side data.
> ---
>  libavcodec/avpacket.c  |  1 +
>  libavcodec/decode.c    |  1 +
>  libavcodec/libvpxenc.c | 71 ++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/packet.h    | 10 +++++-
>  4 files changed, 82 insertions(+), 1 deletion(-)
>

Just some quick cosmetics, I didn't take a close look at the implementation.

> [...]
> +static int copy_hdr10_plus_to_pkt(void *list, AVPacket *pkt)
> +{
> +    struct FrameHDR10PlusList **p = list;
> +    struct FrameHDR10PlusList *head = *p;
> +
> +    if(head && pkt && head->hdr10_plus && head->pts == pkt->pts) {

Add whitespace after 'if' and elsewhere before the '{'. You can use
tools/patcheck as a guide to the formatting [1].

> [...]
>
> @@ -1245,6 +1300,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
>              AV_WB64(side_data, 1);
>              memcpy(side_data + 8, cx_frame->buf_alpha, cx_frame->sz_alpha);
>          }
> +        if(cx_frame->frame_number != -1) { // Not invisible frame

VP9 won't emit frames like this, but this is a shared path with VP8. Can we get
this metadata (unnecessarily) when doing an 8-bit encode?

> +            VPxContext *ctx = avctx->priv_data;
> +            int err = copy_hdr10_plus_to_pkt(&ctx->hdr10_plus_list, pkt);
> +            if (err < 0)
> +                return err;
> +        }
>      } else {
>          return ret;
>      }
> @@ -1579,6 +1640,7 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
>      const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
>      vpx_svc_layer_id_t layer_id;
>      int layer_id_valid = 0;
> +    AVFrameSideData *hdr10_plus_metadata;
>
>      if (frame) {
>          const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
> @@ -1655,6 +1717,15 @@ static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
>                  vp9_encode_set_roi(avctx, frame->width, frame->height, sd);
>              }
>          }
> +
> +        // Add HDR10+ metadata to queue.
> +        hdr10_plus_metadata = av_frame_get_side_data(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS);
> +        if(hdr10_plus_metadata){
> +            struct FrameHDR10PlusList *data =  av_malloc(sizeof(struct FrameHDR10PlusList));

This allocation should be checked and prefer sizeof(*data).

> +            data->pts = frame->pts;
> +            data->hdr10_plus = av_buffer_ref(hdr10_plus_metadata->buf);
> +            add_hdr10_plus(&ctx->hdr10_plus_list, data);
> +        }
>      }
>
>      // this is for encoding with preset temporal layering patterns defined in
> diff --git a/libavcodec/packet.h b/libavcodec/packet.h
> index ca18ae631f..89e683b357 100644
> --- a/libavcodec/packet.h
> +++ b/libavcodec/packet.h
> @@ -298,7 +298,15 @@ enum AVPacketSideDataType {
>       * If its value becomes huge, some code using it
>       * needs to be updated as it assumes it to be smaller than other limits.
>       */
> -    AV_PKT_DATA_NB
> +    AV_PKT_DATA_NB,

This should be the final entry in the enum, see the comment above.

> +
> +    /**
> +     * HDR10+ dynamic metadata associated with a video frame. The metadata is in
> +     * the form of the AVDynamicHDRPlus struct and contains
> +     * information for color volume transform - application 4 of
> +     * SMPTE 2094-40:2016 standard.
> +     */
> +    AV_PKT_DATA_DYNAMIC_HDR10_PLUS
>  };
>
>  #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED

[1] https://ffmpeg.org/developer.html#Coding-Rules-1
Mohammad Izadi April 27, 2021, 1:51 a.m. UTC | #2
On Fri, Apr 23, 2021 at 11:53 AM James Zern <jzern-at-google.com@ffmpeg.org>
wrote:

> Hi,
>
> On Fri, Apr 23, 2021 at 8:58 AM Mohammad Izadi
> <izadi-at-google.com@ffmpeg.org> wrote:
> >
> > HDR10+ metadata is stored in the bit stream for HEVC. The story is
> different for VP9, which cannot store the metadata in the bit stream. HDR10+
> should be passed to packet side data and stored in the container (mkv) for
> VP9.
> >
> > This CL is taking HDR10+ from AVFrame side data in libvpxenc and is
> passing it to the AVPacket side data.
> > ---
> >  libavcodec/avpacket.c  |  1 +
> >  libavcodec/decode.c    |  1 +
> >  libavcodec/libvpxenc.c | 71 ++++++++++++++++++++++++++++++++++++++++++
> >  libavcodec/packet.h    | 10 +++++-
> >  4 files changed, 82 insertions(+), 1 deletion(-)
> >
>
> Just some quick cosmetics, I didn't take a close look at the
> implementation.
>
> > [...]
> > +static int copy_hdr10_plus_to_pkt(void *list, AVPacket *pkt)
> > +{
> > +    struct FrameHDR10PlusList **p = list;
> > +    struct FrameHDR10PlusList *head = *p;
> > +
> > +    if(head && pkt && head->hdr10_plus && head->pts == pkt->pts) {
>
> Add whitespace after 'if' and elsewhere before the '{'. You can use
> tools/patcheck as a guide to the formatting [1].
>
*Fixed.*

>
> > [...]
> >
> > @@ -1245,6 +1300,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
> >              AV_WB64(side_data, 1);
> >              memcpy(side_data + 8, cx_frame->buf_alpha,
> cx_frame->sz_alpha);
> >          }
> > +        if(cx_frame->frame_number != -1) { // Not invisible frame
>
> VP9 won't emit frames like this, but this is a shared path with VP8. Can
> we get
> this metadata (unnecessarily) when doing a 8-bit encode?
>
*Great point! I planned to discard it in matroska, but we can discard HDR10+
if it is PQ HDR (high bit depth and SMPTE 2084). We shouldn't get it. Fixed
in the code.*

> > +            VPxContext *ctx = avctx->priv_data;
> > +            int err = copy_hdr10_plus_to_pkt(&ctx->hdr10_plus_list,
> pkt);
> > +            if (err < 0)
> > +                return err;
> > +        }
> >      } else {
> >          return ret;
> >      }
> > @@ -1579,6 +1640,7 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> >      const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
> >      vpx_svc_layer_id_t layer_id;
> >      int layer_id_valid = 0;
> > +    AVFrameSideData *hdr10_plus_metadata;
> >
> >      if (frame) {
> >          const AVFrameSideData *sd = av_frame_get_side_data(frame,
> AV_FRAME_DATA_REGIONS_OF_INTEREST);
> > @@ -1655,6 +1717,15 @@ static int vpx_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> >                  vp9_encode_set_roi(avctx, frame->width, frame->height,
> sd);
> >              }
> >          }
> > +
> > +        // Add HDR10+ metadata to queue.
> > +        hdr10_plus_metadata = av_frame_get_side_data(frame,
> AV_FRAME_DATA_DYNAMIC_HDR_PLUS);
> > +        if(hdr10_plus_metadata){
> > +            struct FrameHDR10PlusList *data =  av_malloc(sizeof(struct
> FrameHDR10PlusList));
>
> This allocation should be checked and prefer sizeof(*data).
>
*Fixed.*

>
> > +            data->pts = frame->pts;
> > +            data->hdr10_plus = av_buffer_ref(hdr10_plus_metadata->buf);
> > +            add_hdr10_plus(&ctx->hdr10_plus_list, data);
> > +        }
> >      }
> >
> >      // this is for encoding with preset temporal layering patterns
> defined in
> > diff --git a/libavcodec/packet.h b/libavcodec/packet.h
> > index ca18ae631f..89e683b357 100644
> > --- a/libavcodec/packet.h
> > +++ b/libavcodec/packet.h
> > @@ -298,7 +298,15 @@ enum AVPacketSideDataType {
> >       * If its value becomes huge, some code using it
> >       * needs to be updated as it assumes it to be smaller than other
> limits.
> >       */
> > -    AV_PKT_DATA_NB
> > +    AV_PKT_DATA_NB,
>
> This should be the final entry in the enum, see the comment above.
>
*Fixed.*

>

>
> > +
> > +    /**
> > +     * HDR10+ dynamic metadata associated with a video frame. The
> metadata is in
> > +     * the form of the AVDynamicHDRPlus struct and contains
> > +     * information for color volume transform - application 4 of
> > +     * SMPTE 2094-40:2016 standard.
> > +     */
> > +    AV_PKT_DATA_DYNAMIC_HDR10_PLUS
> >  };
> >
> >  #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS
> //DEPRECATED
>
> [1] https://ffmpeg.org/developer.html#Coding-Rules-1
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index b5bac5c5f2..7a3b0a73e3 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -416,6 +416,7 @@  const char *av_packet_side_data_name(enum AVPacketSideDataType type)
     case AV_PKT_DATA_ICC_PROFILE:                return "ICC Profile";
     case AV_PKT_DATA_DOVI_CONF:                  return "DOVI configuration record";
     case AV_PKT_DATA_S12M_TIMECODE:              return "SMPTE ST 12-1:2014 timecode";
+    case AV_PKT_DATA_DYNAMIC_HDR10_PLUS:         return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
     }
     return NULL;
 }
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 0956a6ac6f..bf5fbcca97 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1736,6 +1736,7 @@  int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
         { AV_PKT_DATA_A53_CC,                     AV_FRAME_DATA_A53_CC },
         { AV_PKT_DATA_ICC_PROFILE,                AV_FRAME_DATA_ICC_PROFILE },
         { AV_PKT_DATA_S12M_TIMECODE,              AV_FRAME_DATA_S12M_TIMECODE },
+        { AV_PKT_DATA_DYNAMIC_HDR10_PLUS,         AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
     };
 
     if (IS_EMPTY(pkt) && av_fifo_size(avctx->internal->pkt_props) >= sizeof(*pkt))
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 3f36943c12..373c6e8418 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -63,6 +63,12 @@  struct FrameListData {
     struct FrameListData *next;
 };
 
+typedef struct FrameHDR10PlusList {
+    int64_t pts;
+    AVBufferRef *hdr10_plus;
+    struct FrameHDR10PlusList *next;
+} FrameHDR10PlusList;
+
 typedef struct VPxEncoderContext {
     AVClass *class;
     struct vpx_codec_ctx encoder;
@@ -120,6 +126,7 @@  typedef struct VPxEncoderContext {
     int tune_content;
     int corpus_complexity;
     int tpl_model;
+    struct FrameHDR10PlusList *hdr10_plus_list;
     /**
      * If the driver does not support ROI then warn the first time we
      * encounter a frame with ROI side data.
@@ -315,6 +322,53 @@  static av_cold void free_frame_list(struct FrameListData *list)
     }
 }
 
+
+static void add_hdr10_plus(void *list, struct FrameHDR10PlusList *data)
+{
+    struct FrameHDR10PlusList **p = list;
+    while (*p)
+        p = &(*p)->next;
+    *p = data;
+    data->next = NULL;
+}
+
+static av_cold void free_hdr10_plus(struct FrameHDR10PlusList *p)
+{
+    av_buffer_unref(&p->hdr10_plus);
+    av_free(p);
+}
+
+static av_cold void free_hdr10_plus_list(struct FrameHDR10PlusList *list)
+{
+    struct FrameHDR10PlusList *p = list;
+    while (p) {
+        list = list->next;
+        free_hdr10_plus(p);
+        p = list;
+    }
+}
+
+static int copy_hdr10_plus_to_pkt(void *list, AVPacket *pkt)
+{
+    struct FrameHDR10PlusList **p = list;
+    struct FrameHDR10PlusList *head = *p;
+
+    if(head && pkt && head->hdr10_plus && head->pts == pkt->pts) {
+        uint8_t *data;
+        *p = (*p)->next;
+        data = av_packet_new_side_data(pkt, AV_PKT_DATA_DYNAMIC_HDR10_PLUS, head->hdr10_plus->size);
+
+        if (!data) {
+            free_hdr10_plus(head);
+            return AVERROR(ENOMEM);
+        }
+        memcpy(data, head->hdr10_plus->data, head->hdr10_plus->size);
+        free_hdr10_plus(head);
+
+    }
+    return 0;
+}
+
 static av_cold int codecctl_int(AVCodecContext *avctx,
                                 enum vp8e_enc_control_id id, int val)
 {
@@ -383,6 +437,7 @@  static av_cold int vpx_free(AVCodecContext *avctx)
     av_freep(&ctx->twopass_stats.buf);
     av_freep(&avctx->stats_out);
     free_frame_list(ctx->coded_frame_list);
+    free_hdr10_plus_list(ctx->hdr10_plus_list);
     return 0;
 }
 
@@ -1245,6 +1300,12 @@  FF_ENABLE_DEPRECATION_WARNINGS
             AV_WB64(side_data, 1);
             memcpy(side_data + 8, cx_frame->buf_alpha, cx_frame->sz_alpha);
         }
+        if(cx_frame->frame_number != -1) { // Not invisible frame
+            VPxContext *ctx = avctx->priv_data;
+            int err = copy_hdr10_plus_to_pkt(&ctx->hdr10_plus_list, pkt);
+            if (err < 0)
+                return err;
+        }
     } else {
         return ret;
     }
@@ -1579,6 +1640,7 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
     const struct vpx_codec_enc_cfg *enccfg = ctx->encoder.config.enc;
     vpx_svc_layer_id_t layer_id;
     int layer_id_valid = 0;
+    AVFrameSideData *hdr10_plus_metadata;
 
     if (frame) {
         const AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
@@ -1655,6 +1717,15 @@  static int vpx_encode(AVCodecContext *avctx, AVPacket *pkt,
                 vp9_encode_set_roi(avctx, frame->width, frame->height, sd);
             }
         }
+
+        // Add HDR10+ metadata to queue.
+        hdr10_plus_metadata = av_frame_get_side_data(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS);
+        if(hdr10_plus_metadata){
+            struct FrameHDR10PlusList *data =  av_malloc(sizeof(struct FrameHDR10PlusList));
+            data->pts = frame->pts;
+            data->hdr10_plus = av_buffer_ref(hdr10_plus_metadata->buf);
+            add_hdr10_plus(&ctx->hdr10_plus_list, data);
+        }
     }
 
     // this is for encoding with preset temporal layering patterns defined in
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index ca18ae631f..89e683b357 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -298,7 +298,15 @@  enum AVPacketSideDataType {
      * If its value becomes huge, some code using it
      * needs to be updated as it assumes it to be smaller than other limits.
      */
-    AV_PKT_DATA_NB
+    AV_PKT_DATA_NB,
+
+    /**
+     * HDR10+ dynamic metadata associated with a video frame. The metadata is in
+     * the form of the AVDynamicHDRPlus struct and contains
+     * information for color volume transform - application 4 of
+     * SMPTE 2094-40:2016 standard.
+     */
+    AV_PKT_DATA_DYNAMIC_HDR10_PLUS
 };
 
 #define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED