diff mbox series

[FFmpeg-devel] Support HDR10+ metadata for HEVC.

Message ID 20200723230549.3411379-1-izadi@google.com
State New
Headers show
Series [FFmpeg-devel] Support HDR10+ metadata for HEVC.
Related show

Checks

Context Check Description
andriy/default pending
andriy/configure warning Failed to apply patch

Commit Message

Mohammad Izadi July 23, 2020, 11:05 p.m. UTC
From: Mohammad Izadi <moh.izadi@gmail.com>

HDR10+ is dynamic metadata (A/341 Amendment - SMPTE2094-40) that needs to be decoded from ITU-T T.35 in HEVC bitstream. The HDR10+ is transferred to side data packet to be used or passed through.
---
 libavcodec/avpacket.c |   1 +
 libavcodec/decode.c   |   1 +
 libavcodec/hevc_sei.c |  39 ++++++---
 libavcodec/hevc_sei.h |   5 ++
 libavcodec/hevcdec.c  |  16 ++++
 libavcodec/internal.h |   9 +++
 libavcodec/packet.h   |   8 ++
 libavcodec/utils.c    | 180 ++++++++++++++++++++++++++++++++++++++++++
 libavcodec/version.h  |   2 +-
 9 files changed, 248 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index dce26cb31a..8307032335 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -394,6 +394,7 @@  const char *av_packet_side_data_name(enum AVPacketSideDataType type)
     case AV_PKT_DATA_CONTENT_LIGHT_LEVEL:        return "Content light level metadata";
     case AV_PKT_DATA_SPHERICAL:                  return "Spherical Mapping";
     case AV_PKT_DATA_A53_CC:                     return "A53 Closed Captions";
+    case AV_PKT_DATA_DYNAMIC_HDR_PLUS:           return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
     case AV_PKT_DATA_ENCRYPTION_INIT_INFO:       return "Encryption initialization data";
     case AV_PKT_DATA_ENCRYPTION_INFO:            return "Encryption info";
     case AV_PKT_DATA_AFD:                        return "Active Format Description data";
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index de9c079f9d..cd3286f7fb 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1698,6 +1698,7 @@  int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
         { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
         { AV_PKT_DATA_CONTENT_LIGHT_LEVEL,        AV_FRAME_DATA_CONTENT_LIGHT_LEVEL },
         { AV_PKT_DATA_A53_CC,                     AV_FRAME_DATA_A53_CC },
+        { AV_PKT_DATA_DYNAMIC_HDR_PLUS,           AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
         { AV_PKT_DATA_ICC_PROFILE,                AV_FRAME_DATA_ICC_PROFILE },
     };
 
diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c
index a4ec65dc1a..a490e752dd 100644
--- a/libavcodec/hevc_sei.c
+++ b/libavcodec/hevc_sei.c
@@ -25,6 +25,7 @@ 
 #include "golomb.h"
 #include "hevc_ps.h"
 #include "hevc_sei.h"
+#include "internal.h"
 
 static int decode_nal_sei_decoded_picture_hash(HEVCSEIPictureHash *s, GetBitContext *gb)
 {
@@ -242,8 +243,8 @@  static int decode_nal_sei_user_data_unregistered(HEVCSEIUnregistered *s, GetBitC
 static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitContext *gb,
                                                          int size)
 {
-    uint32_t country_code;
-    uint32_t user_identifier;
+    uint8_t country_code;
+    uint16_t provider_code;
 
     if (size < 7)
         return AVERROR(EINVAL);
@@ -255,18 +256,31 @@  static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitConte
         size--;
     }
 
-    skip_bits(gb, 8);
-    skip_bits(gb, 8);
-
-    user_identifier = get_bits_long(gb, 32);
-
-    switch (user_identifier) {
-        case MKBETAG('G', 'A', '9', '4'):
+    provider_code = get_bits(gb, 16);
+
+    const uint8_t usa_country_code = 0xB5;
+    const uint16_t smpte_provider_code = 0x003C;
+    if (country_code == usa_country_code &&
+        provider_code == smpte_provider_code) {
+        // A/341 Amendment – 2094-40
+        uint16_t provider_oriented_code = get_bits(gb, 16);
+        uint8_t application_identifier = get_bits(gb, 8);
+        const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
+        const uint16_t smpte2094_40_application_identifier = 0x04;
+        if (provider_oriented_code == smpte2094_40_provider_oriented_code &&
+            application_identifier == smpte2094_40_application_identifier) {
+            int err = ff_read_itu_t_t35_to_dynamic_hdr_plus(gb, s->dynamic_hdr_plus.info);
+            if (err < 0 && s->dynamic_hdr_plus.info) {
+                av_buffer_unref(&s->dynamic_hdr_plus.info);
+            }
+            return err;
+        }
+    } else {
+        uint32_t  user_identifier = get_bits_long(gb, 32);
+        if(user_identifier == MKBETAG('G', 'A', '9', '4'))
             return decode_registered_user_data_closed_caption(&s->a53_caption, gb, size);
-        default:
-            skip_bits_long(gb, size * 8);
-            break;
     }
+    skip_bits_long(gb, size * 8);
     return 0;
 }
 
@@ -453,4 +467,5 @@  void ff_hevc_reset_sei(HEVCSEI *s)
         av_buffer_unref(&s->unregistered.buf_ref[i]);
     s->unregistered.nb_buf_ref = 0;
     av_freep(&s->unregistered.buf_ref);
+    av_buffer_unref(&s->dynamic_hdr_plus.info);
 }
diff --git a/libavcodec/hevc_sei.h b/libavcodec/hevc_sei.h
index 5ee7a4796d..e9e2d46ed4 100644
--- a/libavcodec/hevc_sei.h
+++ b/libavcodec/hevc_sei.h
@@ -104,6 +104,10 @@  typedef struct HEVCSEIMasteringDisplay {
     uint32_t min_luminance;
 } HEVCSEIMasteringDisplay;
 
+typedef struct HEVCSEIDynamicHDRPlus {
+    AVBufferRef *info;
+} HEVCSEIDynamicHDRPlus;
+
 typedef struct HEVCSEIContentLight {
     int present;
     uint16_t max_content_light_level;
@@ -143,6 +147,7 @@  typedef struct HEVCSEI {
     HEVCSEIA53Caption a53_caption;
     HEVCSEIUnregistered unregistered;
     HEVCSEIMasteringDisplay mastering_display;
+    HEVCSEIDynamicHDRPlus dynamic_hdr_plus;
     HEVCSEIContentLight content_light;
     int active_seq_parameter_set_id;
     HEVCSEIAlternativeTransfer alternative_transfer;
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index b77df8d89f..cd794231e8 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2849,6 +2849,16 @@  static int set_side_data(HEVCContext *s)
 
         s->sei.timecode.num_clock_ts = 0;
     }
+    if (s->sei.dynamic_hdr_plus.info){
+      AVBufferRef *info_ref = av_buffer_ref(s->sei.dynamic_hdr_plus.info);
+      if (!info_ref)
+	return AVERROR(ENOMEM);
+
+      if(!av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DYNAMIC_HDR_PLUS, info_ref)){
+        av_buffer_unref(&info_ref);
+        return AVERROR(ENOMEM);
+      }
+    }
 
     return 0;
 }
@@ -3530,6 +3540,12 @@  static int hevc_update_thread_context(AVCodecContext *dst,
         if (!s->sei.a53_caption.buf_ref)
             return AVERROR(ENOMEM);
     }
+    av_buffer_unref(&s->sei.dynamic_hdr_plus.info);
+    if (s0->sei.dynamic_hdr_plus.info) {
+        s->sei.dynamic_hdr_plus.info = av_buffer_ref(s0->sei.dynamic_hdr_plus.info);
+        if (!s->sei.dynamic_hdr_plus.info)
+            return AVERROR(ENOMEM);
+    }
 
     s->sei.frame_packing        = s0->sei.frame_packing;
     s->sei.display_orientation  = s0->sei.display_orientation;
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 0a1c0a17ec..744ace534c 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -413,6 +413,15 @@  int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
 
 void ff_dvdsub_parse_palette(uint32_t *palette, const char *p);
 
+/**
+ * Reads and decode the user data registered ITU-T T.35 to AVbuffer (AVDynamicHDRPlus).
+ * @param gbc The bit content to be decoded.
+ * @param output A buffer containing the decoded AVDynamicHDRPlus structure.
+ *
+ * @return 0 if succeed. Otherwise, returns the appropriate AVERROR.
+ */
+int ff_read_itu_t_t35_to_dynamic_hdr_plus(void *gbc, AVBufferRef *output);
+
 #if defined(_WIN32) && CONFIG_SHARED && !defined(BUILDING_avcodec)
 #    define av_export_avcodec __declspec(dllimport)
 #else
diff --git a/libavcodec/packet.h b/libavcodec/packet.h
index 96f237f091..60fbf2c1e8 100644
--- a/libavcodec/packet.h
+++ b/libavcodec/packet.h
@@ -282,6 +282,14 @@  enum AVPacketSideDataType {
      */
     AV_PKT_DATA_DOVI_CONF,
 
+    /**
+     * HDR10+ dynamic metadata associated with a video frame. The metadata is in
+     * the form of the AVDynamicHDRPlus struct and contains
+     * information for color volume transform - application 4 of
+     * SPMTE 2094-40:2016 standard.
+     */
+    AV_PKT_DATA_DYNAMIC_HDR_PLUS,
+
     /**
      * The number of side data types.
      * This is not part of the public API/ABI in the sense that it may
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 2ece34f921..b537909a3f 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -42,8 +42,10 @@ 
 #include "libavutil/samplefmt.h"
 #include "libavutil/dict.h"
 #include "libavutil/thread.h"
+#include "libavutil/hdr_dynamic_metadata.h"
 #include "avcodec.h"
 #include "decode.h"
+#include "get_bits.h"
 #include "hwconfig.h"
 #include "libavutil/opt.h"
 #include "mpegvideo.h"
@@ -67,6 +69,17 @@ 
 const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
 static AVMutex codec_mutex = AV_MUTEX_INITIALIZER;
+static const uint8_t usa_country_code = 0xB5;
+static const uint16_t smpte_provider_code = 0x003C;
+static const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
+static const uint16_t smpte2094_40_application_identifier = 0x04;
+static const int64_t luminance_den = 1;
+static const int32_t peak_luminance_den = 15;
+static const int64_t rgb_den = 100000;
+static const int32_t fraction_pixel_den = 1000;
+static const int32_t knee_point_den = 4095;
+static const int32_t bezier_anchor_den = 1023;
+static const int32_t saturation_weight_den = 8;
 
 void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
 {
@@ -2346,3 +2359,170 @@  int ff_int_from_list_or_default(void *ctx, const char * val_name, int val,
            "%s %d are not supported. Set to default value : %d\n", val_name, val, default_value);
     return default_value;
 }
+
+int ff_read_itu_t_t35_to_dynamic_hdr_plus(void *gbc,  AVBufferRef *output)
+{
+    GetBitContext *gb = (GetBitContext *)gbc;
+    uint8_t application_version  = get_bits(gbc, 8);
+    size_t size;
+    int w, i, j;
+    AVDynamicHDRPlus *s = av_dynamic_hdr_plus_alloc(&size);
+    if (!s)
+        return AVERROR(ENOMEM);
+
+    if (output)
+        av_buffer_unref(&output);
+
+    output = av_buffer_create(
+        (uint8_t *)s, size, av_buffer_default_free, NULL, 0);
+    if (!output) {
+        av_freep(&s);
+        return AVERROR(ENOMEM);
+    }
+    s->application_version = application_version;
+    if (get_bits_left(gb) < 2)
+        return AVERROR_INVALIDDATA;
+    s->num_windows = get_bits(gb, 2);
+
+    if (s->num_windows < 1 || s->num_windows > 3) {
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (get_bits_left(gb) < ((19 * 8 + 1) * (s->num_windows - 1)))
+        return AVERROR_INVALIDDATA;
+    for (w = 1; w < s->num_windows; w++) {
+        s->params[w].window_upper_left_corner_x.num = get_bits(gb, 16);
+        s->params[w].window_upper_left_corner_y.num = get_bits(gb, 16);
+        s->params[w].window_lower_right_corner_x.num = get_bits(gb, 16);
+        s->params[w].window_lower_right_corner_y.num = get_bits(gb, 16);
+        // The corners are set to absolute coordinates here. They should be
+        // converted to the relative coordinates (in [0, 1]) in the decoder.
+        s->params[w].window_upper_left_corner_x.den = 1;
+        s->params[w].window_upper_left_corner_y.den = 1;
+        s->params[w].window_lower_right_corner_x.den = 1;
+        s->params[w].window_lower_right_corner_y.den = 1;
+
+        s->params[w].center_of_ellipse_x = get_bits(gb, 16);
+        s->params[w].center_of_ellipse_y = get_bits(gb, 16);
+        s->params[w].rotation_angle = get_bits(gb, 8);
+        s->params[w].semimajor_axis_internal_ellipse = get_bits(gb, 16);
+        s->params[w].semimajor_axis_external_ellipse = get_bits(gb, 16);
+        s->params[w].semiminor_axis_external_ellipse = get_bits(gb, 16);
+        s->params[w].overlap_process_option = get_bits1(gb);
+    }
+
+    if (get_bits_left(gb) < 28)
+        return AVERROR(EINVAL);
+    s->targeted_system_display_maximum_luminance.num = get_bits(gb, 27);
+    s->targeted_system_display_maximum_luminance.den = luminance_den;
+    s->targeted_system_display_actual_peak_luminance_flag = get_bits1(gb);
+
+    if (s->targeted_system_display_actual_peak_luminance_flag) {
+        int rows, cols;
+        if (get_bits_left(gb) < 10)
+            return AVERROR(EINVAL);
+        rows = get_bits(gb, 5);
+        cols = get_bits(gb, 5);
+        if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25))) {
+            return AVERROR_INVALIDDATA;
+        }
+        s->num_rows_targeted_system_display_actual_peak_luminance = rows;
+        s->num_cols_targeted_system_display_actual_peak_luminance = cols;
+
+        if (get_bits_left(gb) < (rows * cols * 4))
+            return AVERROR(EINVAL);
+
+        for (i = 0; i < rows; i++) {
+            for (j = 0; j < cols; j++) {
+                s->targeted_system_display_actual_peak_luminance[i][j].num = get_bits(gb, 4);
+                s->targeted_system_display_actual_peak_luminance[i][j].den = peak_luminance_den;
+            }
+        }
+    }
+    for (w = 0; w < s->num_windows; w++) {
+        if (get_bits_left(gb) < (3 * 17 + 17 + 4))
+            return AVERROR(EINVAL);
+        for (i = 0; i < 3; i++) {
+            s->params[w].maxscl[i].num = get_bits(gb, 17);
+            s->params[w].maxscl[i].den = rgb_den;
+        }
+        s->params[w].average_maxrgb.num = get_bits(gb, 17);
+        s->params[w].average_maxrgb.den = rgb_den;
+        s->params[w].num_distribution_maxrgb_percentiles = get_bits(gb, 4);
+
+        if (get_bits_left(gb) <
+            (s->params[w].num_distribution_maxrgb_percentiles * 24))
+            return AVERROR(EINVAL);
+        for (i = 0; i < s->params[w].num_distribution_maxrgb_percentiles; i++) {
+            s->params[w].distribution_maxrgb[i].percentage = get_bits(gb, 7);
+            s->params[w].distribution_maxrgb[i].percentile.num = get_bits(gb, 17);
+            s->params[w].distribution_maxrgb[i].percentile.den = rgb_den;
+        }
+
+        if (get_bits_left(gb) < 10)
+            return AVERROR(EINVAL);
+        s->params[w].fraction_bright_pixels.num = get_bits(gb, 10);
+        s->params[w].fraction_bright_pixels.den = fraction_pixel_den;
+    }
+    if (get_bits_left(gb) < 1)
+        return AVERROR(EINVAL);
+    s->mastering_display_actual_peak_luminance_flag = get_bits1(gb);
+    if (s->mastering_display_actual_peak_luminance_flag) {
+        int rows, cols;
+        if (get_bits_left(gb) < 10)
+            return AVERROR(EINVAL);
+        rows = get_bits(gb, 5);
+        cols = get_bits(gb, 5);
+        if (((rows < 2) || (rows > 25)) || ((cols < 2) || (cols > 25))) {
+            return AVERROR_INVALIDDATA;
+        }
+        s->num_rows_mastering_display_actual_peak_luminance = rows;
+        s->num_cols_mastering_display_actual_peak_luminance = cols;
+
+        if (get_bits_left(gb) < (rows * cols * 4))
+            return AVERROR(EINVAL);
+
+        for (i = 0; i < rows; i++) {
+            for (j = 0; j < cols; j++) {
+                s->mastering_display_actual_peak_luminance[i][j].num = get_bits(gb, 4);
+                s->mastering_display_actual_peak_luminance[i][j].den = peak_luminance_den;
+            }
+        }
+    }
+
+    for (w = 0; w < s->num_windows; w++) {
+        if (get_bits_left(gb) < 1)
+            return AVERROR(EINVAL);
+        s->params[w].tone_mapping_flag = get_bits1(gb);
+        if (s->params[w].tone_mapping_flag) {
+            if (get_bits_left(gb) < 28)
+                return AVERROR(EINVAL);
+            s->params[w].knee_point_x.num = get_bits(gb, 12);
+            s->params[w].knee_point_x.den = knee_point_den;
+            s->params[w].knee_point_y.num = get_bits(gb, 12);
+            s->params[w].knee_point_y.den = knee_point_den;
+            s->params[w].num_bezier_curve_anchors = get_bits(gb, 4);
+
+            if (get_bits_left(gb) < (s->params[w].num_bezier_curve_anchors * 10))
+                return AVERROR(EINVAL);
+            for (i = 0; i < s->params[w].num_bezier_curve_anchors; i++) {
+                s->params[w].bezier_curve_anchors[i].num = get_bits(gb, 10);
+                s->params[w].bezier_curve_anchors[i].den = bezier_anchor_den;
+            }
+        }
+
+        if (get_bits_left(gb) < 1)
+            return AVERROR(EINVAL);
+        s->params[w].color_saturation_mapping_flag = get_bits1(gb);
+        if (s->params[w].color_saturation_mapping_flag) {
+            if (get_bits_left(gb) < 6)
+                return AVERROR(EINVAL);
+            s->params[w].color_saturation_weight.num = get_bits(gb, 6);
+            s->params[w].color_saturation_weight.den = saturation_weight_den;
+        }
+    }
+
+    skip_bits(gb, get_bits_left(gb));
+
+    return 0;
+}
diff --git a/libavcodec/version.h b/libavcodec/version.h
index e75891d463..ad0bfd619d 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  95
+#define LIBAVCODEC_VERSION_MINOR  96
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \