diff mbox series

[FFmpeg-devel,v5,1/6] lavu/frame: Add Dolby Vision metadata side data type

Message ID 20211212115546.119662-1-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,v5,1/6] lavu/frame: Add Dolby Vision metadata side data type | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Niklas Haas Dec. 12, 2021, 11:55 a.m. UTC
From: Niklas Haas <git@haasn.dev>

Signed-off-by: Niklas Haas <git@haasn.dev>
---
 doc/APIchanges        |   3 +
 libavutil/dovi_meta.c |  12 ++++
 libavutil/dovi_meta.h | 132 ++++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c     |   1 +
 libavutil/frame.h     |   9 ++-
 libavutil/version.h   |   2 +-
 6 files changed, 157 insertions(+), 2 deletions(-)

Comments

Hendrik Leppkes Dec. 15, 2021, 11:33 a.m. UTC | #1
On Sun, Dec 12, 2021 at 12:56 PM Niklas Haas <ffmpeg@haasn.xyz> wrote:
>
> From: Niklas Haas <git@haasn.dev>
>

Any more comments on this set? LGTM now, but others have also had
comments before.
I would prefer if this gets in soon before 5.0, so user-apps can start
making use of this data.

- Hendrik
Derek Buitenhuis Dec. 15, 2021, 2:19 p.m. UTC | #2
On 12/15/2021 11:33 AM, Hendrik Leppkes wrote:
> Any more comments on this set? LGTM now, but others have also had
> comments before.
> I would prefer if this gets in soon before 5.0, so user-apps can start
> making use of this data.

No objections from me.

- Derek
Hendrik Leppkes Dec. 18, 2021, 6:34 p.m. UTC | #3
On Wed, Dec 15, 2021 at 12:33 PM Hendrik Leppkes <h.leppkes@gmail.com> wrote:
>
> On Sun, Dec 12, 2021 at 12:56 PM Niklas Haas <ffmpeg@haasn.xyz> wrote:
> >
> > From: Niklas Haas <git@haasn.dev>
> >
>
> Any more comments on this set? LGTM now, but others have also had
> comments before.
> I would prefer if this gets in soon before 5.0, so user-apps can start
> making use of this data.
>

If nothing else comes up i'll apply this set after the weekend.

- Hendrik
Anton Khirnov Dec. 20, 2021, 9:53 a.m. UTC | #4
Quoting Niklas Haas (2021-12-12 12:55:41)
> +/**
> + * Coefficients of a piece-wise function. The pieces of the function span the
> + * value ranges between two adjacent pivot values.
> + */
> +#define FF_DOVI_MAX_PIECES 8

Why FF_? It's public, no?
Anton Khirnov Dec. 20, 2021, 10:02 a.m. UTC | #5
Quoting Niklas Haas (2021-12-12 12:55:41)
> From: Niklas Haas <git@haasn.dev>
> 
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>  doc/APIchanges        |   3 +
>  libavutil/dovi_meta.c |  12 ++++
>  libavutil/dovi_meta.h | 132 ++++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c     |   1 +
>  libavutil/frame.h     |   9 ++-
>  libavutil/version.h   |   2 +-
>  6 files changed, 157 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 17aa664ca3..0ddde40bdf 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
>  
>  API changes, most recent first:
>  
> +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
> +  Add AV_FRAME_DATA_DOVI_RESHAPING.

Doesn't match the actual name.

> +
>  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
>    Add AVFormatContext io_close2 which returns an int
>  
> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
> index 7bd08f6c54..60b4cb2376 100644
> --- a/libavutil/dovi_meta.c
> +++ b/libavutil/dovi_meta.c
> @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
>  
>      return dovi;
>  }
> +
> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
> +{
> +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
> +    if (!dovi)
> +        return NULL;
> +
> +    if (size)
> +        *size = sizeof(*dovi);
> +
> +    return dovi;
> +}
> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> index 299911d434..20efd41676 100644
> --- a/libavutil/dovi_meta.h
> +++ b/libavutil/dovi_meta.h
> @@ -29,6 +29,7 @@
>  
>  #include <stdint.h>
>  #include <stddef.h>
> +#include "rational.h"
>  
>  /*
>   * DOVI configuration
> @@ -67,4 +68,135 @@ typedef struct AVDOVIDecoderConfigurationRecord {
>   */
>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
>  
> +/**
> + * Dolby Vision RPU data header.
> + */
> +typedef struct AVDOVIRpuDataHeader {
> +    uint8_t rpu_type;
> +    uint16_t rpu_format;
> +    uint8_t vdr_rpu_profile;
> +    uint8_t vdr_rpu_level;
> +    int chroma_resampling_explicit_filter_flag;

Why are flags ints, while other things can be uint8_t?

> +
> +/**
> + * Combined struct representing a combination of header, mapping and color
> + * metadata, for attaching to frames as side data.
> + *
> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
> + *       its size is not a part of the public ABI.

Maybe mention that the individual structs (header, mapping, color)
CANNOT be extended without a major bump.

> + */
> +
> +typedef struct AVDOVIMetadata {
> +    AVDOVIRpuDataHeader header;
> +    AVDOVIDataMapping mapping;
> +    AVDOVIColorMetadata color;
> +} AVDOVIMetadata;
> +
> +/**
> + * Allocate an AVDOVIMetadata structure and initialize its
> + * fields to default values.

+ * @param size If this parameter is non-NULL, the size in bytes of the
                allocated struct will be written here on success
diff mbox series

Patch

diff --git a/doc/APIchanges b/doc/APIchanges
index 17aa664ca3..0ddde40bdf 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -14,6 +14,9 @@  libavutil:     2021-04-27
 
 API changes, most recent first:
 
+2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
+  Add AV_FRAME_DATA_DOVI_RESHAPING.
+
 2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
   Add AVFormatContext io_close2 which returns an int
 
diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
index 7bd08f6c54..60b4cb2376 100644
--- a/libavutil/dovi_meta.c
+++ b/libavutil/dovi_meta.c
@@ -33,3 +33,15 @@  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
 
     return dovi;
 }
+
+AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
+{
+    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
+    if (!dovi)
+        return NULL;
+
+    if (size)
+        *size = sizeof(*dovi);
+
+    return dovi;
+}
diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
index 299911d434..20efd41676 100644
--- a/libavutil/dovi_meta.h
+++ b/libavutil/dovi_meta.h
@@ -29,6 +29,7 @@ 
 
 #include <stdint.h>
 #include <stddef.h>
+#include "rational.h"
 
 /*
  * DOVI configuration
@@ -67,4 +68,135 @@  typedef struct AVDOVIDecoderConfigurationRecord {
  */
 AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
 
+/**
+ * Dolby Vision RPU data header.
+ */
+typedef struct AVDOVIRpuDataHeader {
+    uint8_t rpu_type;
+    uint16_t rpu_format;
+    uint8_t vdr_rpu_profile;
+    uint8_t vdr_rpu_level;
+    int chroma_resampling_explicit_filter_flag;
+    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
+    uint8_t coef_log2_denom;
+    uint8_t vdr_rpu_normalized_idc;
+    int bl_video_full_range_flag;
+    uint8_t bl_bit_depth; /* [8, 16] */
+    uint8_t el_bit_depth; /* [8, 16] */
+    uint8_t vdr_bit_depth; /* [8, 16] */
+    int spatial_resampling_filter_flag;
+    int el_spatial_resampling_filter_flag;
+    int disable_residual_flag;
+} AVDOVIRpuDataHeader;
+
+enum AVDOVIMappingMethod {
+    AV_DOVI_MAPPING_POLYNOMIAL = 0,
+    AV_DOVI_MAPPING_MMR = 1,
+};
+
+/**
+ * Coefficients of a piece-wise function. The pieces of the function span the
+ * value ranges between two adjacent pivot values.
+ */
+#define FF_DOVI_MAX_PIECES 8
+typedef struct AVDOVIReshapingCurve {
+    uint8_t num_pivots;                         /* [2, 9] */
+    uint16_t pivots[FF_DOVI_MAX_PIECES + 1];    /* sorted ascending */
+    enum AVDOVIMappingMethod mapping_idc[FF_DOVI_MAX_PIECES];
+    /* AV_DOVI_MAPPING_POLYNOMIAL */
+    uint8_t poly_order[FF_DOVI_MAX_PIECES];     /* [1, 2] */
+    int64_t poly_coef[FF_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
+    /* AV_DOVI_MAPPING_MMR */
+    uint8_t mmr_order[FF_DOVI_MAX_PIECES];      /* [1, 3] */
+    int64_t mmr_constant[FF_DOVI_MAX_PIECES];
+    int64_t mmr_coef[FF_DOVI_MAX_PIECES][3/* order - 1 */][7];
+} AVDOVIReshapingCurve;
+
+enum AVDOVINLQMethod {
+    AV_DOVI_NLQ_NONE = -1,
+    AV_DOVI_NLQ_LINEAR_DZ = 0,
+};
+
+/**
+ * Coefficients of the non-linear inverse quantization. For the interpretation
+ * of these, see ETSI GS CCM 001.
+ */
+typedef struct AVDOVINLQParams {
+    uint64_t nlq_offset;
+    uint64_t vdr_in_max;
+    /* AV_DOVI_NLQ_LINEAR_DZ */
+    uint64_t linear_deadzone_slope;
+    uint64_t linear_deadzone_threshold;
+} AVDOVINLQParams;
+
+/**
+ * Dolby Vision RPU data mapping parameters.
+ */
+typedef struct AVDOVIDataMapping {
+    uint8_t vdr_rpu_id;
+    uint8_t mapping_color_space;
+    uint8_t mapping_chroma_format_idc;
+    AVDOVIReshapingCurve curves[3]; /* per component */
+
+    /* Non-linear inverse quantization */
+    enum AVDOVINLQMethod nlq_method_idc;
+    uint32_t num_x_partitions;
+    uint32_t num_y_partitions;
+    AVDOVINLQParams nlq[3]; /* per component */
+} AVDOVIDataMapping;
+
+typedef struct AVDOVIColorMetadata {
+    uint8_t dm_metadata_id;
+    int scene_refresh_flag;
+
+    /**
+     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
+     * used instead of the matrices indicated by the frame's colorspace tags.
+     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
+     * matrix based on a Hunt-Pointer-Estevez transform, but without any
+     * crosstalk. (See the definition of the ICtCp colorspace for more
+     * information.)
+     */
+    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
+    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
+    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
+
+    /**
+     * Extra signal metadata (see Dolby patents for more info).
+     */
+    uint16_t signal_eotf;
+    uint16_t signal_eotf_param0;
+    uint16_t signal_eotf_param1;
+    uint32_t signal_eotf_param2;
+    uint8_t signal_bit_depth;
+    uint8_t signal_color_space;
+    uint8_t signal_chroma_format;
+    uint8_t signal_full_range_flag; /* [0, 3] */
+    uint16_t source_min_pq;
+    uint16_t source_max_pq;
+    uint16_t source_diagonal;
+} AVDOVIColorMetadata;
+
+/**
+ * Combined struct representing a combination of header, mapping and color
+ * metadata, for attaching to frames as side data.
+ *
+ * @note The struct must be allocated with av_dovi_metadata_alloc() and
+ *       its size is not a part of the public ABI.
+ */
+
+typedef struct AVDOVIMetadata {
+    AVDOVIRpuDataHeader header;
+    AVDOVIDataMapping mapping;
+    AVDOVIColorMetadata color;
+} AVDOVIMetadata;
+
+/**
+ * Allocate an AVDOVIMetadata structure and initialize its
+ * fields to default values.
+ *
+ * @return the newly allocated struct or NULL on failure
+ */
+AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size);
+
 #endif /* AVUTIL_DOVI_META_H */
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 0912ad9131..8997c85e35 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -729,6 +729,7 @@  const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters";
     case AV_FRAME_DATA_DETECTION_BBOXES:            return "Bounding boxes for object detection and classification";
     case AV_FRAME_DATA_DOVI_RPU_BUFFER:             return "Dolby Vision RPU Data";
+    case AV_FRAME_DATA_DOVI_METADATA:               return "Dolby Vision Metadata";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 3f295f6b9e..18e239f870 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -189,11 +189,18 @@  enum AVFrameSideDataType {
     AV_FRAME_DATA_DETECTION_BBOXES,
 
     /**
-     * Dolby Vision RPU data, suitable for passing to x265
+     * Dolby Vision RPU raw data, suitable for passing to x265
      * or other libraries. Array of uint8_t, with NAL emulation
      * bytes intact.
      */
     AV_FRAME_DATA_DOVI_RPU_BUFFER,
+
+    /**
+     * Parsed Dolby Vision metadata, suitable for passing to a software
+     * implementation. The payload is the AVDOVIMetadata struct defined in
+     * libavutil/dovi_meta.h.
+     */
+    AV_FRAME_DATA_DOVI_METADATA,
 };
 
 enum AVActiveFormatDescription {
diff --git a/libavutil/version.h b/libavutil/version.h
index 0e7b36865a..668f9206fe 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@ 
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  57
-#define LIBAVUTIL_VERSION_MINOR  11
+#define LIBAVUTIL_VERSION_MINOR  12
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \