diff mbox series

[FFmpeg-devel,v8,1/6] lavu/frame: Add Dolby Vision metadata side data type

Message ID 20211220153122.54839-1-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,v8,1/6] lavu/frame: Add Dolby Vision metadata side data type | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Niklas Haas Dec. 20, 2021, 3:31 p.m. UTC
From: Niklas Haas <git@haasn.dev>

Signed-off-by: Niklas Haas <git@haasn.dev>
---
 doc/APIchanges        |   3 +
 libavutil/dovi_meta.c |  12 ++++
 libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c     |   1 +
 libavutil/frame.h     |   9 ++-
 libavutil/version.h   |   2 +-
 6 files changed, 168 insertions(+), 2 deletions(-)

Comments

Lynne Dec. 22, 2021, 1 p.m. UTC | #1
20 Dec 2021, 16:31 by ffmpeg@haasn.xyz:

> From: Niklas Haas <git@haasn.dev>
>
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>  doc/APIchanges        |   3 +
>  libavutil/dovi_meta.c |  12 ++++
>  libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
>  libavutil/frame.c     |   1 +
>  libavutil/frame.h     |   9 ++-
>  libavutil/version.h   |   2 +-
>  6 files changed, 168 insertions(+), 2 deletions(-)
>
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 17aa664ca3..ff78edec88 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
>  
>  API changes, most recent first:
>  
> +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
> +  Add AV_FRAME_DATA_DOVI_METADATA.
> +
>  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
>  Add AVFormatContext io_close2 which returns an int
>  
> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
> index 7bd08f6c54..60b4cb2376 100644
> --- a/libavutil/dovi_meta.c
> +++ b/libavutil/dovi_meta.c
> @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
>  
>  return dovi;
>  }
> +
> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
> +{
> +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
> +    if (!dovi)
> +        return NULL;
> +
> +    if (size)
> +        *size = sizeof(*dovi);
> +
> +    return dovi;
> +}
> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> index 299911d434..25e6d7b42f 100644
> --- a/libavutil/dovi_meta.h
> +++ b/libavutil/dovi_meta.h
> @@ -29,6 +29,7 @@
>  
>  #include <stdint.h>
>  #include <stddef.h>
> +#include "rational.h"
>  
>  /*
>  * DOVI configuration
> @@ -67,4 +68,146 @@ typedef struct AVDOVIDecoderConfigurationRecord {
>  */
>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
>  
> +/**
> + * Dolby Vision RPU data header.
> + *
> + * @note Cannot be extended without an ABI bump.
> + */
> +typedef struct AVDOVIRpuDataHeader {
> +    uint8_t rpu_type;
> +    uint16_t rpu_format;
> +    uint8_t vdr_rpu_profile;
> +    uint8_t vdr_rpu_level;
> +    uint8_t chroma_resampling_explicit_filter_flag;
> +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
> +    uint8_t coef_log2_denom;
> +    uint8_t vdr_rpu_normalized_idc;
> +    uint8_t bl_video_full_range_flag;
> +    uint8_t bl_bit_depth; /* [8, 16] */
> +    uint8_t el_bit_depth; /* [8, 16] */
> +    uint8_t vdr_bit_depth; /* [8, 16] */
> +    uint8_t spatial_resampling_filter_flag;
> +    uint8_t el_spatial_resampling_filter_flag;
> +    uint8_t disable_residual_flag;
> +} AVDOVIRpuDataHeader;
> +
> +enum AVDOVIMappingMethod {
> +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
> +    AV_DOVI_MAPPING_MMR = 1,
> +};
> +
> +/**
> + * Coefficients of a piece-wise function. The pieces of the function span the
> + * value ranges between two adjacent pivot values.
> + *
> + * @note Cannot be extended without an ABI bump.
> + */
> +#define AV_DOVI_MAX_PIECES 8
> +typedef struct AVDOVIReshapingCurve {
> +    uint8_t num_pivots;                         /* [2, 9] */
> +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
> +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
> +    /* AV_DOVI_MAPPING_POLYNOMIAL */
> +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
> +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
> +    /* AV_DOVI_MAPPING_MMR */
> +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
> +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
> +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
> +} AVDOVIReshapingCurve;
> +
> +enum AVDOVINLQMethod {
> +    AV_DOVI_NLQ_NONE = -1,
> +    AV_DOVI_NLQ_LINEAR_DZ = 0,
> +};
> +
> +/**
> + * Coefficients of the non-linear inverse quantization. For the interpretation
> + * of these, see ETSI GS CCM 001.
> + *
> + * @note Cannot be extended without an ABI bump.
> + */
> +typedef struct AVDOVINLQParams {
> +    uint64_t nlq_offset;
> +    uint64_t vdr_in_max;
> +    /* AV_DOVI_NLQ_LINEAR_DZ */
> +    uint64_t linear_deadzone_slope;
> +    uint64_t linear_deadzone_threshold;
> +} AVDOVINLQParams;
> +
> +/**
> + * Dolby Vision RPU data mapping parameters.
> + *
> + * @note Cannot be extended without an ABI bump.
> + */
> +typedef struct AVDOVIDataMapping {
> +    uint8_t vdr_rpu_id;
> +    uint8_t mapping_color_space;
> +    uint8_t mapping_chroma_format_idc;
> +    AVDOVIReshapingCurve curves[3]; /* per component */
> +
> +    /* Non-linear inverse quantization */
> +    enum AVDOVINLQMethod nlq_method_idc;
> +    uint32_t num_x_partitions;
> +    uint32_t num_y_partitions;
> +    AVDOVINLQParams nlq[3]; /* per component */
> +} AVDOVIDataMapping;
> +
> +typedef struct AVDOVIColorMetadata {
> +    uint8_t dm_metadata_id;
> +    uint8_t scene_refresh_flag;
> +
> +    /**
> +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
> +     * used instead of the matrices indicated by the frame's colorspace tags.
> +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
> +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
> +     * crosstalk. (See the definition of the ICtCp colorspace for more
> +     * information.)
> +     */
> +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
> +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
> +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
> +
> +    /**
> +     * Extra signal metadata (see Dolby patents for more info).
> +     */
> +    uint16_t signal_eotf;
> +    uint16_t signal_eotf_param0;
> +    uint16_t signal_eotf_param1;
> +    uint32_t signal_eotf_param2;
> +    uint8_t signal_bit_depth;
> +    uint8_t signal_color_space;
> +    uint8_t signal_chroma_format;
> +    uint8_t signal_full_range_flag; /* [0, 3] */
> +    uint16_t source_min_pq;
> +    uint16_t source_max_pq;
> +    uint16_t source_diagonal;
> +} AVDOVIColorMetadata;
> +
> +/**
> + * Combined struct representing a combination of header, mapping and color
> + * metadata, for attaching to frames as side data.
> + *
> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
> + *       its size is not a part of the public ABI.
> + */
> +
> +typedef struct AVDOVIMetadata {
> +    AVDOVIRpuDataHeader header;
> +    AVDOVIDataMapping mapping;
> +    AVDOVIColorMetadata color;
>

I think we should version this by adding an integer version
field, and a compile-time header #define.
just in case more extra signalling is added. Then
we could support it in a sort of backwards-compatible way
by just documenting what has changed, and users could
make their own decisions about whether to present it 
as-is or error out.

Apart from that, patchset looks good to me.
Hendrik Leppkes Dec. 22, 2021, 1:24 p.m. UTC | #2
On Wed, Dec 22, 2021 at 2:00 PM Lynne <dev@lynne.ee> wrote:
>
> 20 Dec 2021, 16:31 by ffmpeg@haasn.xyz:
>
> > From: Niklas Haas <git@haasn.dev>
> >
> > Signed-off-by: Niklas Haas <git@haasn.dev>
> > ---
> >  doc/APIchanges        |   3 +
> >  libavutil/dovi_meta.c |  12 ++++
> >  libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
> >  libavutil/frame.c     |   1 +
> >  libavutil/frame.h     |   9 ++-
> >  libavutil/version.h   |   2 +-
> >  6 files changed, 168 insertions(+), 2 deletions(-)
> >
> > diff --git a/doc/APIchanges b/doc/APIchanges
> > index 17aa664ca3..ff78edec88 100644
> > --- a/doc/APIchanges
> > +++ b/doc/APIchanges
> > @@ -14,6 +14,9 @@ libavutil:     2021-04-27
> >
> >  API changes, most recent first:
> >
> > +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
> > +  Add AV_FRAME_DATA_DOVI_METADATA.
> > +
> >  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
> >  Add AVFormatContext io_close2 which returns an int
> >
> > diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
> > index 7bd08f6c54..60b4cb2376 100644
> > --- a/libavutil/dovi_meta.c
> > +++ b/libavutil/dovi_meta.c
> > @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
> >
> >  return dovi;
> >  }
> > +
> > +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
> > +{
> > +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
> > +    if (!dovi)
> > +        return NULL;
> > +
> > +    if (size)
> > +        *size = sizeof(*dovi);
> > +
> > +    return dovi;
> > +}
> > diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> > index 299911d434..25e6d7b42f 100644
> > --- a/libavutil/dovi_meta.h
> > +++ b/libavutil/dovi_meta.h
> > @@ -29,6 +29,7 @@
> >
> >  #include <stdint.h>
> >  #include <stddef.h>
> > +#include "rational.h"
> >
> >  /*
> >  * DOVI configuration
> > @@ -67,4 +68,146 @@ typedef struct AVDOVIDecoderConfigurationRecord {
> >  */
> >  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
> >
> > +/**
> > + * Dolby Vision RPU data header.
> > + *
> > + * @note Cannot be extended without an ABI bump.
> > + */
> > +typedef struct AVDOVIRpuDataHeader {
> > +    uint8_t rpu_type;
> > +    uint16_t rpu_format;
> > +    uint8_t vdr_rpu_profile;
> > +    uint8_t vdr_rpu_level;
> > +    uint8_t chroma_resampling_explicit_filter_flag;
> > +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
> > +    uint8_t coef_log2_denom;
> > +    uint8_t vdr_rpu_normalized_idc;
> > +    uint8_t bl_video_full_range_flag;
> > +    uint8_t bl_bit_depth; /* [8, 16] */
> > +    uint8_t el_bit_depth; /* [8, 16] */
> > +    uint8_t vdr_bit_depth; /* [8, 16] */
> > +    uint8_t spatial_resampling_filter_flag;
> > +    uint8_t el_spatial_resampling_filter_flag;
> > +    uint8_t disable_residual_flag;
> > +} AVDOVIRpuDataHeader;
> > +
> > +enum AVDOVIMappingMethod {
> > +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
> > +    AV_DOVI_MAPPING_MMR = 1,
> > +};
> > +
> > +/**
> > + * Coefficients of a piece-wise function. The pieces of the function span the
> > + * value ranges between two adjacent pivot values.
> > + *
> > + * @note Cannot be extended without an ABI bump.
> > + */
> > +#define AV_DOVI_MAX_PIECES 8
> > +typedef struct AVDOVIReshapingCurve {
> > +    uint8_t num_pivots;                         /* [2, 9] */
> > +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
> > +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
> > +    /* AV_DOVI_MAPPING_POLYNOMIAL */
> > +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
> > +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
> > +    /* AV_DOVI_MAPPING_MMR */
> > +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
> > +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
> > +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
> > +} AVDOVIReshapingCurve;
> > +
> > +enum AVDOVINLQMethod {
> > +    AV_DOVI_NLQ_NONE = -1,
> > +    AV_DOVI_NLQ_LINEAR_DZ = 0,
> > +};
> > +
> > +/**
> > + * Coefficients of the non-linear inverse quantization. For the interpretation
> > + * of these, see ETSI GS CCM 001.
> > + *
> > + * @note Cannot be extended without an ABI bump.
> > + */
> > +typedef struct AVDOVINLQParams {
> > +    uint64_t nlq_offset;
> > +    uint64_t vdr_in_max;
> > +    /* AV_DOVI_NLQ_LINEAR_DZ */
> > +    uint64_t linear_deadzone_slope;
> > +    uint64_t linear_deadzone_threshold;
> > +} AVDOVINLQParams;
> > +
> > +/**
> > + * Dolby Vision RPU data mapping parameters.
> > + *
> > + * @note Cannot be extended without an ABI bump.
> > + */
> > +typedef struct AVDOVIDataMapping {
> > +    uint8_t vdr_rpu_id;
> > +    uint8_t mapping_color_space;
> > +    uint8_t mapping_chroma_format_idc;
> > +    AVDOVIReshapingCurve curves[3]; /* per component */
> > +
> > +    /* Non-linear inverse quantization */
> > +    enum AVDOVINLQMethod nlq_method_idc;
> > +    uint32_t num_x_partitions;
> > +    uint32_t num_y_partitions;
> > +    AVDOVINLQParams nlq[3]; /* per component */
> > +} AVDOVIDataMapping;
> > +
> > +typedef struct AVDOVIColorMetadata {
> > +    uint8_t dm_metadata_id;
> > +    uint8_t scene_refresh_flag;
> > +
> > +    /**
> > +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
> > +     * used instead of the matrices indicated by the frame's colorspace tags.
> > +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
> > +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
> > +     * crosstalk. (See the definition of the ICtCp colorspace for more
> > +     * information.)
> > +     */
> > +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
> > +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
> > +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
> > +
> > +    /**
> > +     * Extra signal metadata (see Dolby patents for more info).
> > +     */
> > +    uint16_t signal_eotf;
> > +    uint16_t signal_eotf_param0;
> > +    uint16_t signal_eotf_param1;
> > +    uint32_t signal_eotf_param2;
> > +    uint8_t signal_bit_depth;
> > +    uint8_t signal_color_space;
> > +    uint8_t signal_chroma_format;
> > +    uint8_t signal_full_range_flag; /* [0, 3] */
> > +    uint16_t source_min_pq;
> > +    uint16_t source_max_pq;
> > +    uint16_t source_diagonal;
> > +} AVDOVIColorMetadata;
> > +
> > +/**
> > + * Combined struct representing a combination of header, mapping and color
> > + * metadata, for attaching to frames as side data.
> > + *
> > + * @note The struct must be allocated with av_dovi_metadata_alloc() and
> > + *       its size is not a part of the public ABI.
> > + */
> > +
> > +typedef struct AVDOVIMetadata {
> > +    AVDOVIRpuDataHeader header;
> > +    AVDOVIDataMapping mapping;
> > +    AVDOVIColorMetadata color;
> >
>
> I think we should version this by adding an integer version
> field, and a compile-time header #define.
> just in case more extra signalling is added. Then
> we could support it in a sort of backwards-compatible way
> by just documenting what has changed, and users could
> make their own decisions about whether to present it
> as-is or error out.
>

That seems rather pointless to me when API users can just tie that to
avutil or avcodec version.
Additionally, how would I ever make use of that? We never support
running binaries older then the headers you have, and newer data can't
be used because you don't have the struct definition that corresponds
with it.

- Hendrik
Andreas Rheinhardt Dec. 22, 2021, 1:33 p.m. UTC | #3
Hendrik Leppkes:
> On Wed, Dec 22, 2021 at 2:00 PM Lynne <dev@lynne.ee> wrote:
>>
>> 20 Dec 2021, 16:31 by ffmpeg@haasn.xyz:
>>
>>> From: Niklas Haas <git@haasn.dev>
>>>
>>> Signed-off-by: Niklas Haas <git@haasn.dev>
>>> ---
>>>  doc/APIchanges        |   3 +
>>>  libavutil/dovi_meta.c |  12 ++++
>>>  libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
>>>  libavutil/frame.c     |   1 +
>>>  libavutil/frame.h     |   9 ++-
>>>  libavutil/version.h   |   2 +-
>>>  6 files changed, 168 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/doc/APIchanges b/doc/APIchanges
>>> index 17aa664ca3..ff78edec88 100644
>>> --- a/doc/APIchanges
>>> +++ b/doc/APIchanges
>>> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
>>>
>>>  API changes, most recent first:
>>>
>>> +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
>>> +  Add AV_FRAME_DATA_DOVI_METADATA.
>>> +
>>>  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
>>>  Add AVFormatContext io_close2 which returns an int
>>>
>>> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
>>> index 7bd08f6c54..60b4cb2376 100644
>>> --- a/libavutil/dovi_meta.c
>>> +++ b/libavutil/dovi_meta.c
>>> @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
>>>
>>>  return dovi;
>>>  }
>>> +
>>> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
>>> +{
>>> +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
>>> +    if (!dovi)
>>> +        return NULL;
>>> +
>>> +    if (size)
>>> +        *size = sizeof(*dovi);
>>> +
>>> +    return dovi;
>>> +}
>>> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
>>> index 299911d434..25e6d7b42f 100644
>>> --- a/libavutil/dovi_meta.h
>>> +++ b/libavutil/dovi_meta.h
>>> @@ -29,6 +29,7 @@
>>>
>>>  #include <stdint.h>
>>>  #include <stddef.h>
>>> +#include "rational.h"
>>>
>>>  /*
>>>  * DOVI configuration
>>> @@ -67,4 +68,146 @@ typedef struct AVDOVIDecoderConfigurationRecord {
>>>  */
>>>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
>>>
>>> +/**
>>> + * Dolby Vision RPU data header.
>>> + *
>>> + * @note Cannot be extended without an ABI bump.
>>> + */
>>> +typedef struct AVDOVIRpuDataHeader {
>>> +    uint8_t rpu_type;
>>> +    uint16_t rpu_format;
>>> +    uint8_t vdr_rpu_profile;
>>> +    uint8_t vdr_rpu_level;
>>> +    uint8_t chroma_resampling_explicit_filter_flag;
>>> +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
>>> +    uint8_t coef_log2_denom;
>>> +    uint8_t vdr_rpu_normalized_idc;
>>> +    uint8_t bl_video_full_range_flag;
>>> +    uint8_t bl_bit_depth; /* [8, 16] */
>>> +    uint8_t el_bit_depth; /* [8, 16] */
>>> +    uint8_t vdr_bit_depth; /* [8, 16] */
>>> +    uint8_t spatial_resampling_filter_flag;
>>> +    uint8_t el_spatial_resampling_filter_flag;
>>> +    uint8_t disable_residual_flag;
>>> +} AVDOVIRpuDataHeader;
>>> +
>>> +enum AVDOVIMappingMethod {
>>> +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
>>> +    AV_DOVI_MAPPING_MMR = 1,
>>> +};
>>> +
>>> +/**
>>> + * Coefficients of a piece-wise function. The pieces of the function span the
>>> + * value ranges between two adjacent pivot values.
>>> + *
>>> + * @note Cannot be extended without an ABI bump.
>>> + */
>>> +#define AV_DOVI_MAX_PIECES 8
>>> +typedef struct AVDOVIReshapingCurve {
>>> +    uint8_t num_pivots;                         /* [2, 9] */
>>> +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
>>> +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
>>> +    /* AV_DOVI_MAPPING_POLYNOMIAL */
>>> +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
>>> +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
>>> +    /* AV_DOVI_MAPPING_MMR */
>>> +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
>>> +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
>>> +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
>>> +} AVDOVIReshapingCurve;
>>> +
>>> +enum AVDOVINLQMethod {
>>> +    AV_DOVI_NLQ_NONE = -1,
>>> +    AV_DOVI_NLQ_LINEAR_DZ = 0,
>>> +};
>>> +
>>> +/**
>>> + * Coefficients of the non-linear inverse quantization. For the interpretation
>>> + * of these, see ETSI GS CCM 001.
>>> + *
>>> + * @note Cannot be extended without an ABI bump.
>>> + */
>>> +typedef struct AVDOVINLQParams {
>>> +    uint64_t nlq_offset;
>>> +    uint64_t vdr_in_max;
>>> +    /* AV_DOVI_NLQ_LINEAR_DZ */
>>> +    uint64_t linear_deadzone_slope;
>>> +    uint64_t linear_deadzone_threshold;
>>> +} AVDOVINLQParams;
>>> +
>>> +/**
>>> + * Dolby Vision RPU data mapping parameters.
>>> + *
>>> + * @note Cannot be extended without an ABI bump.
>>> + */
>>> +typedef struct AVDOVIDataMapping {
>>> +    uint8_t vdr_rpu_id;
>>> +    uint8_t mapping_color_space;
>>> +    uint8_t mapping_chroma_format_idc;
>>> +    AVDOVIReshapingCurve curves[3]; /* per component */
>>> +
>>> +    /* Non-linear inverse quantization */
>>> +    enum AVDOVINLQMethod nlq_method_idc;
>>> +    uint32_t num_x_partitions;
>>> +    uint32_t num_y_partitions;
>>> +    AVDOVINLQParams nlq[3]; /* per component */
>>> +} AVDOVIDataMapping;
>>> +
>>> +typedef struct AVDOVIColorMetadata {
>>> +    uint8_t dm_metadata_id;
>>> +    uint8_t scene_refresh_flag;
>>> +
>>> +    /**
>>> +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
>>> +     * used instead of the matrices indicated by the frame's colorspace tags.
>>> +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
>>> +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
>>> +     * crosstalk. (See the definition of the ICtCp colorspace for more
>>> +     * information.)
>>> +     */
>>> +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
>>> +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
>>> +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
>>> +
>>> +    /**
>>> +     * Extra signal metadata (see Dolby patents for more info).
>>> +     */
>>> +    uint16_t signal_eotf;
>>> +    uint16_t signal_eotf_param0;
>>> +    uint16_t signal_eotf_param1;
>>> +    uint32_t signal_eotf_param2;
>>> +    uint8_t signal_bit_depth;
>>> +    uint8_t signal_color_space;
>>> +    uint8_t signal_chroma_format;
>>> +    uint8_t signal_full_range_flag; /* [0, 3] */
>>> +    uint16_t source_min_pq;
>>> +    uint16_t source_max_pq;
>>> +    uint16_t source_diagonal;
>>> +} AVDOVIColorMetadata;
>>> +
>>> +/**
>>> + * Combined struct representing a combination of header, mapping and color
>>> + * metadata, for attaching to frames as side data.
>>> + *
>>> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
>>> + *       its size is not a part of the public ABI.
>>> + */
>>> +
>>> +typedef struct AVDOVIMetadata {
>>> +    AVDOVIRpuDataHeader header;
>>> +    AVDOVIDataMapping mapping;
>>> +    AVDOVIColorMetadata color;
>>>
>>
>> I think we should version this by adding an integer version
>> field, and a compile-time header #define.
>> just in case more extra signalling is added. Then
>> we could support it in a sort of backwards-compatible way
>> by just documenting what has changed, and users could
>> make their own decisions about whether to present it
>> as-is or error out.
>>
> 
> That seems rather pointless to me when API users can just tie that to
> avutil or avcodec version.
> Additionally, how would I ever make use of that? We never support
> running binaries older then the headers you have, and newer data can't
> be used because you don't have the struct definition that corresponds
> with it.
> 

One way to make these structs extensible while keeping this side data a
single buffer is by adding the offsets of header, mapping and color to
the start of AVDOVIMetadata at the start of AVDOVIMetadata. That way
users could use the parts they know and ignore the rest. It would even
allow for adding further structs to AVDOVIMetadata.

- Andreas
Hendrik Leppkes Dec. 22, 2021, 1:48 p.m. UTC | #4
On Wed, Dec 22, 2021 at 2:33 PM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
>
> Hendrik Leppkes:
> > On Wed, Dec 22, 2021 at 2:00 PM Lynne <dev@lynne.ee> wrote:
> >>
> >> 20 Dec 2021, 16:31 by ffmpeg@haasn.xyz:
> >>
> >>> From: Niklas Haas <git@haasn.dev>
> >>>
> >>> Signed-off-by: Niklas Haas <git@haasn.dev>
> >>> ---
> >>>  doc/APIchanges        |   3 +
> >>>  libavutil/dovi_meta.c |  12 ++++
> >>>  libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
> >>>  libavutil/frame.c     |   1 +
> >>>  libavutil/frame.h     |   9 ++-
> >>>  libavutil/version.h   |   2 +-
> >>>  6 files changed, 168 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/doc/APIchanges b/doc/APIchanges
> >>> index 17aa664ca3..ff78edec88 100644
> >>> --- a/doc/APIchanges
> >>> +++ b/doc/APIchanges
> >>> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
> >>>
> >>>  API changes, most recent first:
> >>>
> >>> +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
> >>> +  Add AV_FRAME_DATA_DOVI_METADATA.
> >>> +
> >>>  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
> >>>  Add AVFormatContext io_close2 which returns an int
> >>>
> >>> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
> >>> index 7bd08f6c54..60b4cb2376 100644
> >>> --- a/libavutil/dovi_meta.c
> >>> +++ b/libavutil/dovi_meta.c
> >>> @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
> >>>
> >>>  return dovi;
> >>>  }
> >>> +
> >>> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
> >>> +{
> >>> +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
> >>> +    if (!dovi)
> >>> +        return NULL;
> >>> +
> >>> +    if (size)
> >>> +        *size = sizeof(*dovi);
> >>> +
> >>> +    return dovi;
> >>> +}
> >>> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
> >>> index 299911d434..25e6d7b42f 100644
> >>> --- a/libavutil/dovi_meta.h
> >>> +++ b/libavutil/dovi_meta.h
> >>> @@ -29,6 +29,7 @@
> >>>
> >>>  #include <stdint.h>
> >>>  #include <stddef.h>
> >>> +#include "rational.h"
> >>>
> >>>  /*
> >>>  * DOVI configuration
> >>> @@ -67,4 +68,146 @@ typedef struct AVDOVIDecoderConfigurationRecord {
> >>>  */
> >>>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
> >>>
> >>> +/**
> >>> + * Dolby Vision RPU data header.
> >>> + *
> >>> + * @note Cannot be extended without an ABI bump.
> >>> + */
> >>> +typedef struct AVDOVIRpuDataHeader {
> >>> +    uint8_t rpu_type;
> >>> +    uint16_t rpu_format;
> >>> +    uint8_t vdr_rpu_profile;
> >>> +    uint8_t vdr_rpu_level;
> >>> +    uint8_t chroma_resampling_explicit_filter_flag;
> >>> +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
> >>> +    uint8_t coef_log2_denom;
> >>> +    uint8_t vdr_rpu_normalized_idc;
> >>> +    uint8_t bl_video_full_range_flag;
> >>> +    uint8_t bl_bit_depth; /* [8, 16] */
> >>> +    uint8_t el_bit_depth; /* [8, 16] */
> >>> +    uint8_t vdr_bit_depth; /* [8, 16] */
> >>> +    uint8_t spatial_resampling_filter_flag;
> >>> +    uint8_t el_spatial_resampling_filter_flag;
> >>> +    uint8_t disable_residual_flag;
> >>> +} AVDOVIRpuDataHeader;
> >>> +
> >>> +enum AVDOVIMappingMethod {
> >>> +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
> >>> +    AV_DOVI_MAPPING_MMR = 1,
> >>> +};
> >>> +
> >>> +/**
> >>> + * Coefficients of a piece-wise function. The pieces of the function span the
> >>> + * value ranges between two adjacent pivot values.
> >>> + *
> >>> + * @note Cannot be extended without an ABI bump.
> >>> + */
> >>> +#define AV_DOVI_MAX_PIECES 8
> >>> +typedef struct AVDOVIReshapingCurve {
> >>> +    uint8_t num_pivots;                         /* [2, 9] */
> >>> +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
> >>> +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
> >>> +    /* AV_DOVI_MAPPING_POLYNOMIAL */
> >>> +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
> >>> +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
> >>> +    /* AV_DOVI_MAPPING_MMR */
> >>> +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
> >>> +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
> >>> +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
> >>> +} AVDOVIReshapingCurve;
> >>> +
> >>> +enum AVDOVINLQMethod {
> >>> +    AV_DOVI_NLQ_NONE = -1,
> >>> +    AV_DOVI_NLQ_LINEAR_DZ = 0,
> >>> +};
> >>> +
> >>> +/**
> >>> + * Coefficients of the non-linear inverse quantization. For the interpretation
> >>> + * of these, see ETSI GS CCM 001.
> >>> + *
> >>> + * @note Cannot be extended without an ABI bump.
> >>> + */
> >>> +typedef struct AVDOVINLQParams {
> >>> +    uint64_t nlq_offset;
> >>> +    uint64_t vdr_in_max;
> >>> +    /* AV_DOVI_NLQ_LINEAR_DZ */
> >>> +    uint64_t linear_deadzone_slope;
> >>> +    uint64_t linear_deadzone_threshold;
> >>> +} AVDOVINLQParams;
> >>> +
> >>> +/**
> >>> + * Dolby Vision RPU data mapping parameters.
> >>> + *
> >>> + * @note Cannot be extended without an ABI bump.
> >>> + */
> >>> +typedef struct AVDOVIDataMapping {
> >>> +    uint8_t vdr_rpu_id;
> >>> +    uint8_t mapping_color_space;
> >>> +    uint8_t mapping_chroma_format_idc;
> >>> +    AVDOVIReshapingCurve curves[3]; /* per component */
> >>> +
> >>> +    /* Non-linear inverse quantization */
> >>> +    enum AVDOVINLQMethod nlq_method_idc;
> >>> +    uint32_t num_x_partitions;
> >>> +    uint32_t num_y_partitions;
> >>> +    AVDOVINLQParams nlq[3]; /* per component */
> >>> +} AVDOVIDataMapping;
> >>> +
> >>> +typedef struct AVDOVIColorMetadata {
> >>> +    uint8_t dm_metadata_id;
> >>> +    uint8_t scene_refresh_flag;
> >>> +
> >>> +    /**
> >>> +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
> >>> +     * used instead of the matrices indicated by the frame's colorspace tags.
> >>> +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
> >>> +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
> >>> +     * crosstalk. (See the definition of the ICtCp colorspace for more
> >>> +     * information.)
> >>> +     */
> >>> +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
> >>> +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
> >>> +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
> >>> +
> >>> +    /**
> >>> +     * Extra signal metadata (see Dolby patents for more info).
> >>> +     */
> >>> +    uint16_t signal_eotf;
> >>> +    uint16_t signal_eotf_param0;
> >>> +    uint16_t signal_eotf_param1;
> >>> +    uint32_t signal_eotf_param2;
> >>> +    uint8_t signal_bit_depth;
> >>> +    uint8_t signal_color_space;
> >>> +    uint8_t signal_chroma_format;
> >>> +    uint8_t signal_full_range_flag; /* [0, 3] */
> >>> +    uint16_t source_min_pq;
> >>> +    uint16_t source_max_pq;
> >>> +    uint16_t source_diagonal;
> >>> +} AVDOVIColorMetadata;
> >>> +
> >>> +/**
> >>> + * Combined struct representing a combination of header, mapping and color
> >>> + * metadata, for attaching to frames as side data.
> >>> + *
> >>> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
> >>> + *       its size is not a part of the public ABI.
> >>> + */
> >>> +
> >>> +typedef struct AVDOVIMetadata {
> >>> +    AVDOVIRpuDataHeader header;
> >>> +    AVDOVIDataMapping mapping;
> >>> +    AVDOVIColorMetadata color;
> >>>
> >>
> >> I think we should version this by adding an integer version
> >> field, and a compile-time header #define.
> >> just in case more extra signalling is added. Then
> >> we could support it in a sort of backwards-compatible way
> >> by just documenting what has changed, and users could
> >> make their own decisions about whether to present it
> >> as-is or error out.
> >>
> >
> > That seems rather pointless to me when API users can just tie that to
> > avutil or avcodec version.
> > Additionally, how would I ever make use of that? We never support
> > running binaries older then the headers you have, and newer data can't
> > be used because you don't have the struct definition that corresponds
> > with it.
> >
>
> One way to make these structs extensible while keeping this side data a
> single buffer is by adding the offsets of header, mapping and color to
> the start of AVDOVIMetadata at the start of AVDOVIMetadata. That way
> users could use the parts they know and ignore the rest. It would even
> allow for adding further structs to AVDOVIMetadata.
>

How would you define the struct then? If we define it as a normal
struct and people naively access stuff, it might suddenly point to
something else - so if we allow us to change it, then we probably
should define it in a way that doesn't even allow the naive accidental
mis-reading.
Keeping that in mind then, it sounds like it would get rather
complicated to use.

- Hendrik
Andreas Rheinhardt Dec. 22, 2021, 2:29 p.m. UTC | #5
Hendrik Leppkes:
> On Wed, Dec 22, 2021 at 2:33 PM Andreas Rheinhardt
> <andreas.rheinhardt@outlook.com> wrote:
>>
>> Hendrik Leppkes:
>>> On Wed, Dec 22, 2021 at 2:00 PM Lynne <dev@lynne.ee> wrote:
>>>>
>>>> 20 Dec 2021, 16:31 by ffmpeg@haasn.xyz:
>>>>
>>>>> From: Niklas Haas <git@haasn.dev>
>>>>>
>>>>> Signed-off-by: Niklas Haas <git@haasn.dev>
>>>>> ---
>>>>>  doc/APIchanges        |   3 +
>>>>>  libavutil/dovi_meta.c |  12 ++++
>>>>>  libavutil/dovi_meta.h | 143 ++++++++++++++++++++++++++++++++++++++++++
>>>>>  libavutil/frame.c     |   1 +
>>>>>  libavutil/frame.h     |   9 ++-
>>>>>  libavutil/version.h   |   2 +-
>>>>>  6 files changed, 168 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/doc/APIchanges b/doc/APIchanges
>>>>> index 17aa664ca3..ff78edec88 100644
>>>>> --- a/doc/APIchanges
>>>>> +++ b/doc/APIchanges
>>>>> @@ -14,6 +14,9 @@ libavutil:     2021-04-27
>>>>>
>>>>>  API changes, most recent first:
>>>>>
>>>>> +2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
>>>>> +  Add AV_FRAME_DATA_DOVI_METADATA.
>>>>> +
>>>>>  2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
>>>>>  Add AVFormatContext io_close2 which returns an int
>>>>>
>>>>> diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
>>>>> index 7bd08f6c54..60b4cb2376 100644
>>>>> --- a/libavutil/dovi_meta.c
>>>>> +++ b/libavutil/dovi_meta.c
>>>>> @@ -33,3 +33,15 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
>>>>>
>>>>>  return dovi;
>>>>>  }
>>>>> +
>>>>> +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
>>>>> +{
>>>>> +    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
>>>>> +    if (!dovi)
>>>>> +        return NULL;
>>>>> +
>>>>> +    if (size)
>>>>> +        *size = sizeof(*dovi);
>>>>> +
>>>>> +    return dovi;
>>>>> +}
>>>>> diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
>>>>> index 299911d434..25e6d7b42f 100644
>>>>> --- a/libavutil/dovi_meta.h
>>>>> +++ b/libavutil/dovi_meta.h
>>>>> @@ -29,6 +29,7 @@
>>>>>
>>>>>  #include <stdint.h>
>>>>>  #include <stddef.h>
>>>>> +#include "rational.h"
>>>>>
>>>>>  /*
>>>>>  * DOVI configuration
>>>>> @@ -67,4 +68,146 @@ typedef struct AVDOVIDecoderConfigurationRecord {
>>>>>  */
>>>>>  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
>>>>>
>>>>> +/**
>>>>> + * Dolby Vision RPU data header.
>>>>> + *
>>>>> + * @note Cannot be extended without an ABI bump.
>>>>> + */
>>>>> +typedef struct AVDOVIRpuDataHeader {
>>>>> +    uint8_t rpu_type;
>>>>> +    uint16_t rpu_format;
>>>>> +    uint8_t vdr_rpu_profile;
>>>>> +    uint8_t vdr_rpu_level;
>>>>> +    uint8_t chroma_resampling_explicit_filter_flag;
>>>>> +    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
>>>>> +    uint8_t coef_log2_denom;
>>>>> +    uint8_t vdr_rpu_normalized_idc;
>>>>> +    uint8_t bl_video_full_range_flag;
>>>>> +    uint8_t bl_bit_depth; /* [8, 16] */
>>>>> +    uint8_t el_bit_depth; /* [8, 16] */
>>>>> +    uint8_t vdr_bit_depth; /* [8, 16] */
>>>>> +    uint8_t spatial_resampling_filter_flag;
>>>>> +    uint8_t el_spatial_resampling_filter_flag;
>>>>> +    uint8_t disable_residual_flag;
>>>>> +} AVDOVIRpuDataHeader;
>>>>> +
>>>>> +enum AVDOVIMappingMethod {
>>>>> +    AV_DOVI_MAPPING_POLYNOMIAL = 0,
>>>>> +    AV_DOVI_MAPPING_MMR = 1,
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * Coefficients of a piece-wise function. The pieces of the function span the
>>>>> + * value ranges between two adjacent pivot values.
>>>>> + *
>>>>> + * @note Cannot be extended without an ABI bump.
>>>>> + */
>>>>> +#define AV_DOVI_MAX_PIECES 8
>>>>> +typedef struct AVDOVIReshapingCurve {
>>>>> +    uint8_t num_pivots;                         /* [2, 9] */
>>>>> +    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
>>>>> +    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
>>>>> +    /* AV_DOVI_MAPPING_POLYNOMIAL */
>>>>> +    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
>>>>> +    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
>>>>> +    /* AV_DOVI_MAPPING_MMR */
>>>>> +    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
>>>>> +    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
>>>>> +    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
>>>>> +} AVDOVIReshapingCurve;
>>>>> +
>>>>> +enum AVDOVINLQMethod {
>>>>> +    AV_DOVI_NLQ_NONE = -1,
>>>>> +    AV_DOVI_NLQ_LINEAR_DZ = 0,
>>>>> +};
>>>>> +
>>>>> +/**
>>>>> + * Coefficients of the non-linear inverse quantization. For the interpretation
>>>>> + * of these, see ETSI GS CCM 001.
>>>>> + *
>>>>> + * @note Cannot be extended without an ABI bump.
>>>>> + */
>>>>> +typedef struct AVDOVINLQParams {
>>>>> +    uint64_t nlq_offset;
>>>>> +    uint64_t vdr_in_max;
>>>>> +    /* AV_DOVI_NLQ_LINEAR_DZ */
>>>>> +    uint64_t linear_deadzone_slope;
>>>>> +    uint64_t linear_deadzone_threshold;
>>>>> +} AVDOVINLQParams;
>>>>> +
>>>>> +/**
>>>>> + * Dolby Vision RPU data mapping parameters.
>>>>> + *
>>>>> + * @note Cannot be extended without an ABI bump.
>>>>> + */
>>>>> +typedef struct AVDOVIDataMapping {
>>>>> +    uint8_t vdr_rpu_id;
>>>>> +    uint8_t mapping_color_space;
>>>>> +    uint8_t mapping_chroma_format_idc;
>>>>> +    AVDOVIReshapingCurve curves[3]; /* per component */
>>>>> +
>>>>> +    /* Non-linear inverse quantization */
>>>>> +    enum AVDOVINLQMethod nlq_method_idc;
>>>>> +    uint32_t num_x_partitions;
>>>>> +    uint32_t num_y_partitions;
>>>>> +    AVDOVINLQParams nlq[3]; /* per component */
>>>>> +} AVDOVIDataMapping;
>>>>> +
>>>>> +typedef struct AVDOVIColorMetadata {
>>>>> +    uint8_t dm_metadata_id;
>>>>> +    uint8_t scene_refresh_flag;
>>>>> +
>>>>> +    /**
>>>>> +     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
>>>>> +     * used instead of the matrices indicated by the frame's colorspace tags.
>>>>> +     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
>>>>> +     * matrix based on a Hunt-Pointer-Estevez transform, but without any
>>>>> +     * crosstalk. (See the definition of the ICtCp colorspace for more
>>>>> +     * information.)
>>>>> +     */
>>>>> +    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
>>>>> +    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
>>>>> +    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
>>>>> +
>>>>> +    /**
>>>>> +     * Extra signal metadata (see Dolby patents for more info).
>>>>> +     */
>>>>> +    uint16_t signal_eotf;
>>>>> +    uint16_t signal_eotf_param0;
>>>>> +    uint16_t signal_eotf_param1;
>>>>> +    uint32_t signal_eotf_param2;
>>>>> +    uint8_t signal_bit_depth;
>>>>> +    uint8_t signal_color_space;
>>>>> +    uint8_t signal_chroma_format;
>>>>> +    uint8_t signal_full_range_flag; /* [0, 3] */
>>>>> +    uint16_t source_min_pq;
>>>>> +    uint16_t source_max_pq;
>>>>> +    uint16_t source_diagonal;
>>>>> +} AVDOVIColorMetadata;
>>>>> +
>>>>> +/**
>>>>> + * Combined struct representing a combination of header, mapping and color
>>>>> + * metadata, for attaching to frames as side data.
>>>>> + *
>>>>> + * @note The struct must be allocated with av_dovi_metadata_alloc() and
>>>>> + *       its size is not a part of the public ABI.
>>>>> + */
>>>>> +
>>>>> +typedef struct AVDOVIMetadata {
>>>>> +    AVDOVIRpuDataHeader header;
>>>>> +    AVDOVIDataMapping mapping;
>>>>> +    AVDOVIColorMetadata color;
>>>>>
>>>>
>>>> I think we should version this by adding an integer version
>>>> field, and a compile-time header #define.
>>>> just in case more extra signalling is added. Then
>>>> we could support it in a sort of backwards-compatible way
>>>> by just documenting what has changed, and users could
>>>> make their own decisions about whether to present it
>>>> as-is or error out.
>>>>
>>>
>>> That seems rather pointless to me when API users can just tie that to
>>> avutil or avcodec version.
>>> Additionally, how would I ever make use of that? We never support
>>> running binaries older then the headers you have, and newer data can't
>>> be used because you don't have the struct definition that corresponds
>>> with it.
>>>
>>
>> One way to make these structs extensible while keeping this side data a
>> single buffer is by adding the offsets of header, mapping and color to
>> the start of AVDOVIMetadata at the start of AVDOVIMetadata. That way
>> users could use the parts they know and ignore the rest. It would even
>> allow for adding further structs to AVDOVIMetadata.
>>
> 
> How would you define the struct then? If we define it as a normal
> struct and people naively access stuff, it might suddenly point to
> something else - so if we allow us to change it, then we probably
> should define it in a way that doesn't even allow the naive accidental
> mis-reading.
> Keeping that in mind then, it sounds like it would get rather
> complicated to use.
> 

Similar to how it is done in detection_bbox.h: Add some inline accessor
functions. We could even remove the substructs from the publically
visibly part of the struct altogether and only allow access through
these accessors.

- Andreas
diff mbox series

Patch

diff --git a/doc/APIchanges b/doc/APIchanges
index 17aa664ca3..ff78edec88 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -14,6 +14,9 @@  libavutil:     2021-04-27
 
 API changes, most recent first:
 
+2021-12-xx - xxxxxxxxxx - lavu 57.12.100 - frame.h
+  Add AV_FRAME_DATA_DOVI_METADATA.
+
 2021-12-xx - xxxxxxxxxx - lavf 59.10.100 - avformat.h
   Add AVFormatContext io_close2 which returns an int
 
diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c
index 7bd08f6c54..60b4cb2376 100644
--- a/libavutil/dovi_meta.c
+++ b/libavutil/dovi_meta.c
@@ -33,3 +33,15 @@  AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size)
 
     return dovi;
 }
+
+AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size)
+{
+    AVDOVIMetadata *dovi = av_mallocz(sizeof(AVDOVIMetadata));
+    if (!dovi)
+        return NULL;
+
+    if (size)
+        *size = sizeof(*dovi);
+
+    return dovi;
+}
diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h
index 299911d434..25e6d7b42f 100644
--- a/libavutil/dovi_meta.h
+++ b/libavutil/dovi_meta.h
@@ -29,6 +29,7 @@ 
 
 #include <stdint.h>
 #include <stddef.h>
+#include "rational.h"
 
 /*
  * DOVI configuration
@@ -67,4 +68,146 @@  typedef struct AVDOVIDecoderConfigurationRecord {
  */
 AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size);
 
+/**
+ * Dolby Vision RPU data header.
+ *
+ * @note Cannot be extended without an ABI bump.
+ */
+typedef struct AVDOVIRpuDataHeader {
+    uint8_t rpu_type;
+    uint16_t rpu_format;
+    uint8_t vdr_rpu_profile;
+    uint8_t vdr_rpu_level;
+    uint8_t chroma_resampling_explicit_filter_flag;
+    uint8_t coef_data_type; /* informative, lavc always converts to fixed */
+    uint8_t coef_log2_denom;
+    uint8_t vdr_rpu_normalized_idc;
+    uint8_t bl_video_full_range_flag;
+    uint8_t bl_bit_depth; /* [8, 16] */
+    uint8_t el_bit_depth; /* [8, 16] */
+    uint8_t vdr_bit_depth; /* [8, 16] */
+    uint8_t spatial_resampling_filter_flag;
+    uint8_t el_spatial_resampling_filter_flag;
+    uint8_t disable_residual_flag;
+} AVDOVIRpuDataHeader;
+
+enum AVDOVIMappingMethod {
+    AV_DOVI_MAPPING_POLYNOMIAL = 0,
+    AV_DOVI_MAPPING_MMR = 1,
+};
+
+/**
+ * Coefficients of a piece-wise function. The pieces of the function span the
+ * value ranges between two adjacent pivot values.
+ *
+ * @note Cannot be extended without an ABI bump.
+ */
+#define AV_DOVI_MAX_PIECES 8
+typedef struct AVDOVIReshapingCurve {
+    uint8_t num_pivots;                         /* [2, 9] */
+    uint16_t pivots[AV_DOVI_MAX_PIECES + 1];    /* sorted ascending */
+    enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES];
+    /* AV_DOVI_MAPPING_POLYNOMIAL */
+    uint8_t poly_order[AV_DOVI_MAX_PIECES];     /* [1, 2] */
+    int64_t poly_coef[AV_DOVI_MAX_PIECES][3];   /* x^0, x^1, x^2 */
+    /* AV_DOVI_MAPPING_MMR */
+    uint8_t mmr_order[AV_DOVI_MAX_PIECES];      /* [1, 3] */
+    int64_t mmr_constant[AV_DOVI_MAX_PIECES];
+    int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7];
+} AVDOVIReshapingCurve;
+
+enum AVDOVINLQMethod {
+    AV_DOVI_NLQ_NONE = -1,
+    AV_DOVI_NLQ_LINEAR_DZ = 0,
+};
+
+/**
+ * Coefficients of the non-linear inverse quantization. For the interpretation
+ * of these, see ETSI GS CCM 001.
+ *
+ * @note Cannot be extended without an ABI bump.
+ */
+typedef struct AVDOVINLQParams {
+    uint64_t nlq_offset;
+    uint64_t vdr_in_max;
+    /* AV_DOVI_NLQ_LINEAR_DZ */
+    uint64_t linear_deadzone_slope;
+    uint64_t linear_deadzone_threshold;
+} AVDOVINLQParams;
+
+/**
+ * Dolby Vision RPU data mapping parameters.
+ *
+ * @note Cannot be extended without an ABI bump.
+ */
+typedef struct AVDOVIDataMapping {
+    uint8_t vdr_rpu_id;
+    uint8_t mapping_color_space;
+    uint8_t mapping_chroma_format_idc;
+    AVDOVIReshapingCurve curves[3]; /* per component */
+
+    /* Non-linear inverse quantization */
+    enum AVDOVINLQMethod nlq_method_idc;
+    uint32_t num_x_partitions;
+    uint32_t num_y_partitions;
+    AVDOVINLQParams nlq[3]; /* per component */
+} AVDOVIDataMapping;
+
+typedef struct AVDOVIColorMetadata {
+    uint8_t dm_metadata_id;
+    uint8_t scene_refresh_flag;
+
+    /**
+     * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be
+     * used instead of the matrices indicated by the frame's colorspace tags.
+     * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB
+     * matrix based on a Hunt-Pointer-Estevez transform, but without any
+     * crosstalk. (See the definition of the ICtCp colorspace for more
+     * information.)
+     */
+    AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */
+    AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */
+    AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */
+
+    /**
+     * Extra signal metadata (see Dolby patents for more info).
+     */
+    uint16_t signal_eotf;
+    uint16_t signal_eotf_param0;
+    uint16_t signal_eotf_param1;
+    uint32_t signal_eotf_param2;
+    uint8_t signal_bit_depth;
+    uint8_t signal_color_space;
+    uint8_t signal_chroma_format;
+    uint8_t signal_full_range_flag; /* [0, 3] */
+    uint16_t source_min_pq;
+    uint16_t source_max_pq;
+    uint16_t source_diagonal;
+} AVDOVIColorMetadata;
+
+/**
+ * Combined struct representing a combination of header, mapping and color
+ * metadata, for attaching to frames as side data.
+ *
+ * @note The struct must be allocated with av_dovi_metadata_alloc() and
+ *       its size is not a part of the public ABI.
+ */
+
+typedef struct AVDOVIMetadata {
+    AVDOVIRpuDataHeader header;
+    AVDOVIDataMapping mapping;
+    AVDOVIColorMetadata color;
+} AVDOVIMetadata;
+
+/**
+ * Allocate an AVDOVIMetadata structure and initialize its
+ * fields to default values.
+ *
+ * @param size If this parameter is non-NULL, the size in bytes of the
+ *             allocated struct will be written here on success
+ *
+ * @return the newly allocated struct or NULL on failure
+ */
+AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size);
+
 #endif /* AVUTIL_DOVI_META_H */
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 0912ad9131..8997c85e35 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -729,6 +729,7 @@  const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters";
     case AV_FRAME_DATA_DETECTION_BBOXES:            return "Bounding boxes for object detection and classification";
     case AV_FRAME_DATA_DOVI_RPU_BUFFER:             return "Dolby Vision RPU Data";
+    case AV_FRAME_DATA_DOVI_METADATA:               return "Dolby Vision Metadata";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 3f295f6b9e..18e239f870 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -189,11 +189,18 @@  enum AVFrameSideDataType {
     AV_FRAME_DATA_DETECTION_BBOXES,
 
     /**
-     * Dolby Vision RPU data, suitable for passing to x265
+     * Dolby Vision RPU raw data, suitable for passing to x265
      * or other libraries. Array of uint8_t, with NAL emulation
      * bytes intact.
      */
     AV_FRAME_DATA_DOVI_RPU_BUFFER,
+
+    /**
+     * Parsed Dolby Vision metadata, suitable for passing to a software
+     * implementation. The payload is the AVDOVIMetadata struct defined in
+     * libavutil/dovi_meta.h.
+     */
+    AV_FRAME_DATA_DOVI_METADATA,
 };
 
 enum AVActiveFormatDescription {
diff --git a/libavutil/version.h b/libavutil/version.h
index 0e7b36865a..668f9206fe 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@ 
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  57
-#define LIBAVUTIL_VERSION_MINOR  11
+#define LIBAVUTIL_VERSION_MINOR  12
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \