Message ID | 20220103085510.7309-1-ffmpeg@haasn.xyz |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v11] lavu/frame: Add Dolby Vision metadata side data type | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | fail | Make fate failed |
Niklas Haas: > From: Niklas Haas <git@haasn.dev> > > Yeah, I think I agree that this is probably the best compromise here. > > Updated documentation (and also changed one unnecessarily-large uint64_t > to uint16_t) > > --- > In order to be able to extend this struct later (as the Dolby Vision RPU > evolves), all of the 'container' structs are considered extensible, and > the individual constituent fields must instead be accessed via offsets. > The precedent for this style of access is set in > <libavutil/detection_bbox.h> > > Signed-off-by: Niklas Haas <git@haasn.dev> > --- > doc/APIchanges | 3 + > libavutil/dovi_meta.c | 25 +++++++ > libavutil/dovi_meta.h | 166 ++++++++++++++++++++++++++++++++++++++++++ > libavutil/frame.c | 1 + > libavutil/frame.h | 9 ++- > libavutil/version.h | 2 +- > 6 files changed, 204 insertions(+), 2 deletions(-) > > diff --git a/doc/APIchanges b/doc/APIchanges > index 670a59329e..5721486f09 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -14,6 +14,9 @@ libavutil: 2021-04-27 > > API changes, most recent first: > > +2021-12-xx - xxxxxxxxxx - lavu 57.14.100 - frame.h > + Add AV_FRAME_DATA_DOVI_METADATA. > + > 2021-12-xx - xxxxxxxxxx - lavu 57.13.100 - hwcontext_videotoolbox.h > Add av_vt_pixbuf_set_attachments > > diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c > index 7bd08f6c54..9c50da561e 100644 > --- a/libavutil/dovi_meta.c > +++ b/libavutil/dovi_meta.c > @@ -33,3 +33,28 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size) > > return dovi; > } > + > +typedef struct AVDOVIMetadataInternal { > + AVDOVIMetadata metadata; > + AVDOVIRpuDataHeader header; > + AVDOVIDataMapping mapping; > + AVDOVIColorMetadata color; > +} AVDOVIMetadataInternal; > + > +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size) > +{ > + AVDOVIMetadataInternal *dovi = av_mallocz(sizeof(AVDOVIMetadataInternal)); > + if (!dovi) > + return NULL; > + > + if (size) > + *size = sizeof(*dovi); > + > + dovi->metadata = (struct AVDOVIMetadata) { > + .header_offset = offsetof(AVDOVIMetadataInternal, header), > + .mapping_offset = offsetof(AVDOVIMetadataInternal, mapping), > + .color_offset = offsetof(AVDOVIMetadataInternal, color), > + }; > + > + return &dovi->metadata; > +} > diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h > index 299911d434..3d11e02bff 100644 > --- a/libavutil/dovi_meta.h > +++ b/libavutil/dovi_meta.h > @@ -29,6 +29,7 @@ > > #include <stdint.h> > #include <stddef.h> > +#include "rational.h" > > /* > * DOVI configuration > @@ -67,4 +68,169 @@ typedef struct AVDOVIDecoderConfigurationRecord { > */ > AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size); > > +/** > + * Dolby Vision RPU data header. > + * > + * @note sizeof(AVDOVIRpuDataHeader) is not part of the public ABI. > + */ > +typedef struct AVDOVIRpuDataHeader { > + uint8_t rpu_type; > + uint16_t rpu_format; > + uint8_t vdr_rpu_profile; > + uint8_t vdr_rpu_level; > + uint8_t chroma_resampling_explicit_filter_flag; > + uint8_t coef_data_type; /* informative, lavc always converts to fixed */ > + uint8_t coef_log2_denom; > + uint8_t vdr_rpu_normalized_idc; > + uint8_t bl_video_full_range_flag; > + uint8_t bl_bit_depth; /* [8, 16] */ > + uint8_t el_bit_depth; /* [8, 16] */ > + uint8_t vdr_bit_depth; /* [8, 16] */ > + uint8_t spatial_resampling_filter_flag; > + uint8_t el_spatial_resampling_filter_flag; > + uint8_t disable_residual_flag; > +} AVDOVIRpuDataHeader; > + > +enum AVDOVIMappingMethod { > + AV_DOVI_MAPPING_POLYNOMIAL = 0, > + AV_DOVI_MAPPING_MMR = 1, > +}; > + > +/** > + * Coefficients of a piece-wise function. The pieces of the function span the > + * value ranges between two adjacent pivot values. > + */ > +#define AV_DOVI_MAX_PIECES 8 > +typedef struct AVDOVIReshapingCurve { > + uint8_t num_pivots; /* [2, 9] */ > + uint16_t pivots[AV_DOVI_MAX_PIECES + 1]; /* sorted ascending */ > + enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES]; > + /* AV_DOVI_MAPPING_POLYNOMIAL */ > + uint8_t poly_order[AV_DOVI_MAX_PIECES]; /* [1, 2] */ > + int64_t poly_coef[AV_DOVI_MAX_PIECES][3]; /* x^0, x^1, x^2 */ > + /* AV_DOVI_MAPPING_MMR */ > + uint8_t mmr_order[AV_DOVI_MAX_PIECES]; /* [1, 3] */ > + int64_t mmr_constant[AV_DOVI_MAX_PIECES]; > + int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7]; > +} AVDOVIReshapingCurve; > + > +enum AVDOVINLQMethod { > + AV_DOVI_NLQ_NONE = -1, > + AV_DOVI_NLQ_LINEAR_DZ = 0, > +}; > + > +/** > + * Coefficients of the non-linear inverse quantization. For the interpretation > + * of these, see ETSI GS CCM 001. > + */ > +typedef struct AVDOVINLQParams { > + uint16_t nlq_offset; > + uint64_t vdr_in_max; > + /* AV_DOVI_NLQ_LINEAR_DZ */ > + uint64_t linear_deadzone_slope; > + uint64_t linear_deadzone_threshold; > +} AVDOVINLQParams; > + > +/** > + * Dolby Vision RPU data mapping parameters. > + * > + * @note sizeof(AVDOVIDataMapping) is not part of the public ABI. > + */ > +typedef struct AVDOVIDataMapping { > + uint8_t vdr_rpu_id; > + uint8_t mapping_color_space; > + uint8_t mapping_chroma_format_idc; > + AVDOVIReshapingCurve curves[3]; /* per component */ > + > + /* Non-linear inverse quantization */ > + enum AVDOVINLQMethod nlq_method_idc; > + uint32_t num_x_partitions; > + uint32_t num_y_partitions; > + AVDOVINLQParams nlq[3]; /* per component */ > +} AVDOVIDataMapping; > + > +/** > + * Dolby Vision RPU colorspace metadata parameters. > + * > + * @note sizeof(AVDOVIColorMetadata) is not part of the public ABI. > + */ > +typedef struct AVDOVIColorMetadata { > + uint8_t dm_metadata_id; > + uint8_t scene_refresh_flag; > + > + /** > + * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be > + * used instead of the matrices indicated by the frame's colorspace tags. > + * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB > + * matrix based on a Hunt-Pointer-Estevez transform, but without any > + * crosstalk. (See the definition of the ICtCp colorspace for more > + * information.) > + */ > + AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */ > + AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */ > + AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */ > + > + /** > + * Extra signal metadata (see Dolby patents for more info). > + */ > + uint16_t signal_eotf; > + uint16_t signal_eotf_param0; > + uint16_t signal_eotf_param1; > + uint32_t signal_eotf_param2; > + uint8_t signal_bit_depth; > + uint8_t signal_color_space; > + uint8_t signal_chroma_format; > + uint8_t signal_full_range_flag; /* [0, 3] */ > + uint16_t source_min_pq; > + uint16_t source_max_pq; > + uint16_t source_diagonal; > +} AVDOVIColorMetadata; > + > +/** > + * Combined struct representing a combination of header, mapping and color > + * metadata, for attaching to frames as side data. > + * > + * @note The struct must be allocated with av_dovi_metadata_alloc() and > + * its size is not a part of the public ABI. > + */ > + > +typedef struct AVDOVIMetadata { > + /** > + * Offset in bytes from the beginning of this structure at which the > + * respective structs start. > + */ > + size_t header_offset; /* AVDOVIRpuDataHeader */ > + size_t mapping_offset; /* AVDOVIDataMapping */ > + size_t color_offset; /* AVDOVIColorMetadata */ > +} AVDOVIMetadata; > + > +static av_always_inline AVDOVIRpuDataHeader * > +av_dovi_get_header(const AVDOVIMetadata *data) > +{ > + return (AVDOVIRpuDataHeader *)((uint8_t *) data + data->header_offset); > +} > + > +static av_always_inline AVDOVIDataMapping * > +av_dovi_get_mapping(const AVDOVIMetadata *data) > +{ > + return (AVDOVIDataMapping *)((uint8_t *) data + data->mapping_offset); > +} > + > +static av_always_inline AVDOVIColorMetadata * > +av_dovi_get_color(const AVDOVIMetadata *data) > +{ > + return (AVDOVIColorMetadata *)((uint8_t *) data + data->color_offset); > +} > + > +/** > + * Allocate an AVDOVIMetadata structure and initialize its > + * fields to default values. > + * > + * @param size If this parameter is non-NULL, the size in bytes of the > + * allocated struct will be written here on success > + * > + * @return the newly allocated struct or NULL on failure > + */ > +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size); > + > #endif /* AVUTIL_DOVI_META_H */ > diff --git a/libavutil/frame.c b/libavutil/frame.c > index 0912ad9131..8997c85e35 100644 > --- a/libavutil/frame.c > +++ b/libavutil/frame.c > @@ -729,6 +729,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) > case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; > case AV_FRAME_DATA_DETECTION_BBOXES: return "Bounding boxes for object detection and classification"; > case AV_FRAME_DATA_DOVI_RPU_BUFFER: return "Dolby Vision RPU Data"; > + case AV_FRAME_DATA_DOVI_METADATA: return "Dolby Vision Metadata"; > } > return NULL; > } > diff --git a/libavutil/frame.h b/libavutil/frame.h > index 3f295f6b9e..18e239f870 100644 > --- a/libavutil/frame.h > +++ b/libavutil/frame.h > @@ -189,11 +189,18 @@ enum AVFrameSideDataType { > AV_FRAME_DATA_DETECTION_BBOXES, > > /** > - * Dolby Vision RPU data, suitable for passing to x265 > + * Dolby Vision RPU raw data, suitable for passing to x265 > * or other libraries. Array of uint8_t, with NAL emulation > * bytes intact. > */ > AV_FRAME_DATA_DOVI_RPU_BUFFER, > + > + /** > + * Parsed Dolby Vision metadata, suitable for passing to a software > + * implementation. The payload is the AVDOVIMetadata struct defined in > + * libavutil/dovi_meta.h. > + */ > + AV_FRAME_DATA_DOVI_METADATA, > }; > > enum AVActiveFormatDescription { > diff --git a/libavutil/version.h b/libavutil/version.h > index 3cac09cb96..318045d4c4 100644 > --- a/libavutil/version.h > +++ b/libavutil/version.h > @@ -79,7 +79,7 @@ > */ > > #define LIBAVUTIL_VERSION_MAJOR 57 > -#define LIBAVUTIL_VERSION_MINOR 13 > +#define LIBAVUTIL_VERSION_MINOR 14 > #define LIBAVUTIL_VERSION_MICRO 100 > > #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ > No further comment from me. - Andreas
diff --git a/doc/APIchanges b/doc/APIchanges index 670a59329e..5721486f09 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -14,6 +14,9 @@ libavutil: 2021-04-27 API changes, most recent first: +2021-12-xx - xxxxxxxxxx - lavu 57.14.100 - frame.h + Add AV_FRAME_DATA_DOVI_METADATA. + 2021-12-xx - xxxxxxxxxx - lavu 57.13.100 - hwcontext_videotoolbox.h Add av_vt_pixbuf_set_attachments diff --git a/libavutil/dovi_meta.c b/libavutil/dovi_meta.c index 7bd08f6c54..9c50da561e 100644 --- a/libavutil/dovi_meta.c +++ b/libavutil/dovi_meta.c @@ -33,3 +33,28 @@ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size) return dovi; } + +typedef struct AVDOVIMetadataInternal { + AVDOVIMetadata metadata; + AVDOVIRpuDataHeader header; + AVDOVIDataMapping mapping; + AVDOVIColorMetadata color; +} AVDOVIMetadataInternal; + +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size) +{ + AVDOVIMetadataInternal *dovi = av_mallocz(sizeof(AVDOVIMetadataInternal)); + if (!dovi) + return NULL; + + if (size) + *size = sizeof(*dovi); + + dovi->metadata = (struct AVDOVIMetadata) { + .header_offset = offsetof(AVDOVIMetadataInternal, header), + .mapping_offset = offsetof(AVDOVIMetadataInternal, mapping), + .color_offset = offsetof(AVDOVIMetadataInternal, color), + }; + + return &dovi->metadata; +} diff --git a/libavutil/dovi_meta.h b/libavutil/dovi_meta.h index 299911d434..3d11e02bff 100644 --- a/libavutil/dovi_meta.h +++ b/libavutil/dovi_meta.h @@ -29,6 +29,7 @@ #include <stdint.h> #include <stddef.h> +#include "rational.h" /* * DOVI configuration @@ -67,4 +68,169 @@ typedef struct AVDOVIDecoderConfigurationRecord { */ AVDOVIDecoderConfigurationRecord *av_dovi_alloc(size_t *size); +/** + * Dolby Vision RPU data header. + * + * @note sizeof(AVDOVIRpuDataHeader) is not part of the public ABI. + */ +typedef struct AVDOVIRpuDataHeader { + uint8_t rpu_type; + uint16_t rpu_format; + uint8_t vdr_rpu_profile; + uint8_t vdr_rpu_level; + uint8_t chroma_resampling_explicit_filter_flag; + uint8_t coef_data_type; /* informative, lavc always converts to fixed */ + uint8_t coef_log2_denom; + uint8_t vdr_rpu_normalized_idc; + uint8_t bl_video_full_range_flag; + uint8_t bl_bit_depth; /* [8, 16] */ + uint8_t el_bit_depth; /* [8, 16] */ + uint8_t vdr_bit_depth; /* [8, 16] */ + uint8_t spatial_resampling_filter_flag; + uint8_t el_spatial_resampling_filter_flag; + uint8_t disable_residual_flag; +} AVDOVIRpuDataHeader; + +enum AVDOVIMappingMethod { + AV_DOVI_MAPPING_POLYNOMIAL = 0, + AV_DOVI_MAPPING_MMR = 1, +}; + +/** + * Coefficients of a piece-wise function. The pieces of the function span the + * value ranges between two adjacent pivot values. + */ +#define AV_DOVI_MAX_PIECES 8 +typedef struct AVDOVIReshapingCurve { + uint8_t num_pivots; /* [2, 9] */ + uint16_t pivots[AV_DOVI_MAX_PIECES + 1]; /* sorted ascending */ + enum AVDOVIMappingMethod mapping_idc[AV_DOVI_MAX_PIECES]; + /* AV_DOVI_MAPPING_POLYNOMIAL */ + uint8_t poly_order[AV_DOVI_MAX_PIECES]; /* [1, 2] */ + int64_t poly_coef[AV_DOVI_MAX_PIECES][3]; /* x^0, x^1, x^2 */ + /* AV_DOVI_MAPPING_MMR */ + uint8_t mmr_order[AV_DOVI_MAX_PIECES]; /* [1, 3] */ + int64_t mmr_constant[AV_DOVI_MAX_PIECES]; + int64_t mmr_coef[AV_DOVI_MAX_PIECES][3/* order - 1 */][7]; +} AVDOVIReshapingCurve; + +enum AVDOVINLQMethod { + AV_DOVI_NLQ_NONE = -1, + AV_DOVI_NLQ_LINEAR_DZ = 0, +}; + +/** + * Coefficients of the non-linear inverse quantization. For the interpretation + * of these, see ETSI GS CCM 001. + */ +typedef struct AVDOVINLQParams { + uint16_t nlq_offset; + uint64_t vdr_in_max; + /* AV_DOVI_NLQ_LINEAR_DZ */ + uint64_t linear_deadzone_slope; + uint64_t linear_deadzone_threshold; +} AVDOVINLQParams; + +/** + * Dolby Vision RPU data mapping parameters. + * + * @note sizeof(AVDOVIDataMapping) is not part of the public ABI. + */ +typedef struct AVDOVIDataMapping { + uint8_t vdr_rpu_id; + uint8_t mapping_color_space; + uint8_t mapping_chroma_format_idc; + AVDOVIReshapingCurve curves[3]; /* per component */ + + /* Non-linear inverse quantization */ + enum AVDOVINLQMethod nlq_method_idc; + uint32_t num_x_partitions; + uint32_t num_y_partitions; + AVDOVINLQParams nlq[3]; /* per component */ +} AVDOVIDataMapping; + +/** + * Dolby Vision RPU colorspace metadata parameters. + * + * @note sizeof(AVDOVIColorMetadata) is not part of the public ABI. + */ +typedef struct AVDOVIColorMetadata { + uint8_t dm_metadata_id; + uint8_t scene_refresh_flag; + + /** + * Coefficients of the custom Dolby Vision IPT-PQ matrices. These are to be + * used instead of the matrices indicated by the frame's colorspace tags. + * The output of rgb_to_lms_matrix is to be fed into a BT.2020 LMS->RGB + * matrix based on a Hunt-Pointer-Estevez transform, but without any + * crosstalk. (See the definition of the ICtCp colorspace for more + * information.) + */ + AVRational ycc_to_rgb_matrix[9]; /* before PQ linearization */ + AVRational ycc_to_rgb_offset[3]; /* input offset of neutral value */ + AVRational rgb_to_lms_matrix[9]; /* after PQ linearization */ + + /** + * Extra signal metadata (see Dolby patents for more info). + */ + uint16_t signal_eotf; + uint16_t signal_eotf_param0; + uint16_t signal_eotf_param1; + uint32_t signal_eotf_param2; + uint8_t signal_bit_depth; + uint8_t signal_color_space; + uint8_t signal_chroma_format; + uint8_t signal_full_range_flag; /* [0, 3] */ + uint16_t source_min_pq; + uint16_t source_max_pq; + uint16_t source_diagonal; +} AVDOVIColorMetadata; + +/** + * Combined struct representing a combination of header, mapping and color + * metadata, for attaching to frames as side data. + * + * @note The struct must be allocated with av_dovi_metadata_alloc() and + * its size is not a part of the public ABI. + */ + +typedef struct AVDOVIMetadata { + /** + * Offset in bytes from the beginning of this structure at which the + * respective structs start. + */ + size_t header_offset; /* AVDOVIRpuDataHeader */ + size_t mapping_offset; /* AVDOVIDataMapping */ + size_t color_offset; /* AVDOVIColorMetadata */ +} AVDOVIMetadata; + +static av_always_inline AVDOVIRpuDataHeader * +av_dovi_get_header(const AVDOVIMetadata *data) +{ + return (AVDOVIRpuDataHeader *)((uint8_t *) data + data->header_offset); +} + +static av_always_inline AVDOVIDataMapping * +av_dovi_get_mapping(const AVDOVIMetadata *data) +{ + return (AVDOVIDataMapping *)((uint8_t *) data + data->mapping_offset); +} + +static av_always_inline AVDOVIColorMetadata * +av_dovi_get_color(const AVDOVIMetadata *data) +{ + return (AVDOVIColorMetadata *)((uint8_t *) data + data->color_offset); +} + +/** + * Allocate an AVDOVIMetadata structure and initialize its + * fields to default values. + * + * @param size If this parameter is non-NULL, the size in bytes of the + * allocated struct will be written here on success + * + * @return the newly allocated struct or NULL on failure + */ +AVDOVIMetadata *av_dovi_metadata_alloc(size_t *size); + #endif /* AVUTIL_DOVI_META_H */ diff --git a/libavutil/frame.c b/libavutil/frame.c index 0912ad9131..8997c85e35 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -729,6 +729,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; case AV_FRAME_DATA_DETECTION_BBOXES: return "Bounding boxes for object detection and classification"; case AV_FRAME_DATA_DOVI_RPU_BUFFER: return "Dolby Vision RPU Data"; + case AV_FRAME_DATA_DOVI_METADATA: return "Dolby Vision Metadata"; } return NULL; } diff --git a/libavutil/frame.h b/libavutil/frame.h index 3f295f6b9e..18e239f870 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -189,11 +189,18 @@ enum AVFrameSideDataType { AV_FRAME_DATA_DETECTION_BBOXES, /** - * Dolby Vision RPU data, suitable for passing to x265 + * Dolby Vision RPU raw data, suitable for passing to x265 * or other libraries. Array of uint8_t, with NAL emulation * bytes intact. */ AV_FRAME_DATA_DOVI_RPU_BUFFER, + + /** + * Parsed Dolby Vision metadata, suitable for passing to a software + * implementation. The payload is the AVDOVIMetadata struct defined in + * libavutil/dovi_meta.h. + */ + AV_FRAME_DATA_DOVI_METADATA, }; enum AVActiveFormatDescription { diff --git a/libavutil/version.h b/libavutil/version.h index 3cac09cb96..318045d4c4 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 57 -#define LIBAVUTIL_VERSION_MINOR 13 +#define LIBAVUTIL_VERSION_MINOR 14 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \