diff mbox series

[FFmpeg-devel,1/2] avformat/flvenc: support enhanced flv PacketTypeMetadata

Message ID tencent_843FDD31634A4410E8513087C84E38086209@qq.com
State New
Headers show
Series [FFmpeg-devel,1/2] avformat/flvenc: support enhanced flv PacketTypeMetadata | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Diego Felix de Souza via ffmpeg-devel Nov. 15, 2023, 2:40 p.m. UTC
From: Zhu Pengfei <411294962@qq.com>

Signed-off-by: Zhu Pengfei <411294962@qq.com>
---
 libavformat/flvenc.c | 158 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)

Comments

James Almer Dec. 4, 2023, 9:16 p.m. UTC | #1
On 11/15/2023 11:40 AM, zhupengfei via ffmpeg-devel wrote:
> From: Zhu Pengfei <411294962@qq.com>
> 
> Signed-off-by: Zhu Pengfei <411294962@qq.com>
> ---
>   libavformat/flvenc.c | 158 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 158 insertions(+)
> 
> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
> index f6d10f331c..615a072928 100644
> --- a/libavformat/flvenc.c
> +++ b/libavformat/flvenc.c
> @@ -24,6 +24,7 @@
>   #include "libavutil/intfloat.h"
>   #include "libavutil/avassert.h"
>   #include "libavutil/mathematics.h"
> +#include "libavutil/mastering_display_metadata.h"
>   #include "libavcodec/codec_desc.h"
>   #include "libavcodec/mpeg4audio.h"
>   #include "avio.h"
> @@ -124,6 +125,7 @@ typedef struct FLVContext {
>   
>       int flags;
>       int64_t last_ts[FLV_STREAM_TYPE_NB];
> +    int write_metadata_pkt;
>   } FLVContext;
>   
>   static int get_audio_flags(AVFormatContext *s, AVCodecParameters *par)
> @@ -478,6 +480,161 @@ static void write_metadata(AVFormatContext *s, unsigned int ts)
>       avio_wb32(pb, flv->metadata_totalsize + 11);
>   }
>   
> +static void flv_write_metadata_packet_if_needed(AVFormatContext *s, AVCodecParameters *par, unsigned int ts)

flv_write_metadata_packet() is enough and shorter.

> +{
> +    AVIOContext *pb = s->pb;
> +    FLVContext *flv = s->priv_data;
> +    AVContentLightMetadata *lightMetadata = NULL;
> +    AVMasteringDisplayMetadata *displayMetadata = NULL;
> +    const int flags_size = 5;
> +    int64_t metadata_size_pos = 0;
> +    int64_t total_size = 0;
> +    const AVPacketSideData *side_data = NULL;
> +
> +    if (flv->write_metadata_pkt) {

No braces for single-line blocks, please.

> +        return;
> +    }
> +
> +    side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
> +                                        AV_PKT_DATA_CONTENT_LIGHT_LEVEL);
> +    if (side_data)
> +        lightMetadata = (AVContentLightMetadata *)side_data->data;
> +
> +    side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
> +                                        AV_PKT_DATA_MASTERING_DISPLAY_METADATA);
> +    if (side_data)
> +        displayMetadata = (AVMasteringDisplayMetadata *)side_data->data;
> +
> +    if (!lightMetadata && !displayMetadata) {
> +        return;

Can't you write transferCharacteristics, matrixCoefficients and 
colorPrimaries if no HDR metadata is present?

> +    }
> +
> +    /*
> +    * Reference Enhancing FLV
> +    * https://github.com/veovera/enhanced-rtmp/blob/main/enhanced-rtmp.pdf
> +    * */
> +    avio_w8(pb, FLV_TAG_TYPE_VIDEO); //写入Video /Audio tag type
> +    metadata_size_pos = avio_tell(pb);
> +    avio_wb24(pb, 0 + flags_size);
> +    put_timestamp(pb, ts); //ts = pkt->dts, gen
> +    avio_wb24(pb, flv->reserved);
> +
> +    if (par->codec_id == AV_CODEC_ID_HEVC) {
> +        avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMetadata| FLV_FRAME_VIDEO_INFO_CMD); // ExVideoTagHeader mode with PacketTypeMetadata
> +        avio_write(pb, "hvc1", 4);
> +    } else if (par->codec_id == AV_CODEC_ID_AV1 || par->codec_id == AV_CODEC_ID_VP9) {
> +        avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMetadata| FLV_FRAME_VIDEO_INFO_CMD);
> +        avio_write(pb, par->codec_id == AV_CODEC_ID_AV1 ? "av01" : "vp09", 4);
> +    } else {
> +        return;

You should only call this function for the three supported codecs, then.
Otherwise, if you reach this point, you'll already have written some bytes,
including an FLV_TAG_TYPE_VIDEO, for streams like AAC.

> +    }
> +
> +    avio_w8(pb, AMF_DATA_TYPE_STRING);
> +    put_amf_string(pb, "colorInfo");
> +
> +    avio_w8(pb, AMF_DATA_TYPE_OBJECT);
> +
> +    put_amf_string(pb, "colorConfig");  // colorConfig
> +
> +    /* mixed array (hash) with size and string/type/data tuples */
> +    avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +
> +    avio_wb32(pb, 0); // write array count
> +
> +    if (par->color_trc != AVCOL_TRC_UNSPECIFIED &&
> +        par->color_trc < AVCOL_TRC_NB) {
> +        put_amf_string(pb, "transferCharacteristics");  // color_trc
> +        put_amf_double(pb, par->color_trc);
> +    }
> +
> +    if (par->color_space != AVCOL_SPC_UNSPECIFIED &&
> +        par->color_space < AVCOL_SPC_NB) {
> +        put_amf_string(pb, "matrixCoefficients"); // colorspace
> +        put_amf_double(pb, par->color_space);
> +    }
> +
> +    if (par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
> +        par->color_primaries < AVCOL_PRI_NB) {
> +        put_amf_string(pb, "colorPrimaries"); // color_primaries
> +        put_amf_double(pb, par->color_primaries);
> +    }
> +
> +    put_amf_string(pb, "");
> +    avio_w8(pb, AMF_END_OF_OBJECT); // array end of object
> +
> +    if (lightMetadata) {
> +        put_amf_string(pb, "hdrCll");
> +
> +        /* mixed array (hash) with size and string/type/data tuples */
> +        avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +
> +        avio_wb32(pb, 0); // write array count
> +
> +        put_amf_string(pb, "maxFall");
> +        put_amf_double(pb, lightMetadata->MaxFALL);
> +
> +        put_amf_string(pb, "maxCLL");
> +        put_amf_double(pb, lightMetadata->MaxCLL);
> +
> +        // array end of object
> +        put_amf_string(pb, "");
> +        avio_w8(pb, AMF_END_OF_OBJECT);
> +    }
> +
> +    if (displayMetadata && (displayMetadata->has_primaries || displayMetadata->has_luminance)) {
> +        put_amf_string(pb, "hdrMdcv");
> +
> +        /* mixed array (hash) with size and string/type/data tuples */
> +        avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
> +        avio_wb32(pb, 0); // write array count
> +
> +        if (displayMetadata->has_primaries) {
> +            put_amf_string(pb, "redX");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][0]));
> +
> +            put_amf_string(pb, "redY");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][1]));
> +
> +            put_amf_string(pb, "greenX");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][0]));
> +
> +            put_amf_string(pb, "greenY");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][1]));
> +
> +            put_amf_string(pb, "blueX");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][0]));
> +
> +            put_amf_string(pb, "blueY");
> +            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][1]));
> +
> +            put_amf_string(pb, "whitePointX");
> +            put_amf_double(pb, av_q2d(displayMetadata->white_point[0]));
> +
> +            put_amf_string(pb, "whitePointY");
> +            put_amf_double(pb, av_q2d(displayMetadata->white_point[1]));
> +        }
> +        if (displayMetadata->has_luminance) {
> +            put_amf_string(pb, "maxLuminance");
> +            put_amf_double(pb, av_q2d(displayMetadata->max_luminance));
> +
> +            put_amf_string(pb, "minLuminance");
> +            put_amf_double(pb, av_q2d(displayMetadata->min_luminance));
> +        }
> +        put_amf_string(pb, "");
> +        avio_w8(pb, AMF_END_OF_OBJECT);
> +    }
> +    avio_w8(pb, AMF_DATA_TYPE_OBJECT_END);
> +
> +    total_size = avio_tell(pb) - metadata_size_pos - 10;
> +    avio_seek(pb, metadata_size_pos, SEEK_SET);
> +    avio_wb24(pb, total_size);
> +    avio_tell(pb);
> +    avio_skip(pb, total_size + 10 - 3);
> +    avio_tell(pb);
> +    avio_wb32(pb, total_size + 11); // previous tag size
> +    flv->write_metadata_pkt = 1;
> +}
> +
>   static int unsupported_codec(AVFormatContext *s,
>                                const char* type, int codec_id)
>   {
> @@ -878,6 +1035,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
>               memcpy(par->extradata, side, side_size);
>               flv_write_codec_header(s, par, pkt->dts);
>           }
> +        flv_write_metadata_packet_if_needed(s, par, pkt->dts);
>       }
>   
>       if (flv->delay == AV_NOPTS_VALUE)
diff mbox series

Patch

diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index f6d10f331c..615a072928 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -24,6 +24,7 @@ 
 #include "libavutil/intfloat.h"
 #include "libavutil/avassert.h"
 #include "libavutil/mathematics.h"
+#include "libavutil/mastering_display_metadata.h"
 #include "libavcodec/codec_desc.h"
 #include "libavcodec/mpeg4audio.h"
 #include "avio.h"
@@ -124,6 +125,7 @@  typedef struct FLVContext {
 
     int flags;
     int64_t last_ts[FLV_STREAM_TYPE_NB];
+    int write_metadata_pkt;
 } FLVContext;
 
 static int get_audio_flags(AVFormatContext *s, AVCodecParameters *par)
@@ -478,6 +480,161 @@  static void write_metadata(AVFormatContext *s, unsigned int ts)
     avio_wb32(pb, flv->metadata_totalsize + 11);
 }
 
+/**
+ * Write the enhanced-FLV PacketTypeMetadata video tag carrying "colorInfo".
+ *
+ * The tag is written at most once per muxer: it is skipped when it was already
+ * written, when the codec is not HEVC, AV1 or VP9, or when the codec parameters
+ * carry neither content light level nor mastering display side data.
+ *
+ * @param s   muxer context; the tag is written to s->pb
+ * @param par codec parameters supplying codec id, colour properties and side data
+ * @param ts  timestamp stored in the tag header
+ */
+static void flv_write_metadata_packet_if_needed(AVFormatContext *s, AVCodecParameters *par, unsigned int ts)
+{
+    AVIOContext *pb = s->pb;
+    FLVContext *flv = s->priv_data;
+    AVContentLightMetadata *lightMetadata = NULL;
+    AVMasteringDisplayMetadata *displayMetadata = NULL;
+    const AVPacketSideData *side_data = NULL;
+    const char *fourcc = NULL;
+    const int flags_size = 5;
+    int64_t metadata_size_pos = 0;
+    int64_t total_size = 0;
+
+    if (flv->write_metadata_pkt)
+        return;
+
+    /* Resolve the FourCC first: nothing may be written for any other codec. */
+    if (par->codec_id == AV_CODEC_ID_HEVC)
+        fourcc = "hvc1";
+    else if (par->codec_id == AV_CODEC_ID_AV1)
+        fourcc = "av01";
+    else if (par->codec_id == AV_CODEC_ID_VP9)
+        fourcc = "vp09";
+    else
+        return;
+
+    side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
+                                        AV_PKT_DATA_CONTENT_LIGHT_LEVEL);
+    if (side_data)
+        lightMetadata = (AVContentLightMetadata *)side_data->data;
+
+    side_data = av_packet_side_data_get(par->coded_side_data, par->nb_coded_side_data,
+                                        AV_PKT_DATA_MASTERING_DISPLAY_METADATA);
+    if (side_data)
+        displayMetadata = (AVMasteringDisplayMetadata *)side_data->data;
+
+    if (!lightMetadata && !displayMetadata)
+        return;
+
+    /* Reference: Enhancing FLV
+     * https://github.com/veovera/enhanced-rtmp/blob/main/enhanced-rtmp.pdf */
+    avio_w8(pb, FLV_TAG_TYPE_VIDEO); // tag type
+    metadata_size_pos = avio_tell(pb);
+    avio_wb24(pb, 0 + flags_size); // provisional data size, patched below
+    put_timestamp(pb, ts);
+    avio_wb24(pb, flv->reserved);
+
+    // ExVideoTagHeader mode with PacketTypeMetadata
+    avio_w8(pb, FLV_IS_EX_HEADER | PacketTypeMetadata | FLV_FRAME_VIDEO_INFO_CMD);
+    avio_write(pb, fourcc, 4);
+
+    avio_w8(pb, AMF_DATA_TYPE_STRING);
+    put_amf_string(pb, "colorInfo");
+    avio_w8(pb, AMF_DATA_TYPE_OBJECT);
+
+    /* colorConfig: mixed array (hash) with size and string/type/data tuples */
+    put_amf_string(pb, "colorConfig");
+    avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
+    avio_wb32(pb, 0); // array count
+
+    if (par->color_trc != AVCOL_TRC_UNSPECIFIED &&
+        par->color_trc < AVCOL_TRC_NB) {
+        put_amf_string(pb, "transferCharacteristics");
+        put_amf_double(pb, par->color_trc);
+    }
+
+    if (par->color_space != AVCOL_SPC_UNSPECIFIED &&
+        par->color_space < AVCOL_SPC_NB) {
+        put_amf_string(pb, "matrixCoefficients");
+        put_amf_double(pb, par->color_space);
+    }
+
+    if (par->color_primaries != AVCOL_PRI_UNSPECIFIED &&
+        par->color_primaries < AVCOL_PRI_NB) {
+        put_amf_string(pb, "colorPrimaries");
+        put_amf_double(pb, par->color_primaries);
+    }
+
+    put_amf_string(pb, "");
+    avio_w8(pb, AMF_END_OF_OBJECT); // end of colorConfig
+
+    if (lightMetadata) {
+        put_amf_string(pb, "hdrCll");
+        avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
+        avio_wb32(pb, 0); // array count
+
+        put_amf_string(pb, "maxFall");
+        put_amf_double(pb, lightMetadata->MaxFALL);
+
+        put_amf_string(pb, "maxCLL");
+        put_amf_double(pb, lightMetadata->MaxCLL);
+
+        put_amf_string(pb, "");
+        avio_w8(pb, AMF_END_OF_OBJECT); // end of hdrCll
+    }
+
+    if (displayMetadata && (displayMetadata->has_primaries || displayMetadata->has_luminance)) {
+        put_amf_string(pb, "hdrMdcv");
+        avio_w8(pb, AMF_DATA_TYPE_MIXEDARRAY);
+        avio_wb32(pb, 0); // array count
+
+        if (displayMetadata->has_primaries) {
+            put_amf_string(pb, "redX");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][0]));
+            put_amf_string(pb, "redY");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[0][1]));
+
+            put_amf_string(pb, "greenX");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][0]));
+            put_amf_string(pb, "greenY");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[1][1]));
+
+            put_amf_string(pb, "blueX");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][0]));
+            put_amf_string(pb, "blueY");
+            put_amf_double(pb, av_q2d(displayMetadata->display_primaries[2][1]));
+
+            put_amf_string(pb, "whitePointX");
+            put_amf_double(pb, av_q2d(displayMetadata->white_point[0]));
+            put_amf_string(pb, "whitePointY");
+            put_amf_double(pb, av_q2d(displayMetadata->white_point[1]));
+        }
+        if (displayMetadata->has_luminance) {
+            put_amf_string(pb, "maxLuminance");
+            put_amf_double(pb, av_q2d(displayMetadata->max_luminance));
+            put_amf_string(pb, "minLuminance");
+            put_amf_double(pb, av_q2d(displayMetadata->min_luminance));
+        }
+        put_amf_string(pb, "");
+        avio_w8(pb, AMF_END_OF_OBJECT); // end of hdrMdcv
+    }
+
+    /* Close colorInfo with the same empty-key terminator as the nested containers. */
+    put_amf_string(pb, "");
+    avio_w8(pb, AMF_DATA_TYPE_OBJECT_END);
+
+    /* Patch the real data size into the header, then append previous tag size. */
+    total_size = avio_tell(pb) - metadata_size_pos - 10;
+    avio_seek(pb, metadata_size_pos, SEEK_SET);
+    avio_wb24(pb, total_size);
+    avio_skip(pb, total_size + 10 - 3);
+    avio_wb32(pb, total_size + 11); // previous tag size
+    flv->write_metadata_pkt = 1;
+}
+
 static int unsupported_codec(AVFormatContext *s,
                              const char* type, int codec_id)
 {
@@ -878,6 +1035,7 @@  static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
             memcpy(par->extradata, side, side_size);
             flv_write_codec_header(s, par, pkt->dts);
         }
+        flv_write_metadata_packet_if_needed(s, par, pkt->dts);
     }
 
     if (flv->delay == AV_NOPTS_VALUE)