diff mbox series

[FFmpeg-devel,v5,3/3] avcodec/nvenc: write out user data unregistered SEI

Message ID 20210517081944.35238-4-bradh@frogmouth.net
State Accepted
Headers show
Series [FFmpeg-devel,v5,1/3] libavcodec/libx264: write out user data unregistered SEI | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Brad Hards May 17, 2021, 8:19 a.m. UTC
Signed-off-by: Brad Hards <bradh@frogmouth.net>
---
 libavcodec/nvenc.c | 64 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 11 deletions(-)

Comments

Timo Rothenpieler May 17, 2021, 11:25 a.m. UTC | #1
On 17.05.2021 10:19, Brad Hards wrote:
> Signed-off-by: Brad Hards <bradh@frogmouth.net>
> ---
>   libavcodec/nvenc.c | 64 ++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 53 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
> index 0dcd93a99c..e22fdfb5a8 100644
> --- a/libavcodec/nvenc.c
> +++ b/libavcodec/nvenc.c
> @@ -2170,9 +2170,10 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>       NVENCSTATUS nv_status;
>       NvencSurface *tmp_out_surf, *in_surf;
>       int res, res2;
> -    NV_ENC_SEI_PAYLOAD sei_data[8];
> +    NV_ENC_SEI_PAYLOAD *sei_data = 0;
>       int sei_count = 0;
>       int i;
> +    int total_unregistered_sei = 0;
>   
>       NvencContext *ctx = avctx->priv_data;
>       NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
> @@ -2185,6 +2186,8 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>           return AVERROR(EINVAL);
>   
>       if (frame && frame->buf[0]) {
> +        void *a53_data = NULL;
> +        void *tc_data = NULL;
>           in_surf = get_free_frame(ctx);
>           if (!in_surf)
>               return AVERROR(EAGAIN);
> @@ -2230,7 +2233,6 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>           pic_params.inputTimeStamp = frame->pts;
>   
>           if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
> -            void *a53_data = NULL;
>               size_t a53_size = 0;
>   
>               if (ff_alloc_a53_sei(frame, 0, (void**)&a53_data, &a53_size) < 0) {
> @@ -2238,15 +2240,21 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>               }
>   
>               if (a53_data) {
> -                sei_data[sei_count].payloadSize = (uint32_t)a53_size;
> -                sei_data[sei_count].payloadType = 4;
> -                sei_data[sei_count].payload = (uint8_t*)a53_data;
> -                sei_count ++;
> +                sei_data = av_realloc_array(sei_data, sei_count + 1, sizeof(NV_ENC_SEI_PAYLOAD));

Doing this realloc dance every frame is extremely inefficient.
The sei_data array and its current size could instead be stored in the 
nvenc context, with a best-guess initial size, and then only grow when 
the already allocated size is too small.

> +                if (!sei_data) {
> +                    av_free(a53_data);
> +                    sei_count = 0;
> +                    return AVERROR(ENOMEM);
> +                } else {
> +                    sei_data[sei_count].payloadSize = (uint32_t)a53_size;
> +                    sei_data[sei_count].payloadType = 4;
> +                    sei_data[sei_count].payload = (uint8_t*)a53_data;
> +                    sei_count ++;
> +                }
>               }
>           }
>   
>           if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
> -            void *tc_data = NULL;
>               size_t tc_size = 0;
>   
>               if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, (void**)&tc_data, &tc_size) < 0) {
> @@ -2254,13 +2262,46 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>               }
>   
>               if (tc_data) {
> -                sei_data[sei_count].payloadSize = (uint32_t)tc_size;
> -                sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
> -                sei_data[sei_count].payload = (uint8_t*)tc_data;
> -                sei_count ++;
> +                sei_data = av_realloc_array(sei_data, sei_count + 1, sizeof(NV_ENC_SEI_PAYLOAD));
> +                if (!sei_data) {
> +                    av_free(a53_data);
> +                    av_free(tc_data);
> +                    sei_count = 0;
> +                    return AVERROR(ENOMEM);
> +                } else {
> +                    sei_data[sei_count].payloadSize = (uint32_t)tc_size;
> +                    sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
> +                    sei_data[sei_count].payload = (uint8_t*)tc_data;
> +                    sei_count ++;
> +                }
>               }
>           }
>   
> +        for (int j = 0; j < frame->nb_side_data; j++)
> +            if (frame->side_data[j]->type == AV_FRAME_DATA_SEI_UNREGISTERED)
> +                total_unregistered_sei++;
> +        if (total_unregistered_sei > 0) {
> +            sei_data = av_realloc_array(sei_data,
> +                                        sei_count + total_unregistered_sei,
> +                                        sizeof(NV_ENC_SEI_PAYLOAD));
> +            if (!sei_data) {
> +                av_free(a53_data);
> +                av_free(tc_data);
> +                sei_count = 0;
> +                return AVERROR(ENOMEM);
> +            } else
> +                for (int j = 0; j < frame->nb_side_data; j++) {
> +                    AVFrameSideData *side_data = frame->side_data[j];
> +                    if (side_data->type == AV_FRAME_DATA_SEI_UNREGISTERED) {
> +                        sei_data[sei_count].payloadSize = side_data->size;
> +                        sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED;
> +                        sei_data[sei_count].payload = av_memdup(side_data->data, side_data->size);

Does this ever get freed anywhere?
This looks like an intense memory leak to me.

> +                        if (sei_data[sei_count].payload)
> +                            sei_count ++;
> +                    }
> +                }
> +        }
> +
>           nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data, sei_count);
>       } else {
>           pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
> @@ -2274,6 +2315,7 @@ static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
>   
>       for ( i = 0; i < sei_count; i++)
>           av_freep(&sei_data[i].payload);
> +    av_free(sei_data);
>   
>       res = nvenc_pop_context(avctx);
>       if (res < 0)
>
Marton Balint May 17, 2021, 3:52 p.m. UTC | #2
On Mon, 17 May 2021, Timo Rothenpieler wrote:

> On 17.05.2021 10:19, Brad Hards wrote:
>> Signed-off-by: Brad Hards <bradh@frogmouth.net>
>> ---
>>   libavcodec/nvenc.c | 64 ++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 53 insertions(+), 11 deletions(-)
>> 
>> diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
>> index 0dcd93a99c..e22fdfb5a8 100644
>> --- a/libavcodec/nvenc.c
>> +++ b/libavcodec/nvenc.c
>> @@ -2170,9 +2170,10 @@ static int nvenc_send_frame(AVCodecContext *avctx, 
>> const AVFrame *frame)
>>       NVENCSTATUS nv_status;
>>       NvencSurface *tmp_out_surf, *in_surf;
>>       int res, res2;
>> -    NV_ENC_SEI_PAYLOAD sei_data[8];
>> +    NV_ENC_SEI_PAYLOAD *sei_data = 0;
>>       int sei_count = 0;
>>       int i;
>> +    int total_unregistered_sei = 0;
>>         NvencContext *ctx = avctx->priv_data;
>>       NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
>> @@ -2185,6 +2186,8 @@ static int nvenc_send_frame(AVCodecContext *avctx, 
>> const AVFrame *frame)
>>           return AVERROR(EINVAL);
>>         if (frame && frame->buf[0]) {
>> +        void *a53_data = NULL;
>> +        void *tc_data = NULL;
>>           in_surf = get_free_frame(ctx);
>>           if (!in_surf)
>>               return AVERROR(EAGAIN);
>> @@ -2230,7 +2233,6 @@ static int nvenc_send_frame(AVCodecContext *avctx, 
>> const AVFrame *frame)
>>           pic_params.inputTimeStamp = frame->pts;
>>             if (ctx->a53_cc && av_frame_get_side_data(frame, 
>> AV_FRAME_DATA_A53_CC)) {
>> -            void *a53_data = NULL;
>>               size_t a53_size = 0;
>>                 if (ff_alloc_a53_sei(frame, 0, (void**)&a53_data, 
>> &a53_size) < 0) {
>> @@ -2238,15 +2240,21 @@ static int nvenc_send_frame(AVCodecContext *avctx, 
>> const AVFrame *frame)
>>               }
>>                 if (a53_data) {
>> -                sei_data[sei_count].payloadSize = (uint32_t)a53_size;
>> -                sei_data[sei_count].payloadType = 4;
>> -                sei_data[sei_count].payload = (uint8_t*)a53_data;
>> -                sei_count ++;
>> +                sei_data = av_realloc_array(sei_data, sei_count + 1, 
>> sizeof(NV_ENC_SEI_PAYLOAD));
>
> Doing this realloc dance every frame is extremely inefficient.
> The sei_data array and its current size could instead be stored in the nvenc 
> context, with a best-guess initial size, and then only grow when the already 
> allocated size is too small.

Don't we have av_fast_realloc for this?

Thanks,
Marton
Timo Rothenpieler May 17, 2021, 3:57 p.m. UTC | #3
On 17.05.2021 17:52, Marton Balint wrote:
> Don't we have av_fast_realloc for this?

Yes, but you still need to save the pointer for that somewhere.
Brad Hards May 18, 2021, 6:43 a.m. UTC | #4
On Monday, 17 May 2021 9:25:10 PM AEST Timo Rothenpieler wrote:
> On 17.05.2021 10:19, Brad Hards wrote:
> > Signed-off-by: Brad Hards <bradh@frogmouth.net>
> > ---
> > 
> >   libavcodec/nvenc.c | 64 ++++++++++++++++++++++++++++++++++++++--------
> >   1 file changed, 53 insertions(+), 11 deletions(-)
> > 
> > diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
> > index 0dcd93a99c..e22fdfb5a8 100644
> > --- a/libavcodec/nvenc.c
> > +++ b/libavcodec/nvenc.c
> > @@ -2170,9 +2170,10 @@ static int nvenc_send_frame(AVCodecContext *avctx,
> > const AVFrame *frame)> 
> >       NVENCSTATUS nv_status;
> >       NvencSurface *tmp_out_surf, *in_surf;
> >       int res, res2;
> > 
> > -    NV_ENC_SEI_PAYLOAD sei_data[8];
> > +    NV_ENC_SEI_PAYLOAD *sei_data = 0;
> > 
> >       int sei_count = 0;
> >       int i;
> > 
> > +    int total_unregistered_sei = 0;
> > 
> >       NvencContext *ctx = avctx->priv_data;
> >       NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
> > 
> > @@ -2185,6 +2186,8 @@ static int nvenc_send_frame(AVCodecContext *avctx,
> > const AVFrame *frame)> 
> >           return AVERROR(EINVAL);
> >       
> >       if (frame && frame->buf[0]) {
> > 
> > +        void *a53_data = NULL;
> > +        void *tc_data = NULL;
> > 
> >           in_surf = get_free_frame(ctx);
> >           if (!in_surf)
> >           
> >               return AVERROR(EAGAIN);
> > 
> > @@ -2230,7 +2233,6 @@ static int nvenc_send_frame(AVCodecContext *avctx,
> > const AVFrame *frame)> 
> >           pic_params.inputTimeStamp = frame->pts;
> >           
> >           if (ctx->a53_cc && av_frame_get_side_data(frame,
> >           AV_FRAME_DATA_A53_CC)) {
> > 
> > -            void *a53_data = NULL;
> > 
> >               size_t a53_size = 0;
> >               
> >               if (ff_alloc_a53_sei(frame, 0, (void**)&a53_data, &a53_size)
> >               < 0) {
> > 
> > @@ -2238,15 +2240,21 @@ static int nvenc_send_frame(AVCodecContext *avctx,
> > const AVFrame *frame)> 
> >               }
> >               
> >               if (a53_data) {
> > 
> > -                sei_data[sei_count].payloadSize = (uint32_t)a53_size;
> > -                sei_data[sei_count].payloadType = 4;
> > -                sei_data[sei_count].payload = (uint8_t*)a53_data;
> > -                sei_count ++;
> > +                sei_data = av_realloc_array(sei_data, sei_count + 1,
> > sizeof(NV_ENC_SEI_PAYLOAD));
> Doing this realloc dance every frame is extremely inefficient.
> The sei_data array and its current size could instead be stored in the
> nvenc context, with a best-guess initial size, and then only grow when
> the already allocated size is too small.
I recognise that this might be frustrating you. Would you prefer to persist, 
or would it be easier just to take over the patch and do it the way you want 
it? I'm fine with either - just let me know.


> > +                if (!sei_data) {
> > +                    av_free(a53_data);
> > +                    sei_count = 0;
> > +                    return AVERROR(ENOMEM);
> > +                } else {
> > +                    sei_data[sei_count].payloadSize = (uint32_t)a53_size;
> > +                    sei_data[sei_count].payloadType = 4;
> > +                    sei_data[sei_count].payload = (uint8_t*)a53_data;
> > +                    sei_count ++;
> > +                }
> > 
> >               }
> >           
> >           }
> >           
> >           if (ctx->s12m_tc && av_frame_get_side_data(frame,
> >           AV_FRAME_DATA_S12M_TIMECODE)) {> 
> > -            void *tc_data = NULL;
> > 
> >               size_t tc_size = 0;
> >               
> >               if (ff_alloc_timecode_sei(frame, avctx->framerate, 0,
> >               (void**)&tc_data, &tc_size) < 0) {> 
> > @@ -2254,13 +2262,46 @@ static int nvenc_send_frame(AVCodecContext *avctx,
> > const AVFrame *frame)> 
> >               }
> >               
> >               if (tc_data) {
> > 
> > -                sei_data[sei_count].payloadSize = (uint32_t)tc_size;
> > -                sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
> > -                sei_data[sei_count].payload = (uint8_t*)tc_data;
> > -                sei_count ++;
> > +                sei_data = av_realloc_array(sei_data, sei_count + 1,
> > sizeof(NV_ENC_SEI_PAYLOAD)); +                if (!sei_data) {
> > +                    av_free(a53_data);
> > +                    av_free(tc_data);
> > +                    sei_count = 0;
> > +                    return AVERROR(ENOMEM);
> > +                } else {
> > +                    sei_data[sei_count].payloadSize = (uint32_t)tc_size;
> > +                    sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
> > +                    sei_data[sei_count].payload = (uint8_t*)tc_data;
> > +                    sei_count ++;
> > +                }
> > 
> >               }
> >           
> >           }
> > 
> > +        for (int j = 0; j < frame->nb_side_data; j++)
> > +            if (frame->side_data[j]->type ==
> > AV_FRAME_DATA_SEI_UNREGISTERED) +               
> > total_unregistered_sei++;
> > +        if (total_unregistered_sei > 0) {
> > +            sei_data = av_realloc_array(sei_data,
> > +                                        sei_count +
> > total_unregistered_sei, +                                       
> > sizeof(NV_ENC_SEI_PAYLOAD)); +            if (!sei_data) {
> > +                av_free(a53_data);
> > +                av_free(tc_data);
> > +                sei_count = 0;
> > +                return AVERROR(ENOMEM);
> > +            } else
> > +                for (int j = 0; j < frame->nb_side_data; j++) {
> > +                    AVFrameSideData *side_data = frame->side_data[j];
> > +                    if (side_data->type ==
> > AV_FRAME_DATA_SEI_UNREGISTERED) { +                       
> > sei_data[sei_count].payloadSize = side_data->size; +                     
> >   sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED; +   
> >                     sei_data[sei_count].payload =
> > av_memdup(side_data->data, side_data->size);
> Does this ever get freed anywhere?
Yes it does (just as for the other two payload allocations), in existing code.
See
https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/nvenc.c#L2275-L2276
diff mbox series

Patch

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 0dcd93a99c..e22fdfb5a8 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -2170,9 +2170,10 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
     NVENCSTATUS nv_status;
     NvencSurface *tmp_out_surf, *in_surf;
     int res, res2;
-    NV_ENC_SEI_PAYLOAD sei_data[8];
+    NV_ENC_SEI_PAYLOAD *sei_data = 0;
     int sei_count = 0;
     int i;
+    int total_unregistered_sei = 0;
 
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
@@ -2185,6 +2186,8 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         return AVERROR(EINVAL);
 
     if (frame && frame->buf[0]) {
+        void *a53_data = NULL;
+        void *tc_data = NULL;
         in_surf = get_free_frame(ctx);
         if (!in_surf)
             return AVERROR(EAGAIN);
@@ -2230,7 +2233,6 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
         pic_params.inputTimeStamp = frame->pts;
 
         if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
-            void *a53_data = NULL;
             size_t a53_size = 0;
 
             if (ff_alloc_a53_sei(frame, 0, (void**)&a53_data, &a53_size) < 0) {
@@ -2238,15 +2240,21 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
             }
 
             if (a53_data) {
-                sei_data[sei_count].payloadSize = (uint32_t)a53_size;
-                sei_data[sei_count].payloadType = 4;
-                sei_data[sei_count].payload = (uint8_t*)a53_data;
-                sei_count ++;
+                sei_data = av_realloc_array(sei_data, sei_count + 1, sizeof(NV_ENC_SEI_PAYLOAD));
+                if (!sei_data) {
+                    av_free(a53_data);
+                    sei_count = 0;
+                    return AVERROR(ENOMEM);
+                } else {
+                    sei_data[sei_count].payloadSize = (uint32_t)a53_size;
+                    sei_data[sei_count].payloadType = 4;
+                    sei_data[sei_count].payload = (uint8_t*)a53_data;
+                    sei_count ++;
+                }
             }
         }
 
         if (ctx->s12m_tc && av_frame_get_side_data(frame, AV_FRAME_DATA_S12M_TIMECODE)) {
-            void *tc_data = NULL;
             size_t tc_size = 0;
 
             if (ff_alloc_timecode_sei(frame, avctx->framerate, 0, (void**)&tc_data, &tc_size) < 0) {
@@ -2254,13 +2262,46 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
             }
 
             if (tc_data) {
-                sei_data[sei_count].payloadSize = (uint32_t)tc_size;
-                sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
-                sei_data[sei_count].payload = (uint8_t*)tc_data;
-                sei_count ++;
+                sei_data = av_realloc_array(sei_data, sei_count + 1, sizeof(NV_ENC_SEI_PAYLOAD));
+                if (!sei_data) {
+                    av_free(a53_data);
+                    av_free(tc_data);
+                    sei_count = 0;
+                    return AVERROR(ENOMEM);
+                } else {
+                    sei_data[sei_count].payloadSize = (uint32_t)tc_size;
+                    sei_data[sei_count].payloadType = SEI_TYPE_TIME_CODE;
+                    sei_data[sei_count].payload = (uint8_t*)tc_data;
+                    sei_count ++;
+                }
             }
         }
 
+        for (int j = 0; j < frame->nb_side_data; j++)
+            if (frame->side_data[j]->type == AV_FRAME_DATA_SEI_UNREGISTERED)
+                total_unregistered_sei++;
+        if (total_unregistered_sei > 0) {
+            sei_data = av_realloc_array(sei_data,
+                                        sei_count + total_unregistered_sei,
+                                        sizeof(NV_ENC_SEI_PAYLOAD));
+            if (!sei_data) {
+                av_free(a53_data);
+                av_free(tc_data);
+                sei_count = 0;
+                return AVERROR(ENOMEM);
+            } else    
+                for (int j = 0; j < frame->nb_side_data; j++) {
+                    AVFrameSideData *side_data = frame->side_data[j];
+                    if (side_data->type == AV_FRAME_DATA_SEI_UNREGISTERED) {
+                        sei_data[sei_count].payloadSize = side_data->size;
+                        sei_data[sei_count].payloadType = SEI_TYPE_USER_DATA_UNREGISTERED;
+                        sei_data[sei_count].payload = av_memdup(side_data->data, side_data->size);
+                        if (sei_data[sei_count].payload)
+                            sei_count ++;
+                    }
+                }
+        }
+
         nvenc_codec_specific_pic_params(avctx, &pic_params, sei_data, sei_count);
     } else {
         pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
@@ -2274,6 +2315,7 @@  static int nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 
     for ( i = 0; i < sei_count; i++)
         av_freep(&sei_data[i].payload);
+    av_free(sei_data);
 
     res = nvenc_pop_context(avctx);
     if (res < 0)