diff mbox series

[FFmpeg-devel] Added Closed caption support for cuviddec for preserving a53 data in GPU decoding

Message ID 20210412202125.149031-1-dhanishvijayan@gmail.com
State New
Headers show
Series [FFmpeg-devel] Added Closed caption support for cuviddec for preserving a53 data in GPU decoding | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Dhanish Vijayan April 12, 2021, 8:21 p.m. UTC
Signed-off-by: Dhanish Vijayan <dhanishvijayan@gmail.com>
---
 libavcodec/cuviddec.c | 199 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)

Comments

James Almer April 12, 2021, 11:41 p.m. UTC | #1
On 4/12/2021 5:21 PM, Dhanish Vijayan wrote:
> Signed-off-by: Dhanish Vijayan <dhanishvijayan@gmail.com>
> ---
>   libavcodec/cuviddec.c | 199 ++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 199 insertions(+)
> 
> diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> index ec57afdefe..3b07d0a874 100644
> --- a/libavcodec/cuviddec.c
> +++ b/libavcodec/cuviddec.c
> @@ -46,6 +46,9 @@
>   #define CUVID_HAS_AV1_SUPPORT
>   #endif
>   
> +#define MAX_FRAME_COUNT 25
> +#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8)
> +
>   typedef struct CuvidContext
>   {
>       AVClass *avclass;
> @@ -89,6 +92,11 @@ typedef struct CuvidContext
>       cudaVideoCodec codec_type;
>       cudaVideoChromaFormat chroma_format;
>   
> +    uint8_t* a53_caption;
> +    int a53_caption_size;
> +    uint8_t* a53_caption_queue[A53_QUEUE_SIZE];
> +    int a53_caption_size_queue[A53_QUEUE_SIZE];
> +
>       CUVIDDECODECAPS caps8, caps10, caps12;
>   
>       CUVIDPARSERPARAMS cuparseinfo;
> @@ -103,6 +111,8 @@ typedef struct CuvidParsedFrame
>       CUVIDPARSERDISPINFO dispinfo;
>       int second_field;
>       int is_deinterlacing;
> +    uint8_t* a53_caption;
> +    int a53_caption_size;
>   } CuvidParsedFrame;
>   
>   #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
> @@ -338,6 +348,24 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
>   
>       ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
>   
> +    if (ctx->a53_caption)
> +    {
> +
> +        if (picparams->CurrPicIdx >= A53_QUEUE_SIZE)
> +        {
> +            av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx);
> +            av_freep(&ctx->a53_caption);
> +        }
> +        else
> +        {
> +            int pos = picparams->CurrPicIdx;
> +            av_freep(&ctx->a53_caption_queue[pos]);
> +            ctx->a53_caption_queue[pos] = ctx->a53_caption;
> +            ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size;
> +            ctx->a53_caption = NULL;
> +        }
> +    }
> +
>       ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
>       if (ctx->internal_error < 0)
>           return 0;
> @@ -350,6 +378,20 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
>       AVCodecContext *avctx = opaque;
>       CuvidContext *ctx = avctx->priv_data;
>       CuvidParsedFrame parsed_frame = { { 0 } };
> +    uint8_t* a53_caption = NULL;
> +    int a53_caption_size = 0;
> +
> +    if (dispinfo->picture_index >= A53_QUEUE_SIZE)
> +    {
> +        av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index);
> +    }
> +    else
> +    {
> +        int pos = dispinfo->picture_index;
> +        a53_caption = ctx->a53_caption_queue[pos];
> +        a53_caption_size = ctx->a53_caption_size_queue[pos];
> +        ctx->a53_caption_queue[pos] = NULL;
> +    }
>   
>       parsed_frame.dispinfo = *dispinfo;
>       ctx->internal_error = 0;
> @@ -358,11 +400,17 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
>       parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
>   
>       if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
> +        parsed_frame.a53_caption = a53_caption;
> +        parsed_frame.a53_caption_size = a53_caption_size;
>           av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
>       } else {
>           parsed_frame.is_deinterlacing = 1;
> +        parsed_frame.a53_caption = a53_caption;
> +        parsed_frame.a53_caption_size = a53_caption_size;
>           av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
>           if (!ctx->drop_second_field) {
> +            parsed_frame.a53_caption = NULL;
> +            parsed_frame.a53_caption_size = 0;
>               parsed_frame.second_field = 1;
>               av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
>           }
> @@ -382,6 +430,139 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx)
>       return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
>   }
>   
> +
> +static void cuvid_mpeg_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
> +{
> +    const uint8_t* buf_end = p + buf_size;
> +    for(;;)
> +    {
> +        uint32_t start_code = -1;
> +        p = avpriv_find_start_code(p, buf_end, &start_code);
> +        if (start_code > 0x1ff)
> +            break;
> +        if (start_code != 0x1b2)
> +            continue;
> +        buf_size = buf_end - p;
> +        if (buf_size >= 6 &&
> +            p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' && p[4] == 3 && (p[5] & 0x40))
> +        {
> +            /* extract A53 Part 4 CC data */
> +            int cc_count = p[5] & 0x1f;
> +            if (cc_count > 0 && buf_size >= 7 + cc_count * 3)
> +            {
> +                av_freep(&ctx->a53_caption);
> +                ctx->a53_caption_size = cc_count * 3;
> +                ctx->a53_caption      = av_malloc(ctx->a53_caption_size);
> +                if (ctx->a53_caption)
> +                    memcpy(ctx->a53_caption, p + 7, ctx->a53_caption_size);
> +            }
> +        }
> +        else if (buf_size >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8)
> +        {
> +            int cc_count = 0;
> +            int i;
> +            // There is a caption count field in the data, but it is often
> +            // incorrect.  So count the number of captions present.
> +            for (i = 5; i + 6 <= buf_size && ((p[i] & 0xfe) == 0xfe); i += 6)
> +                cc_count++;
> +            // Transform the DVD format into A53 Part 4 format
> +            if (cc_count > 0) {
> +                av_freep(&ctx->a53_caption);
> +                ctx->a53_caption_size = cc_count * 6;
> +                ctx->a53_caption      = av_malloc(ctx->a53_caption_size);
> +                if (ctx->a53_caption) {
> +                    uint8_t field1 = !!(p[4] & 0x80);
> +                    uint8_t *cap = ctx->a53_caption;
> +                    p += 5;
> +                    for (i = 0; i < cc_count; i++)
> +                    {
> +                        cap[0] = (p[0] == 0xff && field1) ? 0xfc : 0xfd;
> +                        cap[1] = p[1];
> +                        cap[2] = p[2];
> +                        cap[3] = (p[3] == 0xff && !field1) ? 0xfc : 0xfd;
> +                        cap[4] = p[4];
> +                        cap[5] = p[5];
> +                        cap += 6;
> +                        p += 6;
> +                    }
> +                }
> +            }
> +        }
> +    }
> +}
> +
> +
> +static void cuvid_h264_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
> +{
> +    const uint8_t* buf_end = p + buf_size;
> +    while(p < buf_end)
> +    {
> +        int i, size, cc_count;
> +        uint32_t start_code = -1;
> +    uint64_t new_size;
> +        p = avpriv_find_start_code(p, buf_end, &start_code);
> +            if (start_code > 0x1ff)
> +            break;
> +        if (start_code != 0x106)
> +            continue;
> +        buf_size = buf_end - p;
> +        if (buf_size < 1 || p[0] != 4)
> +            continue;
> +        p += 1; buf_size -= 1;
> +        size = 0;
> +        while (buf_size > 0)
> +        {
> +            size += p[0];
> +            buf_size -= 1;
> +            if (*(p++) != 0xFF)
> +                break;
> +        }
> +        if (buf_size <= 0 || buf_size < size)
> +            continue;
> +        if (size < 7)
> +            continue;
> +        if (p[0] == 0xFF)
> +        {
> +            p+=4;
> +            size-=4;
> +        }
> +        else
> +        {
> +            p+=3;
> +            size-=3;
> +        }
> +        if (p[0] != 'G' || p[1] != 'A' || p[2] != '9' || p[3] != '4')
> +            continue;
> +        p += 4;
> +        size -= 4;
> +
> +        if (size < 3)
> +            continue;
> +        if (p[0] != 3)
> +            continue;
> +        if (!(p[1] & 0x40))
> +            continue;
> +        cc_count = p[1] & 0x1F;
> +        p+=3;
> +        size -= 3;
> +
> +        if (!cc_count || size < cc_count * 3)
> +            continue;
> +
> +        if (!ctx->a53_caption)
> +            ctx->a53_caption_size = 0;
> +        new_size = (ctx->a53_caption_size + cc_count * 3);
> +        if (av_reallocp(&ctx->a53_caption, new_size) < 0)
> +            continue;
> +        for(i = 0; i < cc_count; ++i, p += 3)
> +        {
> +            ctx->a53_caption[ctx->a53_caption_size++] = p[0];
> +            ctx->a53_caption[ctx->a53_caption_size++] = p[1];
> +            ctx->a53_caption[ctx->a53_caption_size++] = p[2];
> +        }
> +    }
> +}

You have ff_parse_a53_cc() in atsc_a53.c for this

> +
>   static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
>   {
>       CuvidContext *ctx = avctx->priv_data;
> @@ -424,6 +605,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
>   
>       ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
>   
> +    // assume there is one frame delay (the parser outputs previous picture once it sees new frame data)
> +    av_freep(&ctx->a53_caption);
> +    if (avpkt && avpkt->size) {
> +        if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2)
> +            cuvid_mpeg_parse_a53(ctx, avpkt->data, avpkt->size);
> +        else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264)
> +            cuvid_h264_parse_a53(ctx, avpkt->data, avpkt->size);
> +    }
> +
>       if (ret < 0)
>           goto error;
>   
> @@ -627,6 +817,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
>   
>           if (frame->interlaced_frame)
>               frame->top_field_first = parsed_frame.dispinfo.top_field_first;
> +
> +        if (parsed_frame.a53_caption)
> +        {
> +            AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size);
> +            if (sd)
> +                memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size);
> +            av_freep(&parsed_frame.a53_caption);
> +            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +        }
>       } else if (ctx->decoder_flushing) {
>           ret = AVERROR_EOF;
>       } else {
>
Timo Rothenpieler April 13, 2021, 7:32 a.m. UTC | #2
On 13.04.2021 01:41, James Almer wrote:
> You have ff_parse_a53_cc() in atsc_a53.c for this

I'm pretty sure I NAKed this or a very similar patch in the past, asking 
to not re-implement an entire parser in cuviddec.

Same applies here: I'm not going to accept a patch that implements a 
complex parser, or really any parser, in cuviddec.

If this can be done by simply calling an internal ff_ function, that'd 
much more likely be acceptable.
Dhanish Vijayan April 13, 2021, 10:51 a.m. UTC | #3
I used the patch provided here -
https://github.com/tea/FFmpeg/commit/ba3df26047ca2e2c2a79c4cab436ace032c73d57
With this patch I was able to get the Closed caption issue fixed with the
cuvid.

After seeing your message, I am trying to figure out how I can use
ff_parse_a53_cc() for the parsing.
The implementation seems to be different, and I am a beginner at this. I will
let you know if I can implement the parsing using this function.



On Tue, Apr 13, 2021 at 1:02 PM Timo Rothenpieler <timo@rothenpieler.org>
wrote:

> On 13.04.2021 01:41, James Almer wrote:
> > You have ff_parse_a53_cc() in atsc_a53.c for this
>
> I'm pretty sure I NAKed this or a very similar patch in the past, asking
> to not re-implement an entire parser in cuviddec.
>
> Same applies here: I'm not going to accept a patch that implements a
> complex, or really any, parsers into cuviddec.
>
> If this can be done by simply calling an internal ff_ functions, that'd
> much more likely be acceptable.
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index ec57afdefe..3b07d0a874 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -46,6 +46,9 @@ 
 #define CUVID_HAS_AV1_SUPPORT
 #endif
 
+#define MAX_FRAME_COUNT 25
+#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8)
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -89,6 +92,11 @@  typedef struct CuvidContext
     cudaVideoCodec codec_type;
     cudaVideoChromaFormat chroma_format;
 
+    uint8_t* a53_caption;
+    int a53_caption_size;
+    uint8_t* a53_caption_queue[A53_QUEUE_SIZE];
+    int a53_caption_size_queue[A53_QUEUE_SIZE];
+
     CUVIDDECODECAPS caps8, caps10, caps12;
 
     CUVIDPARSERPARAMS cuparseinfo;
@@ -103,6 +111,8 @@  typedef struct CuvidParsedFrame
     CUVIDPARSERDISPINFO dispinfo;
     int second_field;
     int is_deinterlacing;
+    uint8_t* a53_caption;
+    int a53_caption_size;
 } CuvidParsedFrame;
 
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
@@ -338,6 +348,24 @@  static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
 
     ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
 
+    if (ctx->a53_caption)
+    {
+
+        if (picparams->CurrPicIdx >= A53_QUEUE_SIZE)
+        {
+            av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx);
+            av_freep(&ctx->a53_caption);
+        }
+        else
+        {
+            int pos = picparams->CurrPicIdx;
+            av_freep(&ctx->a53_caption_queue[pos]);
+            ctx->a53_caption_queue[pos] = ctx->a53_caption;
+            ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size;
+            ctx->a53_caption = NULL;
+        }
+    }
+
     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
     if (ctx->internal_error < 0)
         return 0;
@@ -350,6 +378,20 @@  static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     AVCodecContext *avctx = opaque;
     CuvidContext *ctx = avctx->priv_data;
     CuvidParsedFrame parsed_frame = { { 0 } };
+    uint8_t* a53_caption = NULL;
+    int a53_caption_size = 0;
+
+    if (dispinfo->picture_index >= A53_QUEUE_SIZE)
+    {
+        av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index);
+    }
+    else
+    {
+        int pos = dispinfo->picture_index;
+        a53_caption = ctx->a53_caption_queue[pos];
+        a53_caption_size = ctx->a53_caption_size_queue[pos];
+        ctx->a53_caption_queue[pos] = NULL;
+    }
 
     parsed_frame.dispinfo = *dispinfo;
     ctx->internal_error = 0;
@@ -358,11 +400,17 @@  static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
 
     if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
+        parsed_frame.a53_caption = a53_caption;
+        parsed_frame.a53_caption_size = a53_caption_size;
         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
     } else {
         parsed_frame.is_deinterlacing = 1;
+        parsed_frame.a53_caption = a53_caption;
+        parsed_frame.a53_caption_size = a53_caption_size;
         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
         if (!ctx->drop_second_field) {
+            parsed_frame.a53_caption = NULL;
+            parsed_frame.a53_caption_size = 0;
             parsed_frame.second_field = 1;
             av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
         }
@@ -382,6 +430,139 @@  static int cuvid_is_buffer_full(AVCodecContext *avctx)
     return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 }
 
+
+/**
+ * Scan an MPEG-2 packet for user data (start code 0x1b2) carrying closed
+ * captions and store the most recent payload in ctx->a53_caption.
+ *
+ * 'GA94' type-3 payloads are copied as-is; DVD-style 'CC' 0x01 0xf8 payloads
+ * are transformed into A53 Part 4 format. A new payload replaces the old one.
+ * @param ctx      decoder context receiving the caption bytes
+ * @param p        start of the packet data
+ * @param buf_size number of bytes available at p
+ */
+static void cuvid_mpeg_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *const end = p + buf_size;
+
+    while (1) {
+        uint32_t state = -1;
+        int left, count, n, first_field;
+
+        p = avpriv_find_start_code(p, end, &state);
+        if (state > 0x1ff)
+            return;
+        if (state != 0x1b2)
+            continue;
+        left = end - p;
+        if (left >= 6 && p[0] == 'G' && p[1] == 'A' && p[2] == '9' &&
+            p[3] == '4' && p[4] == 3 && (p[5] & 0x40)) {
+            count = p[5] & 0x1f;
+            if (count <= 0 || left < 7 + count * 3)
+                continue;
+            av_freep(&ctx->a53_caption);
+            ctx->a53_caption_size = count * 3;
+            ctx->a53_caption      = av_malloc(ctx->a53_caption_size);
+            if (ctx->a53_caption)
+                memcpy(ctx->a53_caption, p + 7, ctx->a53_caption_size);
+        } else if (left >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
+            uint8_t *dst;
+            /* The caption count stored in the data is often wrong: count entries. */
+            count = 0;
+            for (n = 5; n + 6 <= left && (p[n] & 0xfe) == 0xfe; n += 6)
+                count++;
+            if (!count)
+                continue;
+            av_freep(&ctx->a53_caption);
+            ctx->a53_caption_size = count * 6;
+            ctx->a53_caption = dst = av_malloc(ctx->a53_caption_size);
+            if (!dst)
+                continue;
+            first_field = !!(p[4] & 0x80);
+            for (p += 5; count > 0; count--, p += 6, dst += 6) {
+                dst[0] = (p[0] == 0xff &&  first_field) ? 0xfc : 0xfd;
+                dst[1] = p[1];
+                dst[2] = p[2];
+                dst[3] = (p[3] == 0xff && !first_field) ? 0xfc : 0xfd;
+                dst[4] = p[4];
+                dst[5] = p[5];
+            }
+        }
+    }
+}
+
+
+/**
+ * Collect ATSC A/53 closed-caption triplets from the H.264 SEI NAL units of
+ * one packet and append them to ctx->a53_caption / ctx->a53_caption_size.
+ *
+ * Only NALs located via start code 0x106 whose first payload type byte is 4
+ * and that carry a 'GA94' block of type 3 with flag 0x40 set are used.
+ * NOTE(review): emulation prevention bytes are not stripped before parsing.
+ * @param ctx      decoder context receiving the caption bytes
+ * @param p        start of the packet data
+ * @param buf_size number of bytes available at p
+ */
+static void cuvid_h264_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size)
+{
+    const uint8_t *buf_end = p + buf_size;
+
+    while (p < buf_end) {
+        int size = 0, hdr_len, cc_count;
+        uint32_t start_code = -1;
+
+        p = avpriv_find_start_code(p, buf_end, &start_code);
+        if (start_code > 0x1ff) /* no further start code in this packet */
+            break;
+        if (start_code != 0x106)
+            continue;
+        buf_size = buf_end - p;
+        if (buf_size < 1 || p[0] != 4)
+            continue;
+        p++;
+        buf_size--;
+
+        /* Payload size is the sum of bytes up to the first non-0xFF one.
+         * Stop once it exceeds what is left so the sum cannot overflow. */
+        while (buf_size > 0 && size <= buf_size) {
+            size += p[0];
+            buf_size--;
+            if (*p++ != 0xFF)
+                break;
+        }
+        if (buf_size <= 0 || buf_size < size)
+            continue;
+
+        /* Leading header (presumably ITU-T T.35 codes): 3 bytes, 4 if 0xFF first. */
+        hdr_len = p[0] == 0xFF ? 4 : 3;
+        /* Require header + 'GA94' + type/flags/em_data up front: the old
+         * "size < 7" test let the identifier be read past the payload. */
+        if (size < hdr_len + 4 + 3)
+            continue;
+        p    += hdr_len;
+        size -= hdr_len;
+        if (p[0] != 'G' || p[1] != 'A' || p[2] != '9' || p[3] != '4')
+            continue;
+        p    += 4;
+        size -= 4;
+
+        if (p[0] != 3 || !(p[1] & 0x40))
+            continue;
+        cc_count = p[1] & 0x1F;
+        p    += 3;
+        size -= 3;
+        if (!cc_count || size < cc_count * 3)
+            continue;
+        if (!ctx->a53_caption)
+            ctx->a53_caption_size = 0;
+        if (av_reallocp(&ctx->a53_caption, ctx->a53_caption_size + cc_count * 3) < 0)
+            continue;
+        memcpy(ctx->a53_caption + ctx->a53_caption_size, p, cc_count * 3);
+        ctx->a53_caption_size += cc_count * 3;
+        p += cc_count * 3;
+    }
+}
+
 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     CuvidContext *ctx = avctx->priv_data;
@@ -424,6 +605,15 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 
     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 
+    // assume there is one frame delay (the parser outputs previous picture once it sees new frame data)
+    av_freep(&ctx->a53_caption);
+    if (avpkt && avpkt->size) {
+        if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2)
+            cuvid_mpeg_parse_a53(ctx, avpkt->data, avpkt->size);
+        else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264)
+            cuvid_h264_parse_a53(ctx, avpkt->data, avpkt->size);
+    }
+
     if (ret < 0)
         goto error;
 
@@ -627,6 +817,15 @@  FF_ENABLE_DEPRECATION_WARNINGS
 
         if (frame->interlaced_frame)
             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
+
+        if (parsed_frame.a53_caption)
+        {
+            AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size);
+            if (sd)
+                memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size);
+            av_freep(&parsed_frame.a53_caption);
+            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+        }
     } else if (ctx->decoder_flushing) {
         ret = AVERROR_EOF;
     } else {