Message ID | 20210412202125.149031-1-dhanishvijayan@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] Added Closed caption support for cuviddec for preserving a53 data in GPU decoding | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
On 4/12/2021 5:21 PM, Dhanish Vijayan wrote: > Signed-off-by: Dhanish Vijayan <dhanishvijayan@gmail.com> > --- > libavcodec/cuviddec.c | 199 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 199 insertions(+) > > diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c > index ec57afdefe..3b07d0a874 100644 > --- a/libavcodec/cuviddec.c > +++ b/libavcodec/cuviddec.c > @@ -46,6 +46,9 @@ > #define CUVID_HAS_AV1_SUPPORT > #endif > > +#define MAX_FRAME_COUNT 25 > +#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8) > + > typedef struct CuvidContext > { > AVClass *avclass; > @@ -89,6 +92,11 @@ typedef struct CuvidContext > cudaVideoCodec codec_type; > cudaVideoChromaFormat chroma_format; > > + uint8_t* a53_caption; > + int a53_caption_size; > + uint8_t* a53_caption_queue[A53_QUEUE_SIZE]; > + int a53_caption_size_queue[A53_QUEUE_SIZE]; > + > CUVIDDECODECAPS caps8, caps10, caps12; > > CUVIDPARSERPARAMS cuparseinfo; > @@ -103,6 +111,8 @@ typedef struct CuvidParsedFrame > CUVIDPARSERDISPINFO dispinfo; > int second_field; > int is_deinterlacing; > + uint8_t* a53_caption; > + int a53_caption_size; > } CuvidParsedFrame; > > #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) > @@ -338,6 +348,24 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic > > ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; > > + if (ctx->a53_caption) > + { > + > + if (picparams->CurrPicIdx >= A53_QUEUE_SIZE) > + { > + av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx); > + av_freep(&ctx->a53_caption); > + } > + else > + { > + int pos = picparams->CurrPicIdx; > + av_freep(&ctx->a53_caption_queue[pos]); > + ctx->a53_caption_queue[pos] = ctx->a53_caption; > + ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size; > + ctx->a53_caption = NULL; > + } > + } > + > ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams)); > if (ctx->internal_error < 0) > return 0; > @@ -350,6 +378,20 @@ 
static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF > AVCodecContext *avctx = opaque; > CuvidContext *ctx = avctx->priv_data; > CuvidParsedFrame parsed_frame = { { 0 } }; > + uint8_t* a53_caption = NULL; > + int a53_caption_size = 0; > + > + if (dispinfo->picture_index >= A53_QUEUE_SIZE) > + { > + av_log(avctx, AV_LOG_WARNING, "picture_index too big: %d\n", dispinfo->picture_index); > + } > + else > + { > + int pos = dispinfo->picture_index; > + a53_caption = ctx->a53_caption_queue[pos]; > + a53_caption_size = ctx->a53_caption_size_queue[pos]; > + ctx->a53_caption_queue[pos] = NULL; > + } > > parsed_frame.dispinfo = *dispinfo; > ctx->internal_error = 0; > @@ -358,11 +400,17 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF > parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence; > > if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) { > + parsed_frame.a53_caption = a53_caption; > + parsed_frame.a53_caption_size = a53_caption_size; > av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); > } else { > parsed_frame.is_deinterlacing = 1; > + parsed_frame.a53_caption = a53_caption; > + parsed_frame.a53_caption_size = a53_caption_size; > av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); > if (!ctx->drop_second_field) { > + parsed_frame.a53_caption = NULL; > + parsed_frame.a53_caption_size = 0; > parsed_frame.second_field = 1; > av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); > } > @@ -382,6 +430,139 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx) > return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces; > } > > + > +static void cuvid_mpeg_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size) > +{ > + const uint8_t* buf_end = p + buf_size; > + for(;;) > + { > + uint32_t start_code = -1; > + p = avpriv_find_start_code(p, 
buf_end, &start_code); > + if (start_code > 0x1ff) > + break; > + if (start_code != 0x1b2) > + continue; > + buf_size = buf_end - p; > + if (buf_size >= 6 && > + p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' && p[4] == 3 && (p[5] & 0x40)) > + { > + /* extract A53 Part 4 CC data */ > + int cc_count = p[5] & 0x1f; > + if (cc_count > 0 && buf_size >= 7 + cc_count * 3) > + { > + av_freep(&ctx->a53_caption); > + ctx->a53_caption_size = cc_count * 3; > + ctx->a53_caption = av_malloc(ctx->a53_caption_size); > + if (ctx->a53_caption) > + memcpy(ctx->a53_caption, p + 7, ctx->a53_caption_size); > + } > + } > + else if (buf_size >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) > + { > + int cc_count = 0; > + int i; > + // There is a caption count field in the data, but it is often > + // incorrect. So count the number of captions present. > + for (i = 5; i + 6 <= buf_size && ((p[i] & 0xfe) == 0xfe); i += 6) > + cc_count++; > + // Transform the DVD format into A53 Part 4 format > + if (cc_count > 0) { > + av_freep(&ctx->a53_caption); > + ctx->a53_caption_size = cc_count * 6; > + ctx->a53_caption = av_malloc(ctx->a53_caption_size); > + if (ctx->a53_caption) { > + uint8_t field1 = !!(p[4] & 0x80); > + uint8_t *cap = ctx->a53_caption; > + p += 5; > + for (i = 0; i < cc_count; i++) > + { > + cap[0] = (p[0] == 0xff && field1) ? 0xfc : 0xfd; > + cap[1] = p[1]; > + cap[2] = p[2]; > + cap[3] = (p[3] == 0xff && !field1) ? 
0xfc : 0xfd; > + cap[4] = p[4]; > + cap[5] = p[5]; > + cap += 6; > + p += 6; > + } > + } > + } > + } > + } > +} > + > + > +static void cuvid_h264_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size) > +{ > + const uint8_t* buf_end = p + buf_size; > + while(p < buf_end) > + { > + int i, size, cc_count; > + uint32_t start_code = -1; > + uint64_t new_size; > + p = avpriv_find_start_code(p, buf_end, &start_code); > + if (start_code > 0x1ff) > + break; > + if (start_code != 0x106) > + continue; > + buf_size = buf_end - p; > + if (buf_size < 1 || p[0] != 4) > + continue; > + p += 1; buf_size -= 1; > + size = 0; > + while (buf_size > 0) > + { > + size += p[0]; > + buf_size -= 1; > + if (*(p++) != 0xFF) > + break; > + } > + if (buf_size <= 0 || buf_size < size) > + continue; > + if (size < 7) > + continue; > + if (p[0] == 0xFF) > + { > + p+=4; > + size-=4; > + } > + else > + { > + p+=3; > + size-=3; > + } > + if (p[0] != 'G' || p[1] != 'A' || p[2] != '9' || p[3] != '4') > + continue; > + p += 4; > + size -= 4; > + > + if (size < 3) > + continue; > + if (p[0] != 3) > + continue; > + if (!(p[1] & 0x40)) > + continue; > + cc_count = p[1] & 0x1F; > + p+=3; > + size -= 3; > + > + if (!cc_count || size < cc_count * 3) > + continue; > + > + if (!ctx->a53_caption) > + ctx->a53_caption_size = 0; > + new_size = (ctx->a53_caption_size + cc_count * 3); > + if (av_reallocp(&ctx->a53_caption, new_size) < 0) > + continue; > + for(i = 0; i < cc_count; ++i, p += 3) > + { > + ctx->a53_caption[ctx->a53_caption_size++] = p[0]; > + ctx->a53_caption[ctx->a53_caption_size++] = p[1]; > + ctx->a53_caption[ctx->a53_caption_size++] = p[2]; > + } > + } > +} You have ff_parse_a53_cc() in atsc_a53.c for this > + > static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) > { > CuvidContext *ctx = avctx->priv_data; > @@ -424,6 +605,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) > > ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, 
&cupkt)); > > + // assume there is one frame delay (the parser outputs previous picture once it sees new frame data) > + av_freep(&ctx->a53_caption); > + if (avpkt && avpkt->size) { > + if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2) > + cuvid_mpeg_parse_a53(ctx, avpkt->data, avpkt->size); > + else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264) > + cuvid_h264_parse_a53(ctx, avpkt->data, avpkt->size); > + } > + > if (ret < 0) > goto error; > > @@ -627,6 +817,15 @@ FF_ENABLE_DEPRECATION_WARNINGS > > if (frame->interlaced_frame) > frame->top_field_first = parsed_frame.dispinfo.top_field_first; > + > + if (parsed_frame.a53_caption) > + { > + AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size); > + if (sd) > + memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size); > + av_freep(&parsed_frame.a53_caption); > + avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; > + } > } else if (ctx->decoder_flushing) { > ret = AVERROR_EOF; > } else { >
On 13.04.2021 01:41, James Almer wrote:
> You have ff_parse_a53_cc() in atsc_a53.c for this
I'm pretty sure I NAKed this or a very similar patch in the past, asking
to not re-implement an entire parser in cuviddec.
Same applies here: I'm not going to accept a patch that implements a
complex, or really any, parser in cuviddec.
If this can be done by simply calling internal ff_ functions, that'd
much more likely be acceptable.
I used the patch provided here - https://github.com/tea/FFmpeg/commit/ba3df26047ca2e2c2a79c4cab436ace032c73d57 With this patch, I was able to get the closed caption issue fixed with cuvid. After seeing your message, I am trying to figure out how I can use ff_parse_a53_cc() for parsing. The implementation seems to be different, and I am a beginner at this. I will let you know if I can implement the parsing using this function. On Tue, Apr 13, 2021 at 1:02 PM Timo Rothenpieler <timo@rothenpieler.org> wrote: > On 13.04.2021 01:41, James Almer wrote: > > You have ff_parse_a53_cc() in atsc_a53.c for this > > I'm pretty sure I NAKed this or a very similar patch in the past, asking > to not re-implement an entire parser in cuviddec. > > Same applies here: I'm not going to accept a patch that implements a > complex, or really any, parsers into cuviddec. > > If this can be done by simply calling an internal ff_ functions, that'd > much more likely be acceptable. > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index ec57afdefe..3b07d0a874 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -46,6 +46,9 @@ #define CUVID_HAS_AV1_SUPPORT #endif +#define MAX_FRAME_COUNT 25 +#define A53_QUEUE_SIZE (MAX_FRAME_COUNT + 8) + typedef struct CuvidContext { AVClass *avclass; @@ -89,6 +92,11 @@ typedef struct CuvidContext cudaVideoCodec codec_type; cudaVideoChromaFormat chroma_format; + uint8_t* a53_caption; + int a53_caption_size; + uint8_t* a53_caption_queue[A53_QUEUE_SIZE]; + int a53_caption_size_queue[A53_QUEUE_SIZE]; + CUVIDDECODECAPS caps8, caps10, caps12; CUVIDPARSERPARAMS cuparseinfo; @@ -103,6 +111,8 @@ typedef struct CuvidParsedFrame CUVIDPARSERDISPINFO dispinfo; int second_field; int is_deinterlacing; + uint8_t* a53_caption; + int a53_caption_size; } CuvidParsedFrame; #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) @@ -338,6 +348,24 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag; + if (ctx->a53_caption) + { + + if (picparams->CurrPicIdx >= A53_QUEUE_SIZE) + { + av_log(avctx, AV_LOG_WARNING, "CurrPicIdx too big: %d\n", picparams->CurrPicIdx); + av_freep(&ctx->a53_caption); + } + else + { + int pos = picparams->CurrPicIdx; + av_freep(&ctx->a53_caption_queue[pos]); + ctx->a53_caption_queue[pos] = ctx->a53_caption; + ctx->a53_caption_size_queue[pos] = ctx->a53_caption_size; + ctx->a53_caption = NULL; + } + } + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams)); if (ctx->internal_error < 0) return 0; @@ -350,6 +378,20 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; CuvidParsedFrame parsed_frame = { { 0 } }; + uint8_t* a53_caption = NULL; + int a53_caption_size = 0; + + if (dispinfo->picture_index >= A53_QUEUE_SIZE) + { + av_log(avctx, AV_LOG_WARNING, 
"picture_index too big: %d\n", dispinfo->picture_index); + } + else + { + int pos = dispinfo->picture_index; + a53_caption = ctx->a53_caption_queue[pos]; + a53_caption_size = ctx->a53_caption_size_queue[pos]; + ctx->a53_caption_queue[pos] = NULL; + } parsed_frame.dispinfo = *dispinfo; ctx->internal_error = 0; @@ -358,11 +400,17 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence; if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) { + parsed_frame.a53_caption = a53_caption; + parsed_frame.a53_caption_size = a53_caption_size; av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); } else { parsed_frame.is_deinterlacing = 1; + parsed_frame.a53_caption = a53_caption; + parsed_frame.a53_caption_size = a53_caption_size; av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); if (!ctx->drop_second_field) { + parsed_frame.a53_caption = NULL; + parsed_frame.a53_caption_size = 0; parsed_frame.second_field = 1; av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); } @@ -382,6 +430,139 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx) return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces; } + +static void cuvid_mpeg_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size) +{ + const uint8_t* buf_end = p + buf_size; + for(;;) + { + uint32_t start_code = -1; + p = avpriv_find_start_code(p, buf_end, &start_code); + if (start_code > 0x1ff) + break; + if (start_code != 0x1b2) + continue; + buf_size = buf_end - p; + if (buf_size >= 6 && + p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' && p[4] == 3 && (p[5] & 0x40)) + { + /* extract A53 Part 4 CC data */ + int cc_count = p[5] & 0x1f; + if (cc_count > 0 && buf_size >= 7 + cc_count * 3) + { + av_freep(&ctx->a53_caption); + ctx->a53_caption_size = cc_count * 3; + 
ctx->a53_caption = av_malloc(ctx->a53_caption_size); + if (ctx->a53_caption) + memcpy(ctx->a53_caption, p + 7, ctx->a53_caption_size); + } + } + else if (buf_size >= 11 && p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) + { + int cc_count = 0; + int i; + // There is a caption count field in the data, but it is often + // incorrect. So count the number of captions present. + for (i = 5; i + 6 <= buf_size && ((p[i] & 0xfe) == 0xfe); i += 6) + cc_count++; + // Transform the DVD format into A53 Part 4 format + if (cc_count > 0) { + av_freep(&ctx->a53_caption); + ctx->a53_caption_size = cc_count * 6; + ctx->a53_caption = av_malloc(ctx->a53_caption_size); + if (ctx->a53_caption) { + uint8_t field1 = !!(p[4] & 0x80); + uint8_t *cap = ctx->a53_caption; + p += 5; + for (i = 0; i < cc_count; i++) + { + cap[0] = (p[0] == 0xff && field1) ? 0xfc : 0xfd; + cap[1] = p[1]; + cap[2] = p[2]; + cap[3] = (p[3] == 0xff && !field1) ? 0xfc : 0xfd; + cap[4] = p[4]; + cap[5] = p[5]; + cap += 6; + p += 6; + } + } + } + } + } +} + + +static void cuvid_h264_parse_a53(CuvidContext *ctx, const uint8_t* p, int buf_size) +{ + const uint8_t* buf_end = p + buf_size; + while(p < buf_end) + { + int i, size, cc_count; + uint32_t start_code = -1; + uint64_t new_size; + p = avpriv_find_start_code(p, buf_end, &start_code); + if (start_code > 0x1ff) + break; + if (start_code != 0x106) + continue; + buf_size = buf_end - p; + if (buf_size < 1 || p[0] != 4) + continue; + p += 1; buf_size -= 1; + size = 0; + while (buf_size > 0) + { + size += p[0]; + buf_size -= 1; + if (*(p++) != 0xFF) + break; + } + if (buf_size <= 0 || buf_size < size) + continue; + if (size < 7) + continue; + if (p[0] == 0xFF) + { + p+=4; + size-=4; + } + else + { + p+=3; + size-=3; + } + if (p[0] != 'G' || p[1] != 'A' || p[2] != '9' || p[3] != '4') + continue; + p += 4; + size -= 4; + + if (size < 3) + continue; + if (p[0] != 3) + continue; + if (!(p[1] & 0x40)) + continue; + cc_count = p[1] & 0x1F; + p+=3; + size -= 3; + + 
if (!cc_count || size < cc_count * 3) + continue; + + if (!ctx->a53_caption) + ctx->a53_caption_size = 0; + new_size = (ctx->a53_caption_size + cc_count * 3); + if (av_reallocp(&ctx->a53_caption, new_size) < 0) + continue; + for(i = 0; i < cc_count; ++i, p += 3) + { + ctx->a53_caption[ctx->a53_caption_size++] = p[0]; + ctx->a53_caption[ctx->a53_caption_size++] = p[1]; + ctx->a53_caption[ctx->a53_caption_size++] = p[2]; + } + } +} + static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) { CuvidContext *ctx = avctx->priv_data; @@ -424,6 +605,15 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt)); + // assume there is one frame delay (the parser outputs previous picture once it sees new frame data) + av_freep(&ctx->a53_caption); + if (avpkt && avpkt->size) { + if (ctx->cuparseinfo.CodecType == cudaVideoCodec_MPEG2) + cuvid_mpeg_parse_a53(ctx, avpkt->data, avpkt->size); + else if (ctx->cuparseinfo.CodecType == cudaVideoCodec_H264) + cuvid_h264_parse_a53(ctx, avpkt->data, avpkt->size); + } + if (ret < 0) goto error; @@ -627,6 +817,15 @@ FF_ENABLE_DEPRECATION_WARNINGS if (frame->interlaced_frame) frame->top_field_first = parsed_frame.dispinfo.top_field_first; + + if (parsed_frame.a53_caption) + { + AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_A53_CC, parsed_frame.a53_caption_size); + if (sd) + memcpy(sd->data, parsed_frame.a53_caption, parsed_frame.a53_caption_size); + av_freep(&parsed_frame.a53_caption); + avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; + } } else if (ctx->decoder_flushing) { ret = AVERROR_EOF; } else {
Signed-off-by: Dhanish Vijayan <dhanishvijayan@gmail.com> --- libavcodec/cuviddec.c | 199 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+)