[FFmpeg-devel,4/5] avcodec/cuviddec: Add support for decoding HEVC 4:4:4 content

Submitted by Philip Langdale on Oct. 7, 2018, 5:50 p.m.

Details

Message ID 20181007175057.31070-5-philipl@overt.org
State New
Headers show

Commit Message

Philip Langdale Oct. 7, 2018, 5:50 p.m.
This is the equivalent change for cuviddec after the previous change
for nvdec. I made similar changes to the copying routines to handle
pixel formats in a more generic way.

Note that unlike with nvdec, there is no confusion about the ability
of a codec to output 444 formats. This is because the cuvid parser is
used, meaning that 444 JPEG content is still indicated as using a 420
output format.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/cuviddec.c | 59 +++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 19 deletions(-)

Comments

Philip Langdale Oct. 7, 2018, 7:36 p.m.
On Sun,  7 Oct 2018 10:50:56 -0700
Philip Langdale <philipl@overt.org> wrote:

> This is the equivalent change for cuviddec after the previous change
> for nvdec. I made similar changes to the copying routines to handle
> pixel formats in a more generic way.
> 
> Note that unlike with nvdec, there is no confusion about the ability
> of a codec to output 444 formats. This is because the cuvid parser is
> used, meaning that 444 JPEG content is still indicated as using a 420
> output format.
> 
> Signed-off-by: Philip Langdale <philipl@overt.org>
> ---
>  libavcodec/cuviddec.c | 59 +++++++++++++++++++++++++++++--------------
>  1 file changed, 40 insertions(+), 19 deletions(-)
> 
> diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> index 4d3caf924e..595249475d 100644
> --- a/libavcodec/cuviddec.c
> +++ b/libavcodec/cuviddec.c
> @@ -35,6 +35,9 @@
>  #include "hwaccel.h"
>  #include "internal.h"
>  
> +#define CUVID_FORMAT_YUV444P 2
> +#define CUVID_FORMAT_YUV444P16 3
> +
>  typedef struct CuvidContext
>  {
>      AVClass *avclass;
> @@ -127,6 +130,7 @@ static int CUDAAPI
> cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
> CUVIDDECODECAPS *caps = NULL; CUVIDDECODECREATEINFO cuinfo;
>      int surface_fmt;
> +    int chroma_444;
>  
>      int old_width = avctx->width;
>      int old_height = avctx->height;
> @@ -169,17 +173,19 @@ static int CUDAAPI
> cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
> cuinfo.target_rect.right = cuinfo.ulTargetWidth;
> cuinfo.target_rect.bottom = cuinfo.ulTargetHeight; 
> +    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
> +
>      switch (format->bit_depth_luma_minus8) {
>      case 0: // 8-bit
> -        pix_fmts[1] = AV_PIX_FMT_NV12;
> +        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P :
> AV_PIX_FMT_NV12; caps = &ctx->caps8;
>          break;
>      case 2: // 10-bit
> -        pix_fmts[1] = AV_PIX_FMT_P010;
> +        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P10_LSB :
> AV_PIX_FMT_P010; caps = &ctx->caps10;
>          break;
>      case 4: // 12-bit
> -        pix_fmts[1] = AV_PIX_FMT_P016;
> +        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P12_LSB :
> AV_PIX_FMT_P016; caps = &ctx->caps12;
>          break;
>      default:
> @@ -282,12 +288,6 @@ static int CUDAAPI
> cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return
> 0; }
>  
> -    if (format->chroma_format != cudaVideoChromaFormat_420) {
> -        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420
> are not supported\n");
> -        ctx->internal_error = AVERROR(EINVAL);
> -        return 0;
> -    }
> -
>      ctx->chroma_format = format->chroma_format;
>  
>      cuinfo.CodecType = ctx->codec_type = format->codec;
> @@ -301,6 +301,14 @@ static int CUDAAPI
> cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form case
> AV_PIX_FMT_P016: cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
>          break;
> +    case AV_PIX_FMT_YUV444P:
> +        cuinfo.OutputFormat = CUVID_FORMAT_YUV444P;
> +        break;
> +    case AV_PIX_FMT_YUV444P10_LSB:
> +    case AV_PIX_FMT_YUV444P12_LSB:
> +    case AV_PIX_FMT_YUV444P16:
> +        cuinfo.OutputFormat = CUVID_FORMAT_YUV444P16;
> +        break;
>      default:
>          av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12,
> P010 or P016 are not supported\n"); ctx->internal_error =
> AVERROR(EINVAL); @@ -507,6 +515,7 @@ static int
> cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) return ret;
>  
>      if (av_fifo_size(ctx->frame_queue)) {
> +        const AVPixFmtDescriptor *pixdesc;
>          CuvidParsedFrame parsed_frame;
>          CUVIDPROCPARAMS params;
>          unsigned int pitch = 0;
> @@ -537,7 +546,10 @@ static int cuvid_output_frame(AVCodecContext
> *avctx, AVFrame *frame) goto error;
>              }
>  
> -            for (i = 0; i < 2; i++) {
> +            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> +
> +            for (i = 0; i < pixdesc->nb_components; i++) {
> +                size_t height = avctx->height >> (i ?
> pixdesc->log2_chroma_h : 0); CUDA_MEMCPY2D cpy = {
>                      .srcMemoryType = CU_MEMORYTYPE_DEVICE,
>                      .dstMemoryType = CU_MEMORYTYPE_DEVICE,
> @@ -547,22 +559,27 @@ static int cuvid_output_frame(AVCodecContext
> *avctx, AVFrame *frame) .dstPitch      = frame->linesize[i],
>                      .srcY          = offset,
>                      .WidthInBytes  = FFMIN(pitch,
> frame->linesize[i]),
> -                    .Height        = avctx->height >> (i ? 1 : 0),
> +                    .Height        = height,
>                  };
>  
>                  ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy,
> device_hwctx->stream)); if (ret < 0)
>                      goto error;
>  
> -                offset += avctx->height;
> +                offset += height;
>              }
>  
>              ret =
> CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream)); if
> (ret < 0) goto error;
> -        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
> -                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
> -                   avctx->pix_fmt == AV_PIX_FMT_P016) {
> +        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12          ||
> +                   avctx->pix_fmt == AV_PIX_FMT_P010          ||
> +                   avctx->pix_fmt == AV_PIX_FMT_P016          ||
> +                   avctx->pix_fmt == AV_PIX_FMT_YUV444P       ||
> +                   avctx->pix_fmt == AV_PIX_FMT_YUV444P10_LSB ||
> +                   avctx->pix_fmt == AV_PIX_FMT_YUV444P12_LSB ||
> +                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
> +            size_t offset = 0;
>              AVFrame *tmp_frame = av_frame_alloc();
>              if (!tmp_frame) {
>                  av_log(avctx, AV_LOG_ERROR, "av_frame_alloc
> failed\n"); @@ -570,15 +587,19 @@ static int
> cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error;
>              }
>  
> +            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> +
>              tmp_frame->format        = AV_PIX_FMT_CUDA;
>              tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
> -            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
> -            tmp_frame->linesize[0]   = pitch;
> -            tmp_frame->data[1]       = (uint8_t*)(mapped_frame +
> avctx->height * pitch);
> -            tmp_frame->linesize[1]   = pitch;
>              tmp_frame->width         = avctx->width;
>              tmp_frame->height        = avctx->height;
>  
> +            for (i = 0; i < pixdesc->nb_components; i++) {
> +                tmp_frame->data[i]     = (uint8_t*)mapped_frame +
> offset;
> +                tmp_frame->linesize[i] = pitch;
> +                offset += avctx->height >> (i ?
> pixdesc->log2_chroma_h : 0);

This needs to be multiplied by 'pitch'. Fixed locally.

> +            }
> +
>              ret = ff_get_buffer(avctx, frame, 0);
>              if (ret < 0) {
>                  av_log(avctx, AV_LOG_ERROR, "ff_get_buffer
> failed\n");

--phil

Patch hide | download patch | download mbox

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 4d3caf924e..595249475d 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -35,6 +35,9 @@ 
 #include "hwaccel.h"
 #include "internal.h"
 
+#define CUVID_FORMAT_YUV444P 2
+#define CUVID_FORMAT_YUV444P16 3
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -127,6 +130,7 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     CUVIDDECODECAPS *caps = NULL;
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
+    int chroma_444;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -169,17 +173,19 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 
+    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
     switch (format->bit_depth_luma_minus8) {
     case 0: // 8-bit
-        pix_fmts[1] = AV_PIX_FMT_NV12;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         caps = &ctx->caps8;
         break;
     case 2: // 10-bit
-        pix_fmts[1] = AV_PIX_FMT_P010;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P10_LSB : AV_PIX_FMT_P010;
         caps = &ctx->caps10;
         break;
     case 4: // 12-bit
-        pix_fmts[1] = AV_PIX_FMT_P016;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P12_LSB : AV_PIX_FMT_P016;
         caps = &ctx->caps12;
         break;
     default:
@@ -282,12 +288,6 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
-    if (format->chroma_format != cudaVideoChromaFormat_420) {
-        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
-        ctx->internal_error = AVERROR(EINVAL);
-        return 0;
-    }
-
     ctx->chroma_format = format->chroma_format;
 
     cuinfo.CodecType = ctx->codec_type = format->codec;
@@ -301,6 +301,14 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     case AV_PIX_FMT_P016:
         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
         break;
+    case AV_PIX_FMT_YUV444P:
+        cuinfo.OutputFormat = CUVID_FORMAT_YUV444P;
+        break;
+    case AV_PIX_FMT_YUV444P10_LSB:
+    case AV_PIX_FMT_YUV444P12_LSB:
+    case AV_PIX_FMT_YUV444P16:
+        cuinfo.OutputFormat = CUVID_FORMAT_YUV444P16;
+        break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
         ctx->internal_error = AVERROR(EINVAL);
@@ -507,6 +515,7 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         return ret;
 
     if (av_fifo_size(ctx->frame_queue)) {
+        const AVPixFmtDescriptor *pixdesc;
         CuvidParsedFrame parsed_frame;
         CUVIDPROCPARAMS params;
         unsigned int pitch = 0;
@@ -537,7 +546,10 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
-            for (i = 0; i < 2; i++) {
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                size_t height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                 CUDA_MEMCPY2D cpy = {
                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -547,22 +559,27 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .dstPitch      = frame->linesize[i],
                     .srcY          = offset,
                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
-                    .Height        = avctx->height >> (i ? 1 : 0),
+                    .Height        = height,
                 };
 
                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
-                offset += avctx->height;
+                offset += height;
             }
 
             ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
             if (ret < 0)
                 goto error;
-        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P016) {
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12          ||
+                   avctx->pix_fmt == AV_PIX_FMT_P010          ||
+                   avctx->pix_fmt == AV_PIX_FMT_P016          ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P       ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P10_LSB ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P12_LSB ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
+            size_t offset = 0;
             AVFrame *tmp_frame = av_frame_alloc();
             if (!tmp_frame) {
                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -570,15 +587,19 @@  static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
             tmp_frame->format        = AV_PIX_FMT_CUDA;
             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
-            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
-            tmp_frame->linesize[0]   = pitch;
-            tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->height * pitch);
-            tmp_frame->linesize[1]   = pitch;
             tmp_frame->width         = avctx->width;
             tmp_frame->height        = avctx->height;
 
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
+                tmp_frame->linesize[i] = pitch;
+                offset += avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
+            }
+
             ret = ff_get_buffer(avctx, frame, 0);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");