diff mbox

[FFmpeg-devel] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content

Message ID 20181007021955.6668-1-philipl@overt.org
State Superseded
Headers show

Commit Message

Philip Langdale Oct. 7, 2018, 2:19 a.m. UTC
The latest generation video decoder on the Turing chips supports
decoding HEVC 4:4:4. Supporting this is relatively straightforward;
we need to account for the different chroma format and pick the
right output and sw formats at the right times.

There was one bug which was the hard-coded assumption that the
first chroma plane would be half-height; I fixed this to use the
actual shift value on the plane.

The output formats ('2' and '3') are currently undocumented but
appear to be YUV444P and YUV444P16 based on how they behave.
---
 libavcodec/hevcdec.c |  2 ++
 libavcodec/nvdec.c   | 43 +++++++++++++++++++++++++++++++++++--------
 2 files changed, 37 insertions(+), 8 deletions(-)

Comments

Timo Rothenpieler Oct. 7, 2018, 8:55 a.m. UTC | #1
On 07.10.2018 04:19, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
> 
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
> 
> The output formats ('2', and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
> ---
>   libavcodec/hevcdec.c |  2 ++
>   libavcodec/nvdec.c   | 43 +++++++++++++++++++++++++++++++++++--------
>   2 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..508e093ea3 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
>   #endif
>           break;
>       case AV_PIX_FMT_YUV420P12:
> +    case AV_PIX_FMT_YUV444P10:
> +    case AV_PIX_FMT_YUV444P12:
>   #if CONFIG_HEVC_NVDEC_HWACCEL
>           *fmt++ = AV_PIX_FMT_CUDA;
>   #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..7e5c1791ea 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
>   #include "nvdec.h"
>   #include "internal.h"
>   
> +#define cudaVideoSurfaceFormat_YUV444P 2
> +#define cudaVideoSurfaceFormat_YUV444P16 3

This will probably collide once the headers add those values. I'm not sure
how to handle that properly, but they should at least use a different
naming scheme.

>   typedef struct NVDECDecoder {
>       CUvideodecoder decoder;
>   
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>   
>       CUVIDDECODECREATEINFO params = { 0 };
>   
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    cudaVideoSurfaceFormat output_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>       int ret = 0;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>           av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
>           return AVERROR(ENOSYS);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       if (!avctx->hw_frames_ctx) {
>           ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>               return ret;
>       }
>   
> +    switch (sw_desc->comp[0].depth) {
> +    case 8:
> +        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P :
> +                                     cudaVideoSurfaceFormat_NV12;
> +        break;
> +    case 10:
> +    case 12:
> +        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P16 :
> +                                     cudaVideoSurfaceFormat_P016;
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> +        return AVERROR(ENOSYS);
> +    }
> +
>       frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>   
>       params.ulWidth             = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>       params.ulTargetWidth       = avctx->coded_width;
>       params.ulTargetHeight      = avctx->coded_height;
>       params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
> -    params.OutputFormat        = params.bitDepthMinus8 ?
> -                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> +    params.OutputFormat        = output_format;
>       params.CodecType           = cuvid_codec_type;
>       params.ChromaFormat        = cuvid_chroma_format;
>       params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
>       NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>   
> +    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
>       CUVIDPROCPARAMS vpp = { 0 };
>       NVDECFrame *unmap_data = NULL;
>   
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>   
>       unsigned int pitch, i;
>       unsigned int offset = 0;
> +    int shift_h = 0, shift_v = 0;
>       int ret = 0;
>   
>       vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
>       unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>   
> +    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
>       for (i = 0; frame->linesize[i]; i++) {
>           frame->data[i] = (uint8_t*)(devptr + offset);
>           frame->linesize[i] = pitch;
> -        offset += pitch * (frame->height >> (i ? 1 : 0));
> +        offset += pitch * (frame->height >> (i ? shift_v : 0));
>       }
>   
>       goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>   {
>       AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
>       const AVPixFmtDescriptor *sw_desc;
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
>       if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>           av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
>           return AVERROR(EINVAL);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       frames_ctx->format            = AV_PIX_FMT_CUDA;
>       frames_ctx->width             = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>       if (!frames_ctx->pool)
>           return AVERROR(ENOMEM);
>   
> +    // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
> +    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> +    // bits go at. P16 is unambiguous and matches.
>       switch (sw_desc->comp[0].depth) {
>       case 8:
> -        frames_ctx->sw_format = AV_PIX_FMT_NV12;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
>           break;
>       case 10:
> -        frames_ctx->sw_format = AV_PIX_FMT_P010;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
>           break;
>       case 12:
> -        frames_ctx->sw_format = AV_PIX_FMT_P016;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
>           break;
>       default:
>           return AVERROR(EINVAL);
> 

rest LGTM
diff mbox

Patch

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index a3b5c8cb71..508e093ea3 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -409,6 +409,8 @@  static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
         break;
     case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index e779be3a45..7e5c1791ea 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -34,6 +34,9 @@ 
 #include "nvdec.h"
 #include "internal.h"
 
+#define cudaVideoSurfaceFormat_YUV444P 2
+#define cudaVideoSurfaceFormat_YUV444P16 3
+
 typedef struct NVDECDecoder {
     CUvideodecoder decoder;
 
@@ -273,7 +276,8 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
 
     CUVIDDECODECREATEINFO params = { 0 };
 
-    int cuvid_codec_type, cuvid_chroma_format;
+    cudaVideoSurfaceFormat output_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
     int ret = 0;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -291,6 +295,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
         return AVERROR(ENOSYS);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     if (!avctx->hw_frames_ctx) {
         ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
@@ -298,6 +303,21 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
             return ret;
     }
 
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P :
+                                     cudaVideoSurfaceFormat_NV12;
+        break;
+    case 10:
+    case 12:
+        output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P16 :
+                                     cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
+        return AVERROR(ENOSYS);
+    }
+
     frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
 
     params.ulWidth             = avctx->coded_width;
@@ -305,8 +325,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
     params.ulTargetWidth       = avctx->coded_width;
     params.ulTargetHeight      = avctx->coded_height;
     params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
-    params.OutputFormat        = params.bitDepthMinus8 ?
-                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.OutputFormat        = output_format;
     params.CodecType           = cuvid_codec_type;
     params.ChromaFormat        = cuvid_chroma_format;
     params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
@@ -388,6 +407,8 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
     NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
 
+    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
@@ -397,6 +418,7 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
 
     unsigned int pitch, i;
     unsigned int offset = 0;
+    int shift_h = 0, shift_v = 0;
     int ret = 0;
 
     vpp.progressive_frame = 1;
@@ -433,10 +455,11 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
     unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
 
+    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
     for (i = 0; frame->linesize[i]; i++) {
         frame->data[i] = (uint8_t*)(devptr + offset);
         frame->linesize[i] = pitch;
-        offset += pitch * (frame->height >> (i ? 1 : 0));
+        offset += pitch * (frame->height >> (i ? shift_v : 0));
     }
 
     goto finish;
@@ -576,7 +599,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
 {
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     const AVPixFmtDescriptor *sw_desc;
-    int cuvid_codec_type, cuvid_chroma_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!sw_desc)
@@ -593,6 +616,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
         return AVERROR(EINVAL);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     frames_ctx->format            = AV_PIX_FMT_CUDA;
     frames_ctx->width             = (avctx->coded_width + 1) & ~1;
@@ -605,15 +629,18 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
     if (!frames_ctx->pool)
         return AVERROR(ENOMEM);
 
+    // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
+    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
+    // bits go at. P16 is unambiguous and matches.
     switch (sw_desc->comp[0].depth) {
     case 8:
-        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         break;
     case 10:
-        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         break;
     case 12:
-        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         break;
     default:
         return AVERROR(EINVAL);