[FFmpeg-devel,2/5] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content

Submitted by Philip Langdale on Oct. 7, 2018, 5:50 p.m.

Details

Message ID 20181007175057.31070-3-philipl@overt.org
State New
Headers show

Commit Message

Philip Langdale Oct. 7, 2018, 5:50 p.m.
The latest generation video decoder on the Turing chips supports
decoding HEVC 4:4:4. Supporting this is relatively straightforward;
we need to account for the different chroma format and pick the
right output and sw formats at the right times.

There was one bug: a hard-coded assumption that the first chroma
plane would be half-height. I fixed this to use the actual vertical
chroma shift value of the sw format.

The output formats ('2' and '3') are currently undocumented but
appear to be YUV444P and YUV444P16 based on how they behave.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/hevcdec.c       |  3 +++
 libavcodec/nvdec.c         | 43 +++++++++++++++++++++++++++++++-------
 libavutil/hwcontext_cuda.c |  2 ++
 3 files changed, 40 insertions(+), 8 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index a3b5c8cb71..972f2b56b6 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -409,6 +409,9 @@  static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
         break;
     case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index e779be3a45..e1ac06f852 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -34,6 +34,9 @@ 
 #include "nvdec.h"
 #include "internal.h"
 
+#define NVDEC_FORMAT_YUV444P 2
+#define NVDEC_FORMAT_YUV444P16 3
+
 typedef struct NVDECDecoder {
     CUvideodecoder decoder;
 
@@ -273,7 +276,8 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
 
     CUVIDDECODECREATEINFO params = { 0 };
 
-    int cuvid_codec_type, cuvid_chroma_format;
+    cudaVideoSurfaceFormat output_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
     int ret = 0;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -291,6 +295,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
         return AVERROR(ENOSYS);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     if (!avctx->hw_frames_ctx) {
         ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
@@ -298,6 +303,21 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
             return ret;
     }
 
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P :
+                                     cudaVideoSurfaceFormat_NV12;
+        break;
+    case 10:
+    case 12:
+        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P16 :
+                                     cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
+        return AVERROR(ENOSYS);
+    }
+
     frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
 
     params.ulWidth             = avctx->coded_width;
@@ -305,8 +325,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
     params.ulTargetWidth       = avctx->coded_width;
     params.ulTargetHeight      = avctx->coded_height;
     params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
-    params.OutputFormat        = params.bitDepthMinus8 ?
-                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.OutputFormat        = output_format;
     params.CodecType           = cuvid_codec_type;
     params.ChromaFormat        = cuvid_chroma_format;
     params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
@@ -388,6 +407,8 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
     NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
 
+    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
@@ -397,6 +418,7 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
 
     unsigned int pitch, i;
     unsigned int offset = 0;
+    int shift_h = 0, shift_v = 0;
     int ret = 0;
 
     vpp.progressive_frame = 1;
@@ -433,10 +455,11 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
     unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
 
+    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
     for (i = 0; frame->linesize[i]; i++) {
         frame->data[i] = (uint8_t*)(devptr + offset);
         frame->linesize[i] = pitch;
-        offset += pitch * (frame->height >> (i ? 1 : 0));
+        offset += pitch * (frame->height >> (i ? shift_v : 0));
     }
 
     goto finish;
@@ -576,7 +599,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
 {
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     const AVPixFmtDescriptor *sw_desc;
-    int cuvid_codec_type, cuvid_chroma_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!sw_desc)
@@ -593,6 +616,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
         return AVERROR(EINVAL);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     frames_ctx->format            = AV_PIX_FMT_CUDA;
     frames_ctx->width             = (avctx->coded_width + 1) & ~1;
@@ -605,15 +629,18 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
     if (!frames_ctx->pool)
         return AVERROR(ENOMEM);
 
+    // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10
+    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
+    // bits go at. P16 is unambiguous and matches.
     switch (sw_desc->comp[0].depth) {
     case 8:
-        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         break;
     case 10:
-        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P10_LSB : AV_PIX_FMT_P010;
         break;
     case 12:
-        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P12_LSB : AV_PIX_FMT_P016;
         break;
     default:
         return AVERROR(EINVAL);
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 3b1d53e799..094706db44 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -38,6 +38,8 @@  static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV444P,
     AV_PIX_FMT_P010,
     AV_PIX_FMT_P016,
+    AV_PIX_FMT_YUV444P10_LSB,
+    AV_PIX_FMT_YUV444P12_LSB,
     AV_PIX_FMT_YUV444P16,
     AV_PIX_FMT_0RGB32,
     AV_PIX_FMT_0BGR32,