@@ -83,7 +83,8 @@ typedef enum cudaVideoCodec_enum {
* Video Surface Formats Enums
*/
typedef enum cudaVideoSurfaceFormat_enum {
- cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only supported output format) */
+ cudaVideoSurfaceFormat_NV12=0, /**< NV12 output surface; the cuvid decoder requests it when the negotiated software format is AV_PIX_FMT_NV12 */
+ cudaVideoSurfaceFormat_P016=1 /**< P016 output surface; the cuvid decoder requests it when the negotiated software format is AV_PIX_FMT_P016 (offered for streams whose luma bit depth exceeds 8) */
} cudaVideoSurfaceFormat;
/*!
@@ -28,6 +28,7 @@
#include "libavutil/fifo.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
#include "avcodec.h"
#include "internal.h"
@@ -103,11 +104,35 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
CuvidContext *ctx = avctx->priv_data;
AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
CUVIDDECODECREATEINFO cuinfo;
+ int surface_fmt;
+
+ enum AVPixelFormat pix_fmts_nv12[3] = { AV_PIX_FMT_CUDA,
+ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_NONE };
+
+ enum AVPixelFormat pix_fmts_p016[3] = { AV_PIX_FMT_CUDA,
+ AV_PIX_FMT_P016,
+ AV_PIX_FMT_NONE };
av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
ctx->internal_error = 0;
+ surface_fmt = ff_get_format(avctx, format->bit_depth_luma_minus8 > 0 ?
+ pix_fmts_p016 : pix_fmts_nv12);
+ if (surface_fmt < 0) {
+ av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
+
+ av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
+ av_get_pix_fmt_name(avctx->pix_fmt),
+ av_get_pix_fmt_name(surface_fmt),
+ av_get_pix_fmt_name(avctx->sw_pix_fmt));
+
+ avctx->pix_fmt = surface_fmt;
+
avctx->width = format->display_area.right;
avctx->height = format->display_area.bottom;
@@ -156,7 +181,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
hwframe_ctx->width < avctx->width ||
hwframe_ctx->height < avctx->height ||
hwframe_ctx->format != AV_PIX_FMT_CUDA ||
- hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
+ hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
ctx->internal_error = AVERROR(EINVAL);
return 0;
@@ -177,7 +202,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
cuinfo.CodecType = ctx->codec_type = format->codec;
cuinfo.ChromaFormat = format->chroma_format;
- cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+
+ switch (avctx->sw_pix_fmt) {
+ case AV_PIX_FMT_NV12:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+ break;
+ case AV_PIX_FMT_P016:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12 or P016 are not supported\n");
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
cuinfo.ulWidth = avctx->coded_width;
cuinfo.ulHeight = avctx->coded_height;
@@ -209,7 +246,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
if (!hwframe_ctx->pool) {
hwframe_ctx->format = AV_PIX_FMT_CUDA;
- hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
+ hwframe_ctx->sw_format = avctx->sw_pix_fmt;
hwframe_ctx->width = avctx->width;
hwframe_ctx->height = avctx->height;
@@ -417,7 +454,8 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
offset += avctx->coded_height;
}
- } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
+ } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
+ avctx->pix_fmt == AV_PIX_FMT_P016) {
AVFrame *tmp_frame = av_frame_alloc();
if (!tmp_frame) {
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -447,7 +485,6 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
av_frame_free(&tmp_frame);
goto error;
}
-
av_frame_free(&tmp_frame);
} else {
ret = AVERROR_BUG;
@@ -615,17 +652,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
const AVBitStreamFilter *bsf;
int ret = 0;
- enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
- AV_PIX_FMT_NV12,
- AV_PIX_FMT_NONE };
-
- ret = ff_get_format(avctx, pix_fmts);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
- return ret;
- }
- avctx->pix_fmt = ret;
-
ret = cuvid_load_functions(&ctx->cvdl);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
@@ -899,6 +925,7 @@ static const AVOption options[] = {
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
+ AV_PIX_FMT_P016, \
AV_PIX_FMT_NONE }, \
};
@@ -35,6 +35,7 @@ static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_NV12,
AV_PIX_FMT_YUV420P,
AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_P016,
};
static void cuda_buffer_free(void *opaque, uint8_t *data)
@@ -111,6 +112,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
size = aligned_width * ctx->height * 3 / 2;
break;
case AV_PIX_FMT_YUV444P:
+ case AV_PIX_FMT_P016:
size = aligned_width * ctx->height * 3;
break;
}
@@ -125,7 +127,13 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
{
- int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
+ int aligned_width;
+ int width_in_bytes = ctx->width;
+
+ if (ctx->sw_format == AV_PIX_FMT_P016) {
+ width_in_bytes *= 2;
+ }
+ aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
frame->buf[0] = av_buffer_pool_get(ctx->pool);
if (!frame->buf[0])
@@ -133,6 +141,7 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
switch (ctx->sw_format) {
case AV_PIX_FMT_NV12:
+ case AV_PIX_FMT_P016:
frame->data[0] = frame->buf[0]->data;
frame->data[1] = frame->data[0] + aligned_width * ctx->height;
frame->linesize[0] = aligned_width;
The NVIDIA 375.xx driver introduces support for P016 output surfaces for 10-bit and 12-bit HEVC content (it is also the first driver to support hardware decoding of 12-bit content). This change adds cuvid decoder support for P016 output to both hardware and system memory surfaces. For simplicity, it does not maintain the previous ability to output NV12 for input video with more than 8 bits per sample - the user will need to update their driver to decode such videos. Signed-off-by: Philip Langdale <philipl@overt.org> --- compat/cuda/dynlink_cuviddec.h | 3 ++- libavcodec/cuvid.c | 59 ++++++++++++++++++++++++++++++------------ libavutil/hwcontext_cuda.c | 11 +++++++- 3 files changed, 55 insertions(+), 18 deletions(-)