Message ID | 20181007021955.6668-1-philipl@overt.org |
---|---|
State | Superseded |
Headers | show |
On 07.10.2018 04:19, Philip Langdale wrote: > The latest generation video decoder on the Turing chips supports > decoding HEVC 4:4:4. Supporting this is relatively straight-forward; > we need to account for the different chroma format and pick the > right output and sw formats at the right times. > > There was one bug which was the hard-coded assumption that the > first chroma plane would be half-height; I fixed this to use the > actual shift value on the plane. > > The output formats ('2', and '3') are currently undocumented but > appear to be YUV444P and YUV444P16 based on how they behave. > --- > libavcodec/hevcdec.c | 2 ++ > libavcodec/nvdec.c | 43 +++++++++++++++++++++++++++++++++++-------- > 2 files changed, 37 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c > index a3b5c8cb71..508e093ea3 100644 > --- a/libavcodec/hevcdec.c > +++ b/libavcodec/hevcdec.c > @@ -409,6 +409,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) > #endif > break; > case AV_PIX_FMT_YUV420P12: > + case AV_PIX_FMT_YUV444P10: > + case AV_PIX_FMT_YUV444P12: > #if CONFIG_HEVC_NVDEC_HWACCEL > *fmt++ = AV_PIX_FMT_CUDA; > #endif > diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c > index e779be3a45..7e5c1791ea 100644 > --- a/libavcodec/nvdec.c > +++ b/libavcodec/nvdec.c > @@ -34,6 +34,9 @@ > #include "nvdec.h" > #include "internal.h" > > +#define cudaVideoSurfaceFormat_YUV444P 2 > +#define cudaVideoSurfaceFormat_YUV444P16 3 This will probably collide once the headers add those values, not sure how to properly handle that, but they at least should have a different naming scheme. 
> typedef struct NVDECDecoder { > CUvideodecoder decoder; > > @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) > > CUVIDDECODECREATEINFO params = { 0 }; > > - int cuvid_codec_type, cuvid_chroma_format; > + cudaVideoSurfaceFormat output_format; > + int cuvid_codec_type, cuvid_chroma_format, chroma_444; > int ret = 0; > > sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); > @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) > av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n"); > return AVERROR(ENOSYS); > } > + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; > > if (!avctx->hw_frames_ctx) { > ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA); > @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) > return ret; > } > > + switch (sw_desc->comp[0].depth) { > + case 8: > + output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P : > + cudaVideoSurfaceFormat_NV12; > + break; > + case 10: > + case 12: > + output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P16 : > + cudaVideoSurfaceFormat_P016; > + break; > + default: > + av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n"); > + return AVERROR(ENOSYS); > + } > + > frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; > > params.ulWidth = avctx->coded_width; > @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) > params.ulTargetWidth = avctx->coded_width; > params.ulTargetHeight = avctx->coded_height; > params.bitDepthMinus8 = sw_desc->comp[0].depth - 8; > - params.OutputFormat = params.bitDepthMinus8 ? 
> - cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; > + params.OutputFormat = output_format; > params.CodecType = cuvid_codec_type; > params.ChromaFormat = cuvid_chroma_format; > params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size; > @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) > NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv; > NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data; > > + AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; > + > CUVIDPROCPARAMS vpp = { 0 }; > NVDECFrame *unmap_data = NULL; > > @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) > > unsigned int pitch, i; > unsigned int offset = 0; > + int shift_h = 0, shift_v = 0; > int ret = 0; > > vpp.progressive_frame = 1; > @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) > unmap_data->idx_ref = av_buffer_ref(cf->idx_ref); > unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref); > > + av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v); > for (i = 0; frame->linesize[i]; i++) { > frame->data[i] = (uint8_t*)(devptr + offset); > frame->linesize[i] = pitch; > - offset += pitch * (frame->height >> (i ? 1 : 0)); > + offset += pitch * (frame->height >> (i ? 
shift_v : 0)); > } > > goto finish; > @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, > { > AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; > const AVPixFmtDescriptor *sw_desc; > - int cuvid_codec_type, cuvid_chroma_format; > + int cuvid_codec_type, cuvid_chroma_format, chroma_444; > > sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); > if (!sw_desc) > @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, > av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n"); > return AVERROR(EINVAL); > } > + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; > > frames_ctx->format = AV_PIX_FMT_CUDA; > frames_ctx->width = (avctx->coded_width + 1) & ~1; > @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, > if (!frames_ctx->pool) > return AVERROR(ENOMEM); > > + // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10 > + // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding > + // bits go at. P16 is unambiguous and matches. > switch (sw_desc->comp[0].depth) { > case 8: > - frames_ctx->sw_format = AV_PIX_FMT_NV12; > + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12; > break; > case 10: > - frames_ctx->sw_format = AV_PIX_FMT_P010; > + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010; > break; > case 12: > - frames_ctx->sw_format = AV_PIX_FMT_P016; > + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016; > break; > default: > return AVERROR(EINVAL); > rest LGTM
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index a3b5c8cb71..508e093ea3 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -409,6 +409,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif break; case AV_PIX_FMT_YUV420P12: + case AV_PIX_FMT_YUV444P10: + case AV_PIX_FMT_YUV444P12: #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c index e779be3a45..7e5c1791ea 100644 --- a/libavcodec/nvdec.c +++ b/libavcodec/nvdec.c @@ -34,6 +34,9 @@ #include "nvdec.h" #include "internal.h" +#define cudaVideoSurfaceFormat_YUV444P 2 +#define cudaVideoSurfaceFormat_YUV444P16 3 + typedef struct NVDECDecoder { CUvideodecoder decoder; @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) CUVIDDECODECREATEINFO params = { 0 }; - int cuvid_codec_type, cuvid_chroma_format; + cudaVideoSurfaceFormat output_format; + int cuvid_codec_type, cuvid_chroma_format, chroma_444; int ret = 0; sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n"); return AVERROR(ENOSYS); } + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; if (!avctx->hw_frames_ctx) { ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA); @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) return ret; } + switch (sw_desc->comp[0].depth) { + case 8: + output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444P : + cudaVideoSurfaceFormat_NV12; + break; + case 10: + case 12: + output_format = chroma_444 ? 
cudaVideoSurfaceFormat_YUV444P16 : + cudaVideoSurfaceFormat_P016; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n"); + return AVERROR(ENOSYS); + } + frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data; params.ulWidth = avctx->coded_width; @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx) params.ulTargetWidth = avctx->coded_width; params.ulTargetHeight = avctx->coded_height; params.bitDepthMinus8 = sw_desc->comp[0].depth - 8; - params.OutputFormat = params.bitDepthMinus8 ? - cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; + params.OutputFormat = output_format; params.CodecType = cuvid_codec_type; params.ChromaFormat = cuvid_chroma_format; params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size; @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv; NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data; + AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + CUVIDPROCPARAMS vpp = { 0 }; NVDECFrame *unmap_data = NULL; @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) unsigned int pitch, i; unsigned int offset = 0; + int shift_h = 0, shift_v = 0; int ret = 0; vpp.progressive_frame = 1; @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame) unmap_data->idx_ref = av_buffer_ref(cf->idx_ref); unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref); + av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v); for (i = 0; frame->linesize[i]; i++) { frame->data[i] = (uint8_t*)(devptr + offset); frame->linesize[i] = pitch; - offset += pitch * (frame->height >> (i ? 1 : 0)); + offset += pitch * (frame->height >> (i ? 
shift_v : 0)); } goto finish; @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, { AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; const AVPixFmtDescriptor *sw_desc; - int cuvid_codec_type, cuvid_chroma_format; + int cuvid_codec_type, cuvid_chroma_format, chroma_444; sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); if (!sw_desc) @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n"); return AVERROR(EINVAL); } + chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444; frames_ctx->format = AV_PIX_FMT_CUDA; frames_ctx->width = (avctx->coded_width + 1) & ~1; @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx, if (!frames_ctx->pool) return AVERROR(ENOMEM); + // It is semantically incorrect to use AV_PIX_FMT_YUV444P16 for either the 10 + // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding + // bits go at. P16 is unambiguous and matches. switch (sw_desc->comp[0].depth) { case 8: - frames_ctx->sw_format = AV_PIX_FMT_NV12; + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12; break; case 10: - frames_ctx->sw_format = AV_PIX_FMT_P010; + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010; break; case 12: - frames_ctx->sw_format = AV_PIX_FMT_P016; + frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016; break; default: return AVERROR(EINVAL);