diff mbox

[FFmpeg-devel,2/5] avcodec/nvdec: Add support for decoding HEVC 4:4:4 content

Message ID 20181020204701.6865-3-philipl@overt.org
State Superseded
Headers show

Commit Message

Philip Langdale Oct. 20, 2018, 8:46 p.m. UTC
The latest generation video decoder on the Turing chips supports
decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
we need to account for the different chroma format and pick the
right output and sw formats at the right times.

There was one bug which was the hard-coded assumption that the
first chroma plane would be half-height; I fixed this to use the
actual shift value on the plane.

The output formats ('2', and '3') are currently undocumented but
appear to be YUV444P and YUV444P16 based on how they behave.

Signed-off-by: Philip Langdale <philipl@overt.org>
---
 libavcodec/hevcdec.c       |  3 +++
 libavcodec/nvdec.c         | 43 +++++++++++++++++++++++++++++++-------
 libavutil/hwcontext_cuda.c |  2 ++
 3 files changed, 40 insertions(+), 8 deletions(-)

Comments

Timo Rothenpieler Oct. 20, 2018, 8:58 p.m. UTC | #1
On 20.10.2018 22:46, Philip Langdale wrote:
> The latest generation video decoder on the Turing chips supports
> decoding HEVC 4:4:4. Supporting this is relatively straight-forward;
> we need to account for the different chroma format and pick the
> right output and sw formats at the right times.
> 
> There was one bug which was the hard-coded assumption that the
> first chroma plane would be half-height; I fixed this to use the
> actual shift value on the plane.
> 
> The output formats ('2', and '3') are currently undocumented but
> appear to be YUV444P and YUV444P16 based on how they behave.
> 
> Signed-off-by: Philip Langdale <philipl@overt.org>
> ---
>   libavcodec/hevcdec.c       |  3 +++
>   libavcodec/nvdec.c         | 43 +++++++++++++++++++++++++++++++-------
>   libavutil/hwcontext_cuda.c |  2 ++
>   3 files changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index a3b5c8cb71..972f2b56b6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
>   #endif
>           break;
>       case AV_PIX_FMT_YUV420P12:
> +    case AV_PIX_FMT_YUV444P:
> +    case AV_PIX_FMT_YUV444P10:
> +    case AV_PIX_FMT_YUV444P12:
>   #if CONFIG_HEVC_NVDEC_HWACCEL
>           *fmt++ = AV_PIX_FMT_CUDA;
>   #endif
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index e779be3a45..43cc38485a 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -34,6 +34,9 @@
>   #include "nvdec.h"
>   #include "internal.h"
>   
> +#define NVDEC_FORMAT_YUV444P 2
> +#define NVDEC_FORMAT_YUV444P16 3
> +
>   typedef struct NVDECDecoder {
>       CUvideodecoder decoder;
>   
> @@ -273,7 +276,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>   
>       CUVIDDECODECREATEINFO params = { 0 };
>   
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    cudaVideoSurfaceFormat output_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>       int ret = 0;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> @@ -291,6 +295,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>           av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
>           return AVERROR(ENOSYS);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       if (!avctx->hw_frames_ctx) {
>           ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
> @@ -298,6 +303,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>               return ret;
>       }
>   
> +    switch (sw_desc->comp[0].depth) {
> +    case 8:
> +        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P :
> +                                     cudaVideoSurfaceFormat_NV12;
> +        break;
> +    case 10:
> +    case 12:
> +        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P16 :
> +                                     cudaVideoSurfaceFormat_P016;
> +        break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
> +        return AVERROR(ENOSYS);
> +    }
> +
>       frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
>   
>       params.ulWidth             = avctx->coded_width;
> @@ -305,8 +325,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
>       params.ulTargetWidth       = avctx->coded_width;
>       params.ulTargetHeight      = avctx->coded_height;
>       params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
> -    params.OutputFormat        = params.bitDepthMinus8 ?
> -                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
> +    params.OutputFormat        = output_format;
>       params.CodecType           = cuvid_codec_type;
>       params.ChromaFormat        = cuvid_chroma_format;
>       params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
> @@ -388,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
>       NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
>   
> +    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
> +
>       CUVIDPROCPARAMS vpp = { 0 };
>       NVDECFrame *unmap_data = NULL;
>   
> @@ -397,6 +418,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>   
>       unsigned int pitch, i;
>       unsigned int offset = 0;
> +    int shift_h = 0, shift_v = 0;
>       int ret = 0;
>   
>       vpp.progressive_frame = 1;
> @@ -433,10 +455,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
>       unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
>       unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
>   
> +    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
>       for (i = 0; frame->linesize[i]; i++) {
>           frame->data[i] = (uint8_t*)(devptr + offset);
>           frame->linesize[i] = pitch;
> -        offset += pitch * (frame->height >> (i ? 1 : 0));
> +        offset += pitch * (frame->height >> (i ? shift_v : 0));
>       }
>   
>       goto finish;
> @@ -576,7 +599,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>   {
>       AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
>       const AVPixFmtDescriptor *sw_desc;
> -    int cuvid_codec_type, cuvid_chroma_format;
> +    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
>   
>       sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
>       if (!sw_desc)
> @@ -593,6 +616,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>           av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
>           return AVERROR(EINVAL);
>       }
> +    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
>   
>       frames_ctx->format            = AV_PIX_FMT_CUDA;
>       frames_ctx->width             = (avctx->coded_width + 1) & ~1;
> @@ -605,15 +629,18 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
>       if (!frames_ctx->pool)
>           return AVERROR(ENOMEM);
>   
> +    // It it semantically incorrect to use AX_PIX_FMT_YUV444P16 for either the 10
> +    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
> +    // bits go at. P16 is unambiguous and matches.

This comment seems redundant, since AX_PIX_FMT_YUV444P16 isn't even used.

>       switch (sw_desc->comp[0].depth) {
>       case 8:
> -        frames_ctx->sw_format = AV_PIX_FMT_NV12;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
>           break;
>       case 10:
> -        frames_ctx->sw_format = AV_PIX_FMT_P010;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P10_MSB : AV_PIX_FMT_P010;
>           break;
>       case 12:
> -        frames_ctx->sw_format = AV_PIX_FMT_P016;
> +        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P12_MSB : AV_PIX_FMT_P016;
>           break;
>       default:
>           return AVERROR(EINVAL);
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 3b1d53e799..43337f14f0 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -38,6 +38,8 @@ static const enum AVPixelFormat supported_formats[] = {
>       AV_PIX_FMT_YUV444P,
>       AV_PIX_FMT_P010,
>       AV_PIX_FMT_P016,
> +    AV_PIX_FMT_YUV444P10_MSB,
> +    AV_PIX_FMT_YUV444P12_MSB,

You are adding these to supported formats, but are not actually adding 
support, so cuda_frames_init and cuda_get_buffer will both run into the 
BUG case. Should be super easy, as they're identical to 444P16.

>       AV_PIX_FMT_YUV444P16,
Technically, this can go now. But I guess removing it is an API break, 
so it gotta stay I guess.

>       AV_PIX_FMT_0RGB32,
>       AV_PIX_FMT_0BGR32,
>
Philip Langdale Oct. 20, 2018, 9:52 p.m. UTC | #2
On Sat, 20 Oct 2018 22:58:34 +0200
Timo Rothenpieler <timo@rothenpieler.org> wrote:

> >   
> > +    // It it semantically incorrect to use AX_PIX_FMT_YUV444P16
> > for either the 10
> > +    // or 12 bit case, but ffmpeg and nvidia disagree on which end
> > the padding
> > +    // bits go at. P16 is unambiguous and matches.  
> 
> This comment seems redundant, since AX_PIX_FMT_YUV444P16 isn't even
> used.

Yeah, that's outdated from the original version with no pix fmts. I'll
remove it.

> > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> > index 3b1d53e799..43337f14f0 100644
> > --- a/libavutil/hwcontext_cuda.c
> > +++ b/libavutil/hwcontext_cuda.c
> > @@ -38,6 +38,8 @@ static const enum AVPixelFormat
> > supported_formats[] = { AV_PIX_FMT_YUV444P,
> >       AV_PIX_FMT_P010,
> >       AV_PIX_FMT_P016,
> > +    AV_PIX_FMT_YUV444P10_MSB,
> > +    AV_PIX_FMT_YUV444P12_MSB,  
> 
> You are adding these to supported formats, but are not actually
> adding support, so cuda_frames_init and cuda_get_buffer will both run
> into the BUG case. Should be super easy, as they're identical to
> 444P16.

This actually works fine. cuda_frames_init and cuda_get_buffer both use
generic mechanisms that introspect the pix desc information.
 
> >       AV_PIX_FMT_YUV444P16,  
> Technically, this can go now. But I guess removing it is an API
> break, so it gotta stay I guess.

Right. And at the end of the day it works correctly in the context of
the hwcontext. You might feed into some theoretical filter that would
handle it appropriately. Just because nvenc can't actually consume it
fully doesn't make it wrong.

--phil
Timo Rothenpieler Oct. 20, 2018, 10:07 p.m. UTC | #3
On 20.10.2018 23:52, Philip Langdale wrote:
> On Sat, 20 Oct 2018 22:58:34 +0200
> Timo Rothenpieler <timo@rothenpieler.org> wrote:
> 
>>>    
>>> +    // It it semantically incorrect to use AX_PIX_FMT_YUV444P16
>>> for either the 10
>>> +    // or 12 bit case, but ffmpeg and nvidia disagree on which end
>>> the padding
>>> +    // bits go at. P16 is unambiguous and matches.
>>
>> This comment seems redundant, since AX_PIX_FMT_YUV444P16 isn't even
>> used.
> 
> Yeah, that's outdated from the original version with no pix fmts. I'll
> remove it.
> 
>>> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
>>> index 3b1d53e799..43337f14f0 100644
>>> --- a/libavutil/hwcontext_cuda.c
>>> +++ b/libavutil/hwcontext_cuda.c
>>> @@ -38,6 +38,8 @@ static const enum AVPixelFormat
>>> supported_formats[] = { AV_PIX_FMT_YUV444P,
>>>        AV_PIX_FMT_P010,
>>>        AV_PIX_FMT_P016,
>>> +    AV_PIX_FMT_YUV444P10_MSB,
>>> +    AV_PIX_FMT_YUV444P12_MSB,
>>
>> You are adding these to supported formats, but are not actually
>> adding support, so cuda_frames_init and cuda_get_buffer will both run
>> into the BUG case. Should be super easy, as they're identical to
>> 444P16.
> 
> This actually works fine. cuda_frames_init and cuda_get_buffer both use
> generic mechanisms that introspect the pix desc information.

Right, I had the 3.4 branch checked out when checking that...

>>>        AV_PIX_FMT_YUV444P16,
>> Technically, this can go now. But I guess removing it is an API
>> break, so it gotta stay I guess.
> 
> Right. And at the end of the day it works correctly in the context of
> the hwcontext. You might feed into some theoretical filter that would
> handle it appropriately. Just because nvenc can't actually consume it
> fully doesn't make it wrong.
> 
> --phil
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
diff mbox

Patch

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index a3b5c8cb71..972f2b56b6 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -409,6 +409,9 @@  static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
         break;
     case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index e779be3a45..43cc38485a 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -34,6 +34,9 @@ 
 #include "nvdec.h"
 #include "internal.h"
 
+#define NVDEC_FORMAT_YUV444P 2
+#define NVDEC_FORMAT_YUV444P16 3
+
 typedef struct NVDECDecoder {
     CUvideodecoder decoder;
 
@@ -273,7 +276,8 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
 
     CUVIDDECODECREATEINFO params = { 0 };
 
-    int cuvid_codec_type, cuvid_chroma_format;
+    cudaVideoSurfaceFormat output_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
     int ret = 0;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
@@ -291,6 +295,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
         return AVERROR(ENOSYS);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     if (!avctx->hw_frames_ctx) {
         ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
@@ -298,6 +303,21 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
             return ret;
     }
 
+    switch (sw_desc->comp[0].depth) {
+    case 8:
+        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P :
+                                     cudaVideoSurfaceFormat_NV12;
+        break;
+    case 10:
+    case 12:
+        output_format = chroma_444 ? NVDEC_FORMAT_YUV444P16 :
+                                     cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
+        return AVERROR(ENOSYS);
+    }
+
     frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
 
     params.ulWidth             = avctx->coded_width;
@@ -305,8 +325,7 @@  int ff_nvdec_decode_init(AVCodecContext *avctx)
     params.ulTargetWidth       = avctx->coded_width;
     params.ulTargetHeight      = avctx->coded_height;
     params.bitDepthMinus8      = sw_desc->comp[0].depth - 8;
-    params.OutputFormat        = params.bitDepthMinus8 ?
-                                 cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+    params.OutputFormat        = output_format;
     params.CodecType           = cuvid_codec_type;
     params.ChromaFormat        = cuvid_chroma_format;
     params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
@@ -388,6 +407,8 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     NVDECFrame        *cf = (NVDECFrame*)fdd->hwaccel_priv;
     NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
 
+    AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+
     CUVIDPROCPARAMS vpp = { 0 };
     NVDECFrame *unmap_data = NULL;
 
@@ -397,6 +418,7 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
 
     unsigned int pitch, i;
     unsigned int offset = 0;
+    int shift_h = 0, shift_v = 0;
     int ret = 0;
 
     vpp.progressive_frame = 1;
@@ -433,10 +455,11 @@  static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
     unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
     unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
 
+    av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
     for (i = 0; frame->linesize[i]; i++) {
         frame->data[i] = (uint8_t*)(devptr + offset);
         frame->linesize[i] = pitch;
-        offset += pitch * (frame->height >> (i ? 1 : 0));
+        offset += pitch * (frame->height >> (i ? shift_v : 0));
     }
 
     goto finish;
@@ -576,7 +599,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
 {
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     const AVPixFmtDescriptor *sw_desc;
-    int cuvid_codec_type, cuvid_chroma_format;
+    int cuvid_codec_type, cuvid_chroma_format, chroma_444;
 
     sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
     if (!sw_desc)
@@ -593,6 +616,7 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
         return AVERROR(EINVAL);
     }
+    chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
 
     frames_ctx->format            = AV_PIX_FMT_CUDA;
     frames_ctx->width             = (avctx->coded_width + 1) & ~1;
@@ -605,15 +629,18 @@  int ff_nvdec_frame_params(AVCodecContext *avctx,
     if (!frames_ctx->pool)
         return AVERROR(ENOMEM);
 
+    // It it semantically incorrect to use AX_PIX_FMT_YUV444P16 for either the 10
+    // or 12 bit case, but ffmpeg and nvidia disagree on which end the padding
+    // bits go at. P16 is unambiguous and matches.
     switch (sw_desc->comp[0].depth) {
     case 8:
-        frames_ctx->sw_format = AV_PIX_FMT_NV12;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         break;
     case 10:
-        frames_ctx->sw_format = AV_PIX_FMT_P010;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P10_MSB : AV_PIX_FMT_P010;
         break;
     case 12:
-        frames_ctx->sw_format = AV_PIX_FMT_P016;
+        frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P12_MSB : AV_PIX_FMT_P016;
         break;
     default:
         return AVERROR(EINVAL);
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 3b1d53e799..43337f14f0 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -38,6 +38,8 @@  static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV444P,
     AV_PIX_FMT_P010,
     AV_PIX_FMT_P016,
+    AV_PIX_FMT_YUV444P10_MSB,
+    AV_PIX_FMT_YUV444P12_MSB,
     AV_PIX_FMT_YUV444P16,
     AV_PIX_FMT_0RGB32,
     AV_PIX_FMT_0BGR32,