[FFmpeg-devel] Allow using primary CUDA device context

On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote:
> On 17.11.2019 15:58, Oleg Dobkin wrote:
> 
> Add AVCUDADeviceContextFlags to control the creation of CUDA device
> context for the hardware CUDA decoder.
> 
> The current values are 0 (default behavior), in which case a new
> context is created for each decoder, and 1, in which case the primary
> CUDA context is used.
> 
> There are several reasons for using primary device context instead of
> creating a new one:
> 
>   - This is the recommended way to handle device contexts (see
>     https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)
> 
>   - Memory allocations, kernels and other state are associated with
> the current device context. Currently, the context is not accessible
> from the FFmpeg API, so, technically, the memory created by the
> hardware decoder (the video frame) can't be safely read.
> 
> Signed-off-by: Oleg Dobkin <olegd@anyvision.co>
> ---
>   libavutil/hwcontext_cuda.c | 20 +++++++++++++++-----
>   libavutil/hwcontext_cuda.h |  7 +++++++
>   2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index cca39e9fc7..608ea57569 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext
> *device_ctx)
>       if (hwctx->internal) {
>           CudaFunctions *cu = hwctx->internal->cuda_dl;
>           if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
> -            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            if (hwctx->flags == DCF_CREATE_CONTEXT)
> 
> Should actually be checking for the flag, not equality.
> 
> 
> +                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            else
> +                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx-
> >cuda_device));
>               hwctx->cuda_ctx = NULL;
> +            hwctx->cuda_device = NULL;
>           }
>           cuda_free_functions(&hwctx->internal->cuda_dl);
>       }
> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>   {
>       AVCUDADeviceContext *hwctx = device_ctx->hwctx;
>       CudaFunctions *cu;
> -    CUdevice cu_device;
>       CUcontext dummy;
>       int ret, device_idx = 0;
>   
> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
> +    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device,
> device_idx));
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
> +    hwctx->flags = flags;
> +
> +    if (flags == DCF_CREATE_CONTEXT)
> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device));
> +    else
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx-
> >cuda_ctx, hwctx->cuda_device));
> +
>       if (ret < 0)
>           goto error;
>   
>       // Setting stream to NULL will make functions automatically use
> the default CUstream
>       hwctx->stream = NULL;
>   
> -    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> +    if (flags == DCF_CREATE_CONTEXT)
> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>   
>       hwctx->internal->is_allocated = 1;
>   
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 81a0552cab..bab5eefe54 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -34,6 +34,11 @@
>    * AVBufferRefs whose data pointer is a CUdeviceptr.
>    */
>   
> +enum AVCUDADeviceContextFlags {
> +    DCF_CREATE_CONTEXT = 0,
> +    DCF_USE_PRIMARY_CONTEXT = 1
> +};
> 
> I'd only define a flag for the new behavior. If it's not set, keep the
> old behavior.
> 
> 
>   typedef struct AVCUDADeviceContextInternal
> AVCUDADeviceContextInternal;
>   
>   /**
> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext {
>       CUcontext cuda_ctx;
>       CUstream stream;
>       AVCUDADeviceContextInternal *internal;
> +    CUdevice cuda_device;
> 
> Can't one just call cuCtxGetDevice on the context to get the device?
> 
> 
> +    enum AVCUDADeviceContextFlags flags;
> 
> The device_create/av_hwdevice_ctx_create function already has a flags
> parameter (unused at the moment), so there should be no need to add
> this here.
> If need be, the information should be stored in
> AVCUDADeviceContextInternal instead.
> 
> 
>   } AVCUDADeviceContext;
>   
> 
> This also needs configure updated for the higher ffnvcodec version
> that's required after this patch, and probably deserves a lavu micro bump.
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] Allow using primary CUDA device context

Commit Message

Patch