diff mbox

[FFmpeg-devel] Allow using primary CUDA device context

Message ID 261ab9f327503a8f5712f0a33d1988b98f1032b7.camel@anyvision.co
State New
Headers show

Commit Message

Oleg Dobkin Nov. 18, 2019, 10:56 a.m. UTC
On Mon, 2019-11-18 at 12:51 +0200, Oleg Dobkin wrote:
> I've changed the enum into a flag and moved it into
> AVCUDADeviceContextInternal.
> 
> 
> Can't one just call cuCtxGetDevice on the context to get the device?
> 
> I'm not sure cuCtxGetDevice can be used for the primary context; also,
> according to the documentation it returns a device ID, not a handle.
> Anyway, is it that bad to store an additional handle?
> 
> 
> Also needs configure updated for the higher ffnvcodec version
> 
> I'm looking at the configure script and can't figure out how the
> ffnvcodec version is enforced. The script seems to accept several
> ranges of ffnvcodec versions. I could just increment the top-level
> check, but I'm not sure this is the correct way.
> 
> On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote:
> 
> On 17.11.2019 15:58, Oleg Dobkin wrote:
> 
> Add AVCUDADeviceContextFlags to control the creation of CUDA device
> context for the hardware CUDA decoder.
> 
> The current values are 0 (default behavior) - new context will be
> created for each decoder, and 1 - primary CUDA context will be used.
> 
> There are several reasons for using primary device context instead of
> creating a new one:
> 
>   - This is the recommended way to handle device contexts (see
>     https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)
> 
>   - Memory allocations, kernels and other state are associated with
>     the current device context. Currently, the context is not
>     accessible from the FFmpeg API, so, technically, the memory created
>     by the hardware decoder (the video frame) can't be safely read.
> 
> Signed-off-by: Oleg Dobkin <olegd@anyvision.co>
> ---
>   libavutil/hwcontext_cuda.c | 20 +++++++++++++++-----
>   libavutil/hwcontext_cuda.h |  7 +++++++
>   2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index cca39e9fc7..608ea57569 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext
> *device_ctx)
>       if (hwctx->internal) {
>           CudaFunctions *cu = hwctx->internal->cuda_dl;
>           if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
> -            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            if (hwctx->flags == DCF_CREATE_CONTEXT)
> 
> Should actually be checking for the flag, not equality.
> 
> 
> +                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
> +            else
> +                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx-
> 
> cuda_device));
>               hwctx->cuda_ctx = NULL;
> +            hwctx->cuda_device = NULL;
>           }
>           cuda_free_functions(&hwctx->internal->cuda_dl);
>       }
> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>   {
>       AVCUDADeviceContext *hwctx = device_ctx->hwctx;
>       CudaFunctions *cu;
> -    CUdevice cu_device;
>       CUcontext dummy;
>       int ret, device_idx = 0;
>   
> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext
> *device_ctx,
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
> +    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device,
> device_idx));
>       if (ret < 0)
>           goto error;
>   
> -    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
> +    hwctx->flags = flags;
> +
> +    if (flags == DCF_CREATE_CONTEXT)
> +        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx,
> CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device));
> +    else
> +        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx-
> 
> cuda_ctx, hwctx->cuda_device));
> +
>       if (ret < 0)
>           goto error;
>   
>       // Setting stream to NULL will make functions automatically use
> the default CUstream
>       hwctx->stream = NULL;
>   
> -    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
> +    if (flags == DCF_CREATE_CONTEXT)
> +        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
>   
>       hwctx->internal->is_allocated = 1;
>   
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 81a0552cab..bab5eefe54 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -34,6 +34,11 @@
>    * AVBufferRefs whose data pointer is a CUdeviceptr.
>    */
>   
> +enum AVCUDADeviceContextFlags {
> +    DCF_CREATE_CONTEXT = 0,
> +    DCF_USE_PRIMARY_CONTEXT = 1
> +};
> 
> I'd only define a flag for the new behavior. If it's not set, keep
> the old behavior.
> 
> 
>   typedef struct AVCUDADeviceContextInternal
> AVCUDADeviceContextInternal;
>   
>   /**
> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext {
>       CUcontext cuda_ctx;
>       CUstream stream;
>       AVCUDADeviceContextInternal *internal;
> +    CUdevice cuda_device;
> 
> Can't one just call cuCtxGetDevice on the context to get the device?
> 
> 
> +    enum AVCUDADeviceContextFlags flags;
> 
> The device_create/av_hwdevice_ctx_create function already has an (at
> the moment unused) flags parameter, so there should be no need to add
> this here. If need be, the information should be stored in
> AVCUDADeviceContextInternal instead.
> 
> 
>   } AVCUDADeviceContext;
>   
> 
> This also needs configure updated for the higher ffnvcodec version
> that's required after this patch, and probably deserves a lavu micro bump.
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> 
> 
>
diff mbox

Patch

From b8b88692944bf5ccaac4706d0723cf32c0fce99a Mon Sep 17 00:00:00 2001
From: Oleg Dobkin <olegd@anyvision.co>
Date: Sun, 17 Nov 2019 13:35:32 +0200
Subject: [PATCH] Add cuDevicePrimaryCtxRetain and cuDevicePrimaryCtxRelease

These functions can be used to create a HW CUDA device using the primary
context, which is the preferred approach.

Signed-off-by: Oleg Dobkin <olegd@anyvision.co>
---
 ffnvcodec.pc.in                    | 2 +-
 include/ffnvcodec/dynlink_cuda.h   | 2 ++
 include/ffnvcodec/dynlink_loader.h | 4 ++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/ffnvcodec.pc.in b/ffnvcodec.pc.in
index 7799048..2a6318f 100644
--- a/ffnvcodec.pc.in
+++ b/ffnvcodec.pc.in
@@ -3,5 +3,5 @@  includedir=${prefix}/include
 
 Name: ffnvcodec
 Description: FFmpeg version of Nvidia Codec SDK headers
-Version: 9.1.23.1
+Version: 9.1.23.2
 Cflags: -I${includedir}
diff --git a/include/ffnvcodec/dynlink_cuda.h b/include/ffnvcodec/dynlink_cuda.h
index 5c85e0d..bf0ed9f 100644
--- a/include/ffnvcodec/dynlink_cuda.h
+++ b/include/ffnvcodec/dynlink_cuda.h
@@ -331,6 +331,8 @@  typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pcopy, CUstrea
 typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
 typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev);
+typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev);
 
 typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int flags);
 typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
diff --git a/include/ffnvcodec/dynlink_loader.h b/include/ffnvcodec/dynlink_loader.h
index a1fa323..f471200 100644
--- a/include/ffnvcodec/dynlink_loader.h
+++ b/include/ffnvcodec/dynlink_loader.h
@@ -157,6 +157,8 @@  typedef struct CudaFunctions {
     tcuGetErrorName *cuGetErrorName;
     tcuGetErrorString *cuGetErrorString;
     tcuCtxGetDevice *cuCtxGetDevice;
+    tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
+    tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
 
     tcuStreamCreate *cuStreamCreate;
     tcuStreamQuery *cuStreamQuery;
@@ -282,6 +284,8 @@  static inline int cuda_load_functions(CudaFunctions **functions, void *logctx)
     LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");
     LOAD_SYMBOL(cuGetErrorString, tcuGetErrorString, "cuGetErrorString");
     LOAD_SYMBOL(cuCtxGetDevice, tcuCtxGetDevice, "cuCtxGetDevice");
+    LOAD_SYMBOL(cuDevicePrimaryCtxRetain, tcuDevicePrimaryCtxRetain, "cuDevicePrimaryCtxRetain");
+    LOAD_SYMBOL(cuDevicePrimaryCtxRelease, tcuDevicePrimaryCtxRelease, "cuDevicePrimaryCtxRelease");
 
     LOAD_SYMBOL(cuStreamCreate, tcuStreamCreate, "cuStreamCreate");
     LOAD_SYMBOL(cuStreamQuery, tcuStreamQuery, "cuStreamQuery");
-- 
2.17.1