[FFmpeg-devel] Allow using primary CUDA device context

Message ID	20191118092625.24877-1-olegd@anyvision.co
State	Superseded
Headers	show Return-Path: <ffmpeg-devel-bounces@ffmpeg.org> From: Oleg Dobkin <olegd@anyvision.co> To: ffmpeg-devel@ffmpeg.org Date: Mon, 18 Nov 2019 11:26:25 +0200 Message-Id: <20191118092625.24877-1-olegd@anyvision.co> In-Reply-To: <66af27d8-0f8d-02b9-c8df-93fe55088439@rothenpieler.org> References: <66af27d8-0f8d-02b9-c8df-93fe55088439@rothenpieler.org> Subject: [FFmpeg-devel] [PATCH] Allow using primary CUDA device context Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Oleg Dobkin <olegd@anyvision.co> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Message ID

20191118092625.24877-1-olegd@anyvision.co

State

Superseded

Headers

From: Oleg Dobkin <olegd@anyvision.co>
To: ffmpeg-devel@ffmpeg.org
Date: Mon, 18 Nov 2019 11:26:25 +0200
Message-Id: <20191118092625.24877-1-olegd@anyvision.co>
In-Reply-To: <66af27d8-0f8d-02b9-c8df-93fe55088439@rothenpieler.org>
References: <66af27d8-0f8d-02b9-c8df-93fe55088439@rothenpieler.org>
Subject: [FFmpeg-devel] [PATCH] Allow using primary CUDA device context
Precedence: list
Reply-To: FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
Cc: Oleg Dobkin <olegd@anyvision.co>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Commit Message

Oleg Dobkin Nov. 18, 2019, 9:26 a.m. UTC

Add AVCUDADeviceContextFlags to control the creation of CUDA device
context for the hardware CUDA decoder.

The current values are 0 (the default behavior), in which case a new
context is created for each decoder, and 1, in which case the primary
CUDA context is used.

There are several reasons for using the primary device context instead
of creating a new one:

 - This is the recommended way to handle device contexts (see
https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf)

 - Memory allocations, kernels and other state are associated with the
current device context. Currently, that context is not accessible from
the FFmpeg API, so technically the memory allocated by the hardware
decoder (the video frame) can't be safely read.

Signed-off-by: Oleg Dobkin <olegd@anyvision.co>
---
 libavutil/hwcontext_cuda.c          | 22 +++++++++++++++++-----
 libavutil/hwcontext_cuda_internal.h |  2 ++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index cca39e9fc7..e72efbe5af 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -29,6 +29,8 @@ 
 
 #define CUDA_FRAME_ALIGNMENT 256
 
+#define USE_PRIMARY_CONTEXT 1
+
 typedef struct CUDAFramesContext {
     int shift_width, shift_height;
 } CUDAFramesContext;
@@ -281,8 +283,12 @@  static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
     if (hwctx->internal) {
         CudaFunctions *cu = hwctx->internal->cuda_dl;
         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
-            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
+            if (hwctx->internal->flags & USE_PRIMARY_CONTEXT)
+                CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device));
+            else
+                CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
             hwctx->cuda_ctx = NULL;
+            hwctx->internal->cuda_device = NULL;
         }
         cuda_free_functions(&hwctx->internal->cuda_dl);
     }
@@ -322,7 +328,6 @@  static int cuda_device_create(AVHWDeviceContext *device_ctx,
 {
     AVCUDADeviceContext *hwctx = device_ctx->hwctx;
     CudaFunctions *cu;
-    CUdevice cu_device;
     CUcontext dummy;
     int ret, device_idx = 0;
 
@@ -338,18 +343,25 @@  static int cuda_device_create(AVHWDeviceContext *device_ctx,
     if (ret < 0)
         goto error;
 
-    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
+    ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx));
     if (ret < 0)
         goto error;
 
-    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
+    hwctx->internal->flags = flags;
+
+    if (flags & USE_PRIMARY_CONTEXT)
+        ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device));
+    else
+        ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->internal->cuda_device));
+
     if (ret < 0)
         goto error;
 
     // Setting stream to NULL will make functions automatically use the default CUstream
     hwctx->stream = NULL;
 
-    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    if (!(flags & USE_PRIMARY_CONTEXT))
+        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
 
     hwctx->internal->is_allocated = 1;
 
diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h
index e1bc6ff350..d5633c58d5 100644
--- a/libavutil/hwcontext_cuda_internal.h
+++ b/libavutil/hwcontext_cuda_internal.h
@@ -31,6 +31,8 @@ 
 struct AVCUDADeviceContextInternal {
     CudaFunctions *cuda_dl;
     int is_allocated;
+    CUdevice cuda_device;
+    int flags;
 };
 
 #endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */

[FFmpeg-devel] Allow using primary CUDA device context

Commit Message

Patch