From patchwork Mon Nov 18 13:35:49 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oleg Dobkin X-Patchwork-Id: 16320 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 60BFF447346 for ; Mon, 18 Nov 2019 15:36:05 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 40DC268A44A; Mon, 18 Nov 2019 15:36:05 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wr1-f66.google.com (mail-wr1-f66.google.com [209.85.221.66]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5F83968A3A4 for ; Mon, 18 Nov 2019 15:35:59 +0200 (EET) Received: by mail-wr1-f66.google.com with SMTP id b18so18069231wrj.8 for ; Mon, 18 Nov 2019 05:35:59 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=anyvision-co.20150623.gappssmtp.com; s=20150623; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=l1QgcPOistUDj5t8uyOUXV45fzddFgwypjZoAnQ3I+8=; b=yr9BlDgWP8HNCskSNeNxc4YM9spPGPFpWRvo5ioaOeE9/6wpPoZH5Dy+DOzbi2d8TJ dXCfBJCLhfZgyOvhrC6RVrOKNbtig9Kbvp4X38TvJ11rtYmWO5nfaokUPQzi3V4KWm/v 1dXYOQBpVIMfb8tXaWZZB7HVhLvRrfPx7cUlTJZOoYMTo/ZDGGRh6WfxLE7eYzMl2H5Q ySW9wqa3T83L0bYMiLcSUlV6+lP1dYWinrPCYTBSZPSL2gTllZj/e8mRW2A7oeelvcQR DHZ8873LyYHT4xr7NJPuYD9t3++xdq5V4mjnFyIyb9PvMj3JDz9Fn7kEisR4N0Nr/Q1w 0p3Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=l1QgcPOistUDj5t8uyOUXV45fzddFgwypjZoAnQ3I+8=; b=S3RWQIecUPslmtXHHm1zjtC2A0gBj1v7cAhlYbdIfZto/FYsFQqK7iiAQJvoVcX4cU K9ofWOaNSF0ALYPMY1vKjaKljI7efPXgHE2mRL/44k7KasC1eswzo2kBsrPOsjiGKnP9 CTroyLmKl7huXM8NsZQCbxHEXLxuYakMlB7t76jEesPqivTK0CYT7hxKFUAGqzs9xAQM 
K80Q5IJwdVQZNQ2xQnjaE62caDn2I45TqnArp4vMDq2akuk+xsbpzskcEc3m2+ECJLXn CK/idvxXvDuB+B1RPVdpewKD9d9my7pbMCtkkRVIeEydpGYVxlz2S4WOSJKBOEGBQXDz nxOw== X-Gm-Message-State: APjAAAXh2anx2LwkfYi5eiEU+jhBdG3D/6goJTRnh6IWDqdG3JYC11wP wnBnZcSZZh+P/Oz4/fCx2t/puEFm5ek= X-Google-Smtp-Source: APXvYqyfhrt3dxYag0jvbQ+TvQ/QHPCC8I+UutLgcHIMnihgA7WI5/JKDH+LFgW0Q2sD6G+GH1CHgg== X-Received: by 2002:a5d:5306:: with SMTP id e6mr29403453wrv.187.1574084158402; Mon, 18 Nov 2019 05:35:58 -0800 (PST) Received: from localhost.localdomain ([31.154.171.234]) by smtp.gmail.com with ESMTPSA id d11sm23178661wrn.28.2019.11.18.05.35.56 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 18 Nov 2019 05:35:57 -0800 (PST) From: Oleg Dobkin To: ffmpeg-devel@ffmpeg.org Date: Mon, 18 Nov 2019 15:35:49 +0200 Message-Id: <20191118133549.20903-1-olegd@anyvision.co> X-Mailer: git-send-email 2.17.1 In-Reply-To: <4a576d75-be2f-3c3a-d036-cd49091cddf6@rothenpieler.org> References: <4a576d75-be2f-3c3a-d036-cd49091cddf6@rothenpieler.org> Subject: [FFmpeg-devel] [PATCH] Allow using primary CUDA device context X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Oleg Dobkin MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Add AVCUDADeviceContextFlags to control the creation of CUDA device context for the hardware CUDA decoder. The current values are 0 (default behavior) - new context will be created for each decoder, and 1 - primary CUDA context will be used. 
There are several reasons for using the primary device context instead of creating a new one: - This is the recommended way to handle device contexts (see https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf) - Memory allocations, kernels and other state are associated with the current device context. Currently, the context is not accessible from the FFmpeg API, so, technically, the memory created by the hardware decoder (the video frame) can't be safely read. Signed-off-by: Oleg Dobkin --- configure | 8 ++++---- libavutil/hwcontext_cuda.c | 22 +++++++++++++++++----- libavutil/hwcontext_cuda_internal.h | 2 ++ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/configure b/configure index 8f4f2884cf..2519e6421f 100755 --- a/configure +++ b/configure @@ -6139,10 +6139,10 @@ fi if ! disabled ffnvcodec; then ffnv_hdr_list="ffnvcodec/nvEncodeAPI.h ffnvcodec/dynlink_cuda.h ffnvcodec/dynlink_cuviddec.h ffnvcodec/dynlink_nvcuvid.h" - check_pkg_config ffnvcodec "ffnvcodec >= 9.0.18.0" "$ffnv_hdr_list" "" || \ - check_pkg_config ffnvcodec "ffnvcodec >= 8.2.15.8 ffnvcodec < 8.3" "$ffnv_hdr_list" "" || \ - check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.9 ffnvcodec < 8.2" "$ffnv_hdr_list" "" || \ - check_pkg_config ffnvcodec "ffnvcodec >= 8.0.14.9 ffnvcodec < 8.1" "$ffnv_hdr_list" "" + check_pkg_config ffnvcodec "ffnvcodec >= 9.1.23.1" "$ffnv_hdr_list" "" || \ + check_pkg_config ffnvcodec "ffnvcodec >= 9.0.18.3 ffnvcodec < 9.1" "$ffnv_hdr_list" "" || \ + check_pkg_config ffnvcodec "ffnvcodec >= 8.2.15.10 ffnvcodec < 8.3" "$ffnv_hdr_list" "" || \ + check_pkg_config ffnvcodec "ffnvcodec >= 8.1.24.11 ffnvcodec < 8.2" "$ffnv_hdr_list" "" fi check_cpp_condition winrt windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)" diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index cca39e9fc7..e72efbe5af 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -29,6 +29,8 @@ 
#define CUDA_FRAME_ALIGNMENT 256 +#define USE_PRIMARY_CONTEXT 1 + typedef struct CUDAFramesContext { int shift_width, shift_height; } CUDAFramesContext; @@ -281,8 +283,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx) if (hwctx->internal) { CudaFunctions *cu = hwctx->internal->cuda_dl; if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); + if (hwctx->internal->flags & USE_PRIMARY_CONTEXT) + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->internal->cuda_device)); + else + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); hwctx->cuda_ctx = NULL; + hwctx->internal->cuda_device = NULL; } cuda_free_functions(&hwctx->internal->cuda_dl); } @@ -322,7 +328,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, { AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; - CUdevice cu_device; CUcontext dummy; int ret, device_idx = 0; @@ -338,18 +343,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, if (ret < 0) goto error; - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->internal->cuda_device, device_idx)); if (ret < 0) goto error; - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); + hwctx->internal->flags = flags; + + if (flags & USE_PRIMARY_CONTEXT) + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->internal->cuda_device)); + else + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->internal->cuda_device)); + if (ret < 0) goto error; // Setting stream to NULL will make functions automatically use the default CUstream hwctx->stream = NULL; - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (!(flags & USE_PRIMARY_CONTEXT)) + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); hwctx->internal->is_allocated = 1; diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h index e1bc6ff350..d5633c58d5 100644 --- 
a/libavutil/hwcontext_cuda_internal.h +++ b/libavutil/hwcontext_cuda_internal.h @@ -31,6 +31,8 @@ struct AVCUDADeviceContextInternal { CudaFunctions *cuda_dl; int is_allocated; + CUdevice cuda_device; + int flags; }; #endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */