From patchwork Wed Oct 19 12:00:33 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Timo Rothenpieler X-Patchwork-Id: 1067 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.140.133 with SMTP id o127csp176178vsd; Wed, 19 Oct 2016 05:01:48 -0700 (PDT) X-Received: by 10.28.209.142 with SMTP id i136mr2675756wmg.1.1476878508162; Wed, 19 Oct 2016 05:01:48 -0700 (PDT) Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id k7si53825784wjs.211.2016.10.19.05.01.46; Wed, 19 Oct 2016 05:01:48 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@rothenpieler.org; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id CBB27689B35; Wed, 19 Oct 2016 15:00:57 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from btbn.de (btbn.de [5.9.118.179]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id A1B5C689A81 for ; Wed, 19 Oct 2016 15:00:50 +0300 (EEST) Received: from localhost.localdomain (ip4d173c0b.dynamic.kabel-deutschland.de [77.23.60.11]) by btbn.de (Postfix) with ESMTPSA id 515086FF42; Wed, 19 Oct 2016 14:00:53 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=rothenpieler.org; s=mail; t=1476878453; bh=bbkWQYFXtrAAE0VXe5/uyqLRo1mXN/G3b0rISig8ZjQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=CtNImIjB3G9xwmk/VXxS9XCUpSRJm+u7yJMutbJkLiYJnvaFA4Kt3mGbteZlHEdiA BjhHO1KYNG1r7BOjXKH1Ujz8jGllMl/tMYqLB9YfhMPyufT1s1PKVVW7kD53qKUjJU fUVJynRHvFrm9Ez/F4u2GY+4Owbr0/NQHCJudqCU= From: Timo Rothenpieler To: ffmpeg-devel@ffmpeg.org Date: Wed, 19 Oct 2016 14:00:33 +0200 Message-Id: <20161019120036.618-5-timo@rothenpieler.org> X-Mailer: git-send-email 2.10.1 In-Reply-To: <20161019120036.618-1-timo@rothenpieler.org> References: <20161019120036.618-1-timo@rothenpieler.org> Subject: [FFmpeg-devel] [PATCH 5/8] avcodec/nvenc: use dynamically loaded CUDA X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Timo Rothenpieler MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- configure | 1 + libavcodec/nvenc.c | 129 +++++++++-------------------------------------------- libavcodec/nvenc.h | 46 ++----------------- 3 files changed, 26 insertions(+), 150 deletions(-) diff --git a/configure b/configure index 8c5742f..78113e2 100755 --- a/configure +++ b/configure @@ -2692,6 +2692,7 @@ vaapi_encode_deps="vaapi" hwupload_cuda_filter_deps="cuda" scale_npp_filter_deps="cuda libnpp" +nvenc_deps="cuda" nvenc_deps_any="dlopen LoadLibrary" nvenc_encoder_deps="nvenc" h263_cuvid_decoder_deps="cuda cuvid" diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 2505c3d..d71a445 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -21,60 +21,20 @@ #include "config.h" -#if defined(_WIN32) || defined(__CYGWIN__) -# define CUDA_LIBNAME "nvcuda.dll" -# if ARCH_X86_64 -# define NVENC_LIBNAME "nvEncodeAPI64.dll" -# else -# define NVENC_LIBNAME "nvEncodeAPI.dll" -# endif -#else -# define CUDA_LIBNAME "libcuda.so.1" -# define NVENC_LIBNAME "libnvidia-encode.so.1" -#endif - -#if defined(_WIN32) -#include - -#define dlopen(filename, flags) LoadLibrary(TEXT(filename)) -#define dlsym(handle, symbol) GetProcAddress(handle, symbol) -#define dlclose(handle) FreeLibrary(handle) -#else -#include -#endif +#include "nvenc.h" +#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext.h" #include "libavutil/imgutils.h" #include "libavutil/avassert.h" #include "libavutil/mem.h" #include "internal.h" -#include "nvenc.h" #define NVENC_CAP 0x30 #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \ rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \ rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP) -#define LOAD_LIBRARY(l, path) \ - do { \ - if (!((l) = dlopen(path, RTLD_LAZY))) { \ - av_log(avctx, AV_LOG_ERROR, \ - "Cannot load %s\n", \ - path); \ - return AVERROR_UNKNOWN; \ - } \ - } while (0) - -#define LOAD_SYMBOL(fun, lib, symbol) \ - do { \ - if (!((fun) = dlsym(lib, symbol))) { \ - av_log(avctx, AV_LOG_ERROR, \ - "Cannot load %s\n", \ - symbol); \ - return AVERROR_UNKNOWN; \ - } \ - } while (0) - const enum AVPixelFormat ff_nvenc_pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NV12, @@ -83,9 +43,7 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = { AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, -#if CONFIG_CUDA AV_PIX_FMT_CUDA, -#endif AV_PIX_FMT_NONE }; @@ -157,42 +115,19 @@ static av_cold int nvenc_load_libraries(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; - PNVENCODEAPIGETMAXSUPPORTEDVERSION nvenc_get_max_ver; - PNVENCODEAPICREATEINSTANCE nvenc_create_instance; NVENCSTATUS err; uint32_t nvenc_max_ver; + int ret; -#if CONFIG_CUDA - dl_fn->cu_init = cuInit; - dl_fn->cu_device_get_count = cuDeviceGetCount; - dl_fn->cu_device_get = cuDeviceGet; - dl_fn->cu_device_get_name = cuDeviceGetName; - dl_fn->cu_device_compute_capability = cuDeviceComputeCapability; - dl_fn->cu_ctx_create = cuCtxCreate_v2; - dl_fn->cu_ctx_pop_current = cuCtxPopCurrent_v2; - dl_fn->cu_ctx_destroy = cuCtxDestroy_v2; -#else - LOAD_LIBRARY(dl_fn->cuda, CUDA_LIBNAME); - - LOAD_SYMBOL(dl_fn->cu_init, dl_fn->cuda, "cuInit"); - LOAD_SYMBOL(dl_fn->cu_device_get_count, dl_fn->cuda, "cuDeviceGetCount"); - LOAD_SYMBOL(dl_fn->cu_device_get, dl_fn->cuda, "cuDeviceGet"); - LOAD_SYMBOL(dl_fn->cu_device_get_name, dl_fn->cuda, "cuDeviceGetName"); - LOAD_SYMBOL(dl_fn->cu_device_compute_capability, dl_fn->cuda, - "cuDeviceComputeCapability"); - LOAD_SYMBOL(dl_fn->cu_ctx_create, dl_fn->cuda, "cuCtxCreate_v2"); - LOAD_SYMBOL(dl_fn->cu_ctx_pop_current, dl_fn->cuda, "cuCtxPopCurrent_v2"); - LOAD_SYMBOL(dl_fn->cu_ctx_destroy, dl_fn->cuda, "cuCtxDestroy_v2"); -#endif - - LOAD_LIBRARY(dl_fn->nvenc, NVENC_LIBNAME); + ret = cuda_load_functions(&dl_fn->cuda_dl); + if (ret < 0) + return ret; - LOAD_SYMBOL(nvenc_get_max_ver, dl_fn->nvenc, - "NvEncodeAPIGetMaxSupportedVersion"); - LOAD_SYMBOL(nvenc_create_instance, dl_fn->nvenc, - "NvEncodeAPICreateInstance"); + ret = nvenc_load_functions(&dl_fn->nvenc_dl); + if (ret < 0) + return ret; - err = nvenc_get_max_ver(&nvenc_max_ver); + err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver); if (err != NV_ENC_SUCCESS) return nvenc_print_error(avctx, err, "Failed to query nvenc max version"); @@ -208,7 +143,7 @@ static av_cold int nvenc_load_libraries(AVCodecContext *avctx) dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER; - err = nvenc_create_instance(&dl_fn->nvenc_funcs); + err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs); if (err != NV_ENC_SUCCESS) return nvenc_print_error(avctx, err, "Failed to create nvenc instance"); @@ -380,7 +315,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) if (ctx->device == LIST_DEVICES) loglevel = AV_LOG_INFO; - cu_res = dl_fn->cu_device_get(&cu_device, idx); + cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx); if (cu_res != CUDA_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Cannot access the CUDA device %d\n", @@ -388,11 +323,11 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) return -1; } - cu_res = dl_fn->cu_device_get_name(name, sizeof(name), cu_device); + cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device); if (cu_res != CUDA_SUCCESS) return -1; - cu_res = dl_fn->cu_device_compute_capability(&major, &minor, cu_device); + cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device); if (cu_res != CUDA_SUCCESS) return -1; @@ -402,7 +337,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) goto fail; } - cu_res = dl_fn->cu_ctx_create(&ctx->cu_context_internal, 0, cu_device); + cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device); if (cu_res != CUDA_SUCCESS) { av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res); goto fail; @@ -410,7 +345,7 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx) ctx->cu_context = ctx->cu_context_internal; - cu_res = dl_fn->cu_ctx_pop_current(&dummy); + cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy); if (cu_res != CUDA_SUCCESS) { av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res); goto fail2; @@ -434,7 +369,7 @@ fail3: ctx->nvencoder = NULL; fail2: - dl_fn->cu_ctx_destroy(ctx->cu_context_internal); + dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); ctx->cu_context_internal = NULL; fail: @@ -458,7 +393,6 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) } if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { -#if CONFIG_CUDA AVHWFramesContext *frames_ctx; AVCUDADeviceContext *device_hwctx; int ret; @@ -480,19 +414,16 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx) av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n"); return ret; } -#else - return AVERROR_BUG; -#endif } else { int i, nb_devices = 0; - if ((dl_fn->cu_init(0)) != CUDA_SUCCESS) { + if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Cannot init CUDA\n"); return AVERROR_UNKNOWN; } - if ((dl_fn->cu_device_get_count(&nb_devices)) != CUDA_SUCCESS) { + if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) { av_log(avctx, AV_LOG_ERROR, "Cannot enumerate the CUDA devices\n"); return AVERROR_UNKNOWN; @@ -1269,30 +1200,14 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) ctx->nvencoder = NULL; if (ctx->cu_context_internal) - dl_fn->cu_ctx_destroy(ctx->cu_context_internal); + dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal); ctx->cu_context = ctx->cu_context_internal = NULL; - if (dl_fn->nvenc) - dlclose(dl_fn->nvenc); - dl_fn->nvenc = NULL; + nvenc_free_functions(&dl_fn->nvenc_dl); + cuda_free_functions(&dl_fn->cuda_dl); dl_fn->nvenc_device_count = 0; -#if !CONFIG_CUDA - if (dl_fn->cuda) - dlclose(dl_fn->cuda); - dl_fn->cuda = NULL; -#endif - - dl_fn->cu_init = NULL; - dl_fn->cu_device_get_count = NULL; - dl_fn->cu_device_get = NULL; - dl_fn->cu_device_get_name = NULL; - dl_fn->cu_device_compute_capability = NULL; - dl_fn->cu_ctx_create = NULL; - dl_fn->cu_ctx_pop_current = NULL; - dl_fn->cu_ctx_destroy = NULL; - av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n"); return 0; diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index bad659a..5bc0cba 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -23,29 +23,12 @@ #include "config.h" +#include "compat/cuda/dynlink_loader.h" #include "libavutil/fifo.h" #include "libavutil/opt.h" #include "avcodec.h" -#if CONFIG_CUDA -#include "libavutil/hwcontext_cuda.h" -#else - -#if defined(_WIN32) -#define CUDAAPI __stdcall -#else -#define CUDAAPI -#endif - -typedef enum cudaError_enum { - CUDA_SUCCESS = 0 -} CUresult; -typedef int CUdevice; -typedef void* CUcontext; -typedef void* CUdeviceptr; -#endif - #define MAX_REGISTERED_FRAMES 64 typedef struct NvencSurface @@ -64,33 +47,10 @@ typedef struct NvencSurface int lockCount; } NvencSurface; -typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags); -typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count); -typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal); -typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev); -typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev); -typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev); -typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx); -typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx); - -typedef NVENCSTATUS (NVENCAPI *PNVENCODEAPIGETMAXSUPPORTEDVERSION)(uint32_t* version); -typedef NVENCSTATUS (NVENCAPI *PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList); - typedef struct NvencDynLoadFunctions { -#if !CONFIG_CUDA - void *cuda; -#endif - void *nvenc; - - PCUINIT cu_init; - PCUDEVICEGETCOUNT cu_device_get_count; - PCUDEVICEGET cu_device_get; - PCUDEVICEGETNAME cu_device_get_name; - PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability; - PCUCTXCREATE cu_ctx_create; - PCUCTXPOPCURRENT cu_ctx_pop_current; - PCUCTXDESTROY cu_ctx_destroy; + CudaFunctions *cuda_dl; + NvencFunctions *nvenc_dl; NV_ENCODE_API_FUNCTION_LIST nvenc_funcs; int nvenc_device_count;