From patchwork Wed Oct 19 12:00:30 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Timo Rothenpieler X-Patchwork-Id: 1069 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.140.133 with SMTP id o127csp175959vsd; Wed, 19 Oct 2016 05:01:15 -0700 (PDT) X-Received: by 10.194.75.37 with SMTP id z5mr4753453wjv.78.1476878475060; Wed, 19 Oct 2016 05:01:15 -0700 (PDT) Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id je6si36738014wjb.41.2016.10.19.05.01.13; Wed, 19 Oct 2016 05:01:15 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@rothenpieler.org; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id AA8F8689B0E; Wed, 19 Oct 2016 15:00:56 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from btbn.de (btbn.de [5.9.118.179]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 167F46898C1 for ; Wed, 19 Oct 2016 15:00:50 +0300 (EEST) Received: from localhost.localdomain (ip4d173c0b.dynamic.kabel-deutschland.de [77.23.60.11]) by btbn.de (Postfix) with ESMTPSA id BCB7E6FEFF; Wed, 19 Oct 2016 14:00:52 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=rothenpieler.org; s=mail; t=1476878452; bh=/GkQxLK4hgTWCDYC+SiwuFFhBy8KccVrVMZm76L6E/0=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=AAH7iF72kezQziIyAo3rFU1Hv/WXR0ucVUUtFZ/Okq7DPueqbe7TI79l2RqPFLa5g AUu9XxDiw6MKBZi6sP4Y0oocHPLOI5HdGiWcBAacvGFqntGURyl2Ay4hiZJRhCadta NulU0qc2vQnl7tpcaRLMwxEzxQ14asjXjZ1jhhuY= From: Timo Rothenpieler To: ffmpeg-devel@ffmpeg.org Date: Wed, 19 Oct 2016 14:00:30 +0200 Message-Id: <20161019120036.618-2-timo@rothenpieler.org> X-Mailer: git-send-email 2.10.1 In-Reply-To: <20161019120036.618-1-timo@rothenpieler.org> References: <20161019120036.618-1-timo@rothenpieler.org> Subject: [FFmpeg-devel] [PATCH 2/8] avutil/hwcontext_cuda: use dynamically loaded CUDA X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Timo Rothenpieler MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- configure | 2 + libavutil/hwcontext_cuda.c | 99 ++++++++++++++++++++++++++++--------- libavutil/hwcontext_cuda.h | 5 ++ libavutil/hwcontext_cuda_internal.h | 37 ++++++++++++++ libavutil/version.h | 2 +- 5 files changed, 120 insertions(+), 25 deletions(-) create mode 100644 libavutil/hwcontext_cuda_internal.h diff --git a/configure b/configure index 1e6834f..8c5742f 100755 --- a/configure +++ b/configure @@ -2536,6 +2536,7 @@ audiotoolbox_extralibs="-framework CoreFoundation -framework AudioToolbox -frame # hardware accelerators crystalhd_deps="libcrystalhd_libcrystalhd_if_h" +cuda_deps_any="dlopen LoadLibrary" cuvid_deps="cuda" d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext" dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode" @@ -5414,6 +5415,7 @@ elif check_func dlopen -ldl && check_func dlsym -ldl; then ldl=-ldl fi +cuda_extralibs='$ldl' decklink_outdev_extralibs="$decklink_outdev_extralibs $ldl" decklink_indev_extralibs="$decklink_indev_extralibs $ldl" frei0r_filter_extralibs='$ldl' diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index e1dcab0..30de299 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -20,7 +20,7 @@ #include "common.h" #include "hwcontext.h" #include "hwcontext_internal.h" -#include "hwcontext_cuda.h" +#include "hwcontext_cuda_internal.h" #include "mem.h" #include "pixdesc.h" #include "pixfmt.h" @@ -41,44 +41,46 @@ static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; CUcontext dummy; - cuCtxPushCurrent(hwctx->cuda_ctx); + cu->cuCtxPushCurrent(hwctx->cuda_ctx); - cuMemFree((CUdeviceptr)data); + cu->cuMemFree((CUdeviceptr)data); - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); } static AVBufferRef *cuda_pool_alloc(void *opaque, int size) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = hwctx->internal->cuda_dl; AVBufferRef *ret = NULL; CUcontext dummy = NULL; CUdeviceptr data; CUresult err; - err = cuCtxPushCurrent(hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(hwctx->cuda_ctx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); return NULL; } - err = cuMemAlloc(&data, size); + err = cu->cuMemAlloc(&data, size); if (err != CUDA_SUCCESS) goto fail; ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); if (!ret) { - cuMemFree(data); + cu->cuMemFree(data); goto fail; } fail: - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return ret; } @@ -187,12 +189,13 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -208,14 +211,14 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return 0; } @@ -225,12 +228,13 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, { CUDAFramesContext *priv = ctx->internal->priv; AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx; + CudaFunctions *cu = device_hwctx->internal->cuda_dl; CUcontext dummy; CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -246,28 +250,64 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = cu->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); return 0; } -static void cuda_device_free(AVHWDeviceContext *ctx) +static void cuda_device_uninit(AVHWDeviceContext *ctx) { AVCUDADeviceContext *hwctx = ctx->hwctx; - cuCtxDestroy(hwctx->cuda_ctx); + + if (hwctx->internal) { + if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { + hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx); + hwctx->cuda_ctx = NULL; + } + cuda_free_functions(&hwctx->internal->cuda_dl); + } + + av_freep(&hwctx->internal); +} + +static int cuda_device_init(AVHWDeviceContext *ctx) +{ + AVCUDADeviceContext *hwctx = ctx->hwctx; + int ret; + + if (!hwctx->internal) { + hwctx->internal = av_mallocz(sizeof(*hwctx->internal)); + if (!hwctx->internal) + return AVERROR(ENOMEM); + } + + if (!hwctx->internal->cuda_dl) { + ret = cuda_load_functions(&hwctx->internal->cuda_dl); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n"); + goto error; + } + } + + return 0; + +error: + cuda_device_uninit(ctx); + return ret; } static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags) { AVCUDADeviceContext *hwctx = ctx->hwctx; + CudaFunctions *cu; CUdevice cu_device; CUcontext dummy; CUresult err; @@ -276,29 +316,38 @@ static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, if (device) device_idx = strtol(device, NULL, 0); - err = cuInit(0); + if (cuda_device_init(ctx) < 0) + goto error; + + cu = hwctx->internal->cuda_dl; + + err = cu->cuInit(0); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); - return AVERROR_UNKNOWN; + goto error; } - err = cuDeviceGet(&cu_device, device_idx); + err = cu->cuDeviceGet(&cu_device, device_idx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx); - return AVERROR_UNKNOWN; + goto error; } - err = cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); + err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); - return AVERROR_UNKNOWN; + goto error; } - cuCtxPopCurrent(&dummy); + cu->cuCtxPopCurrent(&dummy); - ctx->free = cuda_device_free; + hwctx->internal->is_allocated = 1; return 0; + +error: + cuda_device_uninit(ctx); + return AVERROR_UNKNOWN; } const HWContextType ff_hwcontext_type_cuda = { @@ -309,6 +358,8 @@ const HWContextType ff_hwcontext_type_cuda = { .frames_priv_size = sizeof(CUDAFramesContext), .device_create = cuda_device_create, + .device_init = cuda_device_init, + .device_uninit = cuda_device_uninit, .frames_init = cuda_frames_init, .frames_get_buffer = cuda_get_buffer, .transfer_get_formats = cuda_transfer_get_formats, diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h index 23a77ce..12dae84 100644 --- a/libavutil/hwcontext_cuda.h +++ b/libavutil/hwcontext_cuda.h @@ -20,7 +20,9 @@ #ifndef AVUTIL_HWCONTEXT_CUDA_H #define AVUTIL_HWCONTEXT_CUDA_H +#ifndef CUDA_VERSION #include +#endif #include "pixfmt.h" @@ -32,11 +34,14 @@ * AVBufferRefs whose data pointer is a CUdeviceptr. */ +typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal; + /** * This struct is allocated as AVHWDeviceContext.hwctx */ typedef struct AVCUDADeviceContext { CUcontext cuda_ctx; + AVCUDADeviceContextInternal *internal; } AVCUDADeviceContext; /** diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h new file mode 100644 index 0000000..e1bc6ff --- /dev/null +++ b/libavutil/hwcontext_cuda_internal.h @@ -0,0 +1,37 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef AVUTIL_HWCONTEXT_CUDA_INTERNAL_H +#define AVUTIL_HWCONTEXT_CUDA_INTERNAL_H + +#include "compat/cuda/dynlink_loader.h" +#include "hwcontext_cuda.h" + +/** + * @file + * FFmpeg internal API for CUDA. + */ + +struct AVCUDADeviceContextInternal { + CudaFunctions *cuda_dl; + int is_allocated; +}; + +#endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */ + diff --git a/libavutil/version.h b/libavutil/version.h index 8a41ef6..c7c2e9d 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 55 #define LIBAVUTIL_VERSION_MINOR 32 -#define LIBAVUTIL_VERSION_MICRO 100 +#define LIBAVUTIL_VERSION_MICRO 101 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ LIBAVUTIL_VERSION_MINOR, \