diff mbox

[FFmpeg-devel,2/8] avutil/hwcontext_cuda: use dynamically loaded CUDA

Message ID 20161019120036.618-2-timo@rothenpieler.org
State Accepted
Headers show

Commit Message

Timo Rothenpieler Oct. 19, 2016, noon UTC
---
 configure                           |  2 +
 libavutil/hwcontext_cuda.c          | 99 ++++++++++++++++++++++++++++---------
 libavutil/hwcontext_cuda.h          |  5 ++
 libavutil/hwcontext_cuda_internal.h | 37 ++++++++++++++
 libavutil/version.h                 |  2 +-
 5 files changed, 120 insertions(+), 25 deletions(-)
 create mode 100644 libavutil/hwcontext_cuda_internal.h
diff mbox

Patch

diff --git a/configure b/configure
index 1e6834f..8c5742f 100755
--- a/configure
+++ b/configure
@@ -2536,6 +2536,7 @@  audiotoolbox_extralibs="-framework CoreFoundation -framework AudioToolbox -frame
 
 # hardware accelerators
 crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
+cuda_deps_any="dlopen LoadLibrary"
 cuvid_deps="cuda"
 d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext"
 dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode"
@@ -5414,6 +5415,7 @@  elif check_func dlopen -ldl && check_func dlsym -ldl; then
     ldl=-ldl
 fi
 
+cuda_extralibs='$ldl'
 decklink_outdev_extralibs="$decklink_outdev_extralibs $ldl"
 decklink_indev_extralibs="$decklink_indev_extralibs $ldl"
 frei0r_filter_extralibs='$ldl'
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index e1dcab0..30de299 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -20,7 +20,7 @@ 
 #include "common.h"
 #include "hwcontext.h"
 #include "hwcontext_internal.h"
-#include "hwcontext_cuda.h"
+#include "hwcontext_cuda_internal.h"
 #include "mem.h"
 #include "pixdesc.h"
 #include "pixfmt.h"
@@ -41,44 +41,46 @@  static void cuda_buffer_free(void *opaque, uint8_t *data)
 {
     AVHWFramesContext *ctx = opaque;
     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions *cu = hwctx->internal->cuda_dl;
 
     CUcontext dummy;
 
-    cuCtxPushCurrent(hwctx->cuda_ctx);
+    cu->cuCtxPushCurrent(hwctx->cuda_ctx);
 
-    cuMemFree((CUdeviceptr)data);
+    cu->cuMemFree((CUdeviceptr)data);
 
-    cuCtxPopCurrent(&dummy);
+    cu->cuCtxPopCurrent(&dummy);
 }
 
 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
 {
     AVHWFramesContext     *ctx = opaque;
     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions          *cu = hwctx->internal->cuda_dl;
 
     AVBufferRef *ret = NULL;
     CUcontext dummy = NULL;
     CUdeviceptr data;
     CUresult err;
 
-    err = cuCtxPushCurrent(hwctx->cuda_ctx);
+    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
     if (err != CUDA_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
         return NULL;
     }
 
-    err = cuMemAlloc(&data, size);
+    err = cu->cuMemAlloc(&data, size);
     if (err != CUDA_SUCCESS)
         goto fail;
 
     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
     if (!ret) {
-        cuMemFree(data);
+        cu->cuMemFree(data);
         goto fail;
     }
 
 fail:
-    cuCtxPopCurrent(&dummy);
+    cu->cuCtxPopCurrent(&dummy);
     return ret;
 }
 
@@ -187,12 +189,13 @@  static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
 {
     CUDAFramesContext           *priv = ctx->internal->priv;
     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
 
     CUcontext dummy;
     CUresult err;
     int i;
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     if (err != CUDA_SUCCESS)
         return AVERROR_UNKNOWN;
 
@@ -208,14 +211,14 @@  static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cuMemcpy2D(&cpy);
+        err = cu->cuMemcpy2D(&cpy);
         if (err != CUDA_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
             return AVERROR_UNKNOWN;
         }
     }
 
-    cuCtxPopCurrent(&dummy);
+    cu->cuCtxPopCurrent(&dummy);
 
     return 0;
 }
@@ -225,12 +228,13 @@  static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
 {
     CUDAFramesContext           *priv = ctx->internal->priv;
     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+    CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
 
     CUcontext dummy;
     CUresult err;
     int i;
 
-    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     if (err != CUDA_SUCCESS)
         return AVERROR_UNKNOWN;
 
@@ -246,28 +250,64 @@  static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
             .Height        = src->height >> (i ? priv->shift_height : 0),
         };
 
-        err = cuMemcpy2D(&cpy);
+        err = cu->cuMemcpy2D(&cpy);
         if (err != CUDA_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
             return AVERROR_UNKNOWN;
         }
     }
 
-    cuCtxPopCurrent(&dummy);
+    cu->cuCtxPopCurrent(&dummy);
 
     return 0;
 }
 
-static void cuda_device_free(AVHWDeviceContext *ctx)
+static void cuda_device_uninit(AVHWDeviceContext *ctx)
 {
     AVCUDADeviceContext *hwctx = ctx->hwctx;
-    cuCtxDestroy(hwctx->cuda_ctx);
+
+    if (hwctx->internal) {
+        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
+            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
+            hwctx->cuda_ctx = NULL;
+        }
+        cuda_free_functions(&hwctx->internal->cuda_dl);
+    }
+
+    av_freep(&hwctx->internal);
+}
+
+static int cuda_device_init(AVHWDeviceContext *ctx)
+{
+    AVCUDADeviceContext *hwctx = ctx->hwctx;
+    int ret;
+
+    if (!hwctx->internal) {
+        hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
+        if (!hwctx->internal)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!hwctx->internal->cuda_dl) {
+        ret = cuda_load_functions(&hwctx->internal->cuda_dl);
+        if (ret < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
+            goto error;
+        }
+    }
+
+    return 0;
+
+error:
+    cuda_device_uninit(ctx);
+    return ret;
 }
 
 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
                               AVDictionary *opts, int flags)
 {
     AVCUDADeviceContext *hwctx = ctx->hwctx;
+    CudaFunctions *cu;
     CUdevice cu_device;
     CUcontext dummy;
     CUresult err;
@@ -276,29 +316,38 @@  static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
     if (device)
         device_idx = strtol(device, NULL, 0);
 
-    err = cuInit(0);
+    if (cuda_device_init(ctx) < 0)
+        goto error;
+
+    cu = hwctx->internal->cuda_dl;
+
+    err = cu->cuInit(0);
     if (err != CUDA_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
-        return AVERROR_UNKNOWN;
+        goto error;
     }
 
-    err = cuDeviceGet(&cu_device, device_idx);
+    err = cu->cuDeviceGet(&cu_device, device_idx);
     if (err != CUDA_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
-        return AVERROR_UNKNOWN;
+        goto error;
     }
 
-    err = cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
+    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
     if (err != CUDA_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
-        return AVERROR_UNKNOWN;
+        goto error;
     }
 
-    cuCtxPopCurrent(&dummy);
+    cu->cuCtxPopCurrent(&dummy);
 
-    ctx->free = cuda_device_free;
+    hwctx->internal->is_allocated = 1;
 
     return 0;
+
+error:
+    cuda_device_uninit(ctx);
+    return AVERROR_UNKNOWN;
 }
 
 const HWContextType ff_hwcontext_type_cuda = {
@@ -309,6 +358,8 @@  const HWContextType ff_hwcontext_type_cuda = {
     .frames_priv_size     = sizeof(CUDAFramesContext),
 
     .device_create        = cuda_device_create,
+    .device_init          = cuda_device_init,
+    .device_uninit        = cuda_device_uninit,
     .frames_init          = cuda_frames_init,
     .frames_get_buffer    = cuda_get_buffer,
     .transfer_get_formats = cuda_transfer_get_formats,
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
index 23a77ce..12dae84 100644
--- a/libavutil/hwcontext_cuda.h
+++ b/libavutil/hwcontext_cuda.h
@@ -20,7 +20,9 @@ 
 #ifndef AVUTIL_HWCONTEXT_CUDA_H
 #define AVUTIL_HWCONTEXT_CUDA_H
 
+#ifndef CUDA_VERSION
 #include <cuda.h>
+#endif
 
 #include "pixfmt.h"
 
@@ -32,11 +34,14 @@ 
  * AVBufferRefs whose data pointer is a CUdeviceptr.
  */
 
+typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal;
+
 /**
  * This struct is allocated as AVHWDeviceContext.hwctx
  */
 typedef struct AVCUDADeviceContext {
     CUcontext cuda_ctx;
+    AVCUDADeviceContextInternal *internal;
 } AVCUDADeviceContext;
 
 /**
diff --git a/libavutil/hwcontext_cuda_internal.h b/libavutil/hwcontext_cuda_internal.h
new file mode 100644
index 0000000..e1bc6ff
--- /dev/null
+++ b/libavutil/hwcontext_cuda_internal.h
@@ -0,0 +1,37 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_HWCONTEXT_CUDA_INTERNAL_H
+#define AVUTIL_HWCONTEXT_CUDA_INTERNAL_H
+
+#include "compat/cuda/dynlink_loader.h"
+#include "hwcontext_cuda.h"
+
+/**
+ * @file
+ * FFmpeg internal API for CUDA.
+ */
+
+struct AVCUDADeviceContextInternal {
+    CudaFunctions *cuda_dl;
+    int is_allocated;
+};
+
+#endif /* AVUTIL_HWCONTEXT_CUDA_INTERNAL_H */
+
diff --git a/libavutil/version.h b/libavutil/version.h
index 8a41ef6..c7c2e9d 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -80,7 +80,7 @@ 
 
 #define LIBAVUTIL_VERSION_MAJOR  55
 #define LIBAVUTIL_VERSION_MINOR  32
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \