[FFmpeg-devel] nv-codec-headers: add functions and tidy up loader

Submitted by Daniel Oberhoff on Sept. 2, 2019, 7:33 a.m.

Details

Message ID D61005AA-C75B-42C7-9208-D8F7269A1A65@googlemail.com
State New
Headers show

Commit Message

Daniel Oberhoff Sept. 2, 2019, 7:33 a.m.
Hi all,

We already use FFmpeg, and nv-codec-headers gives us a convenient way to talk to the CUDA libraries, so we use it for our CUDA access as well. We needed a few more functions than it currently exposes, so we added them and would like to push those changes back upstream; they may be useful to other FFmpeg/CUDA users. We also took the liberty of cleaning up the loader macros a bit.


From daeffbc8bb41fd3baa9891839a1282b8f96fe604 Mon Sep 17 00:00:00 2001
From: Daniel Oberhoff <daniel@danieloberhoff.de>
Date: Mon, 2 Sep 2019 09:25:29 +0200
Subject: [PATCH] more functions and tidier loader

---
 ffnvcodec.pc.in                    |   2 +
 include/ffnvcodec/dynlink_cuda.h   |  51 ++++++-
 include/ffnvcodec/dynlink_loader.h | 227 ++++++++++++++++++-----------
 3 files changed, 197 insertions(+), 83 deletions(-)

Patch hide | download patch | download mbox

diff --git a/ffnvcodec.pc.in b/ffnvcodec.pc.in
index 4d3723d..c25031f 100644
--- a/ffnvcodec.pc.in
+++ b/ffnvcodec.pc.in
@@ -5,3 +5,5 @@  Name: ffnvcodec
 Description: FFmpeg version of Nvidia Codec SDK headers
 Version: 9.0.18.2
 Cflags: -I${includedir}
+Libs: -ldl
+
diff --git a/include/ffnvcodec/dynlink_cuda.h b/include/ffnvcodec/dynlink_cuda.h
index ad5da7c..962d43f 100644
--- a/include/ffnvcodec/dynlink_cuda.h
+++ b/include/ffnvcodec/dynlink_cuda.h
@@ -47,6 +47,7 @@  typedef void* CUstream;
 typedef void* CUevent;
 typedef void* CUfunction;
 typedef void* CUmodule;
+typedef void* CUtexref;
 typedef void* CUtexObject;
 typedef void* CUmipmappedArray;
 typedef void* CUgraphicsResource;
@@ -238,6 +239,39 @@  typedef enum CUGLDeviceList_enum {
     CU_GL_DEVICE_LIST_NEXT_FRAME = 3,
 } CUGLDeviceList;
 
+typedef enum CUjit_option_enum {
+    CU_JIT_MAX_REGISTERS = 0,
+    CU_JIT_THREADS_PER_BLOCK,
+    CU_JIT_WALL_TIME,
+    CU_JIT_INFO_LOG_BUFFER,
+    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+    CU_JIT_ERROR_LOG_BUFFER,
+    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+    CU_JIT_OPTIMIZATION_LEVEL,
+    CU_JIT_TARGET_FROM_CUCONTEXT,
+    CU_JIT_TARGET,
+    CU_JIT_FALLBACK_STRATEGY,
+    CU_JIT_GENERATE_DEBUG_INFO,
+    CU_JIT_LOG_VERBOSE,
+    CU_JIT_GENERATE_LINE_INFO,
+    CU_JIT_CACHE_MODE,
+    CU_JIT_NEW_SM3X_OPT,
+    CU_JIT_FAST_COMPILE,
+    CU_JIT_NUM_OPTIONS
+} CUjit_option;
+
+/**
+ * Array descriptor
+ */
+typedef struct CUDA_ARRAY_DESCRIPTOR_st
+{
+    size_t Width;             /**< Width of array */
+    size_t Height;            /**< Height of array */
+
+    CUarray_format Format;    /**< Array format */
+    unsigned int NumChannels; /**< Channels per array element */
+} CUDA_ARRAY_DESCRIPTOR;
+
 typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
     CUexternalMemoryHandleType type;
     union {
@@ -305,6 +339,7 @@  typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
 #define CU_STREAM_NON_BLOCKING 1
 #define CU_EVENT_BLOCKING_SYNC 1
 #define CU_EVENT_DISABLE_TIMING 2
+#define CU_TRSA_OVERRIDE_FORMAT 1
 #define CU_TRSF_READ_AS_INTEGER 1
 
 typedef void CUDAAPI CUstreamCallback(CUstream hStream, CUresult status, void *userdata);
@@ -343,17 +378,31 @@  typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
 
 typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra);
 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
-typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues);
+typedef CUresult CUDAAPI tcuModuleUnload(CUmodule module);
 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
+typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
 typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
 
 typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
+typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
 typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
 typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
 typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
 typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
 typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
+typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
+typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
+
+typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
+typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
+typedef CUresult  CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
+typedef CUresult  CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
+typedef CUresult  CUDAAPI tcuTexRefSetAddress_v2(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
+typedef CUresult  CUDAAPI tcuTexRefSetAddress2D_v2(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
 
 typedef CUresult CUDAAPI tcuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc);
 typedef CUresult CUDAAPI tcuDestroyExternalMemory(CUexternalMemory extMem);
diff --git a/include/ffnvcodec/dynlink_loader.h b/include/ffnvcodec/dynlink_loader.h
index 358acd5..e39c362 100644
--- a/include/ffnvcodec/dynlink_loader.h
+++ b/include/ffnvcodec/dynlink_loader.h
@@ -79,6 +79,8 @@ 
 # define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...)
 #endif
 
+#define STRINGIFY(X) #X
+
 #define LOAD_LIBRARY(l, path)                                  \
     do {                                                       \
         if (!((l) = FFNV_LOAD_FUNC(path))) {                   \
@@ -108,6 +110,23 @@ 
         }                                                                     \
     } while (0)
 
+#define GET_PROC_EX(name, alias, required)              \
+    if (required)                                       \
+        LOAD_SYMBOL(alias, t##name, #name);              \
+    else                                                \
+        LOAD_SYMBOL_OPT(alias, t##name, #name);
+
+#define GET_PROC_EX_V2(name, alias, required)                           \
+    if (required)                                                       \
+        LOAD_SYMBOL(alias, t##name##_v2, STRINGIFY(name##_v2));              \
+    else                                                                \
+        LOAD_SYMBOL_OPT(alias, t##name##_v2, STRINGIFY(name##_v2));
+
+#define GET_PROC_REQUIRED(name) GET_PROC_EX(name,name,1)
+#define GET_PROC_OPTIONAL(name) GET_PROC_EX(name,name,0)
+#define GET_PROC(name)          GET_PROC_REQUIRED(name)
+#define GET_PROC_V2(name)       GET_PROC_EX_V2(name,name,1)
+
 #define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N)     \
     T *f;                                       \
     int ret;                                    \
@@ -170,10 +189,13 @@  typedef struct CudaFunctions {
 
     tcuLaunchKernel *cuLaunchKernel;
     tcuModuleLoadData *cuModuleLoadData;
+    tcuModuleLoadDataEx *cuModuleLoadDataEx;
     tcuModuleUnload *cuModuleUnload;
     tcuModuleGetFunction *cuModuleGetFunction;
+    tcuModuleGetGlobal_v2 *cuModuleGetGlobal;
     tcuTexObjectCreate *cuTexObjectCreate;
     tcuTexObjectDestroy *cuTexObjectDestroy;
+    tcuModuleGetTexRef *cuModuleGetTexRef;
 
     tcuGLGetDevices_v2 *cuGLGetDevices;
     tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
@@ -182,6 +204,19 @@  typedef struct CudaFunctions {
     tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
     tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
 
+    tcuTexRefSetArray *cuTexRefSetArray;
+    tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
+
+    // more settings for texref
+    tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
+    tcuTexRefSetFlags *cuTexRefSetFlags;
+
+    //graphic buffer related
+    tcuTexRefSetAddress_v2 *cuTexRefSetAddress;
+    tcuTexRefSetAddress2D_v2 *cuTexRefSetAddress2D;
+    tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
+    tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer;
+
     tcuImportExternalMemory *cuImportExternalMemory;
     tcuDestroyExternalMemory *cuDestroyExternalMemory;
     tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer;
@@ -261,63 +296,83 @@  static inline int cuda_load_functions(CudaFunctions **functions, void *logctx)
 {
     GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);
 
-    LOAD_SYMBOL(cuInit, tcuInit, "cuInit");
-    LOAD_SYMBOL(cuDeviceGetCount, tcuDeviceGetCount, "cuDeviceGetCount");
-    LOAD_SYMBOL(cuDeviceGet, tcuDeviceGet, "cuDeviceGet");
-    LOAD_SYMBOL(cuDeviceGetAttribute, tcuDeviceGetAttribute, "cuDeviceGetAttribute");
-    LOAD_SYMBOL(cuDeviceGetName, tcuDeviceGetName, "cuDeviceGetName");
-    LOAD_SYMBOL(cuDeviceComputeCapability, tcuDeviceComputeCapability, "cuDeviceComputeCapability");
-    LOAD_SYMBOL(cuCtxCreate, tcuCtxCreate_v2, "cuCtxCreate_v2");
-    LOAD_SYMBOL(cuCtxSetLimit, tcuCtxSetLimit, "cuCtxSetLimit");
-    LOAD_SYMBOL(cuCtxPushCurrent, tcuCtxPushCurrent_v2, "cuCtxPushCurrent_v2");
-    LOAD_SYMBOL(cuCtxPopCurrent, tcuCtxPopCurrent_v2, "cuCtxPopCurrent_v2");
-    LOAD_SYMBOL(cuCtxDestroy, tcuCtxDestroy_v2, "cuCtxDestroy_v2");
-    LOAD_SYMBOL(cuMemAlloc, tcuMemAlloc_v2, "cuMemAlloc_v2");
-    LOAD_SYMBOL(cuMemAllocPitch, tcuMemAllocPitch_v2, "cuMemAllocPitch_v2");
-    LOAD_SYMBOL(cuMemsetD8Async, tcuMemsetD8Async, "cuMemsetD8Async");
-    LOAD_SYMBOL(cuMemFree, tcuMemFree_v2, "cuMemFree_v2");
-    LOAD_SYMBOL(cuMemcpy2D, tcuMemcpy2D_v2, "cuMemcpy2D_v2");
-    LOAD_SYMBOL(cuMemcpy2DAsync, tcuMemcpy2DAsync_v2, "cuMemcpy2DAsync_v2");
-    LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");
-    LOAD_SYMBOL(cuGetErrorString, tcuGetErrorString, "cuGetErrorString");
-
-    LOAD_SYMBOL(cuStreamCreate, tcuStreamCreate, "cuStreamCreate");
-    LOAD_SYMBOL(cuStreamQuery, tcuStreamQuery, "cuStreamQuery");
-    LOAD_SYMBOL(cuStreamSynchronize, tcuStreamSynchronize, "cuStreamSynchronize");
-    LOAD_SYMBOL(cuStreamDestroy, tcuStreamDestroy_v2, "cuStreamDestroy_v2");
-    LOAD_SYMBOL(cuStreamAddCallback, tcuStreamAddCallback, "cuStreamAddCallback");
-    LOAD_SYMBOL(cuEventCreate, tcuEventCreate, "cuEventCreate");
-    LOAD_SYMBOL(cuEventDestroy, tcuEventDestroy_v2, "cuEventDestroy_v2");
-    LOAD_SYMBOL(cuEventSynchronize, tcuEventSynchronize, "cuEventSynchronize");
-    LOAD_SYMBOL(cuEventQuery, tcuEventQuery, "cuEventQuery");
-    LOAD_SYMBOL(cuEventRecord, tcuEventRecord, "cuEventRecord");
-
-    LOAD_SYMBOL(cuLaunchKernel, tcuLaunchKernel, "cuLaunchKernel");
-    LOAD_SYMBOL(cuModuleLoadData, tcuModuleLoadData, "cuModuleLoadData");
-    LOAD_SYMBOL(cuModuleUnload, tcuModuleUnload, "cuModuleUnload");
-    LOAD_SYMBOL(cuModuleGetFunction, tcuModuleGetFunction, "cuModuleGetFunction");
-    LOAD_SYMBOL(cuTexObjectCreate, tcuTexObjectCreate, "cuTexObjectCreate");
-    LOAD_SYMBOL(cuTexObjectDestroy, tcuTexObjectDestroy, "cuTexObjectDestroy");
-
-    LOAD_SYMBOL(cuGLGetDevices, tcuGLGetDevices_v2, "cuGLGetDevices_v2");
-    LOAD_SYMBOL(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage, "cuGraphicsGLRegisterImage");
-    LOAD_SYMBOL(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource, "cuGraphicsUnregisterResource");
-    LOAD_SYMBOL(cuGraphicsMapResources, tcuGraphicsMapResources, "cuGraphicsMapResources");
-    LOAD_SYMBOL(cuGraphicsUnmapResources, tcuGraphicsUnmapResources, "cuGraphicsUnmapResources");
-    LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray");
-
-    LOAD_SYMBOL_OPT(cuDeviceGetUuid, tcuDeviceGetUuid, "cuDeviceGetUuid");
-    LOAD_SYMBOL_OPT(cuImportExternalMemory, tcuImportExternalMemory, "cuImportExternalMemory");
-    LOAD_SYMBOL_OPT(cuDestroyExternalMemory, tcuDestroyExternalMemory, "cuDestroyExternalMemory");
-    LOAD_SYMBOL_OPT(cuExternalMemoryGetMappedBuffer, tcuExternalMemoryGetMappedBuffer, "cuExternalMemoryGetMappedBuffer");
-    LOAD_SYMBOL_OPT(cuExternalMemoryGetMappedMipmappedArray, tcuExternalMemoryGetMappedMipmappedArray, "cuExternalMemoryGetMappedMipmappedArray");
-    LOAD_SYMBOL_OPT(cuMipmappedArrayGetLevel, tcuMipmappedArrayGetLevel, "cuMipmappedArrayGetLevel");
-    LOAD_SYMBOL_OPT(cuMipmappedArrayDestroy, tcuMipmappedArrayDestroy, "cuMipmappedArrayDestroy");
-
-    LOAD_SYMBOL_OPT(cuImportExternalSemaphore, tcuImportExternalSemaphore, "cuImportExternalSemaphore");
-    LOAD_SYMBOL_OPT(cuDestroyExternalSemaphore, tcuDestroyExternalSemaphore, "cuDestroyExternalSemaphore");
-    LOAD_SYMBOL_OPT(cuSignalExternalSemaphoresAsync, tcuSignalExternalSemaphoresAsync, "cuSignalExternalSemaphoresAsync");
-    LOAD_SYMBOL_OPT(cuWaitExternalSemaphoresAsync, tcuWaitExternalSemaphoresAsync, "cuWaitExternalSemaphoresAsync");
+    GET_PROC(cuInit);
+    GET_PROC(cuDeviceGetCount);
+    GET_PROC(cuDeviceGet);
+    GET_PROC(cuDeviceGetName);
+    GET_PROC(cuDeviceComputeCapability);
+    GET_PROC_V2(cuCtxCreate);
+    GET_PROC(cuCtxSetLimit);
+    GET_PROC_V2(cuCtxPushCurrent);
+    GET_PROC_V2(cuCtxPopCurrent);
+    GET_PROC_V2(cuCtxDestroy);
+    GET_PROC_V2(cuMemAlloc);
+    GET_PROC_V2(cuMemFree);
+    GET_PROC_V2(cuMemcpy2D);
+    GET_PROC_V2(cuMemcpy2DAsync);
+    GET_PROC(cuGetErrorName);
+    GET_PROC(cuGetErrorString);
+    GET_PROC(cuDeviceGetAttribute);
+    GET_PROC_V2(cuMemAllocPitch);
+    GET_PROC(cuMemsetD8Async);
+
+    GET_PROC(cuStreamCreate);
+    GET_PROC(cuStreamQuery);
+    GET_PROC(cuStreamSynchronize);
+    GET_PROC_V2(cuStreamDestroy);
+    GET_PROC(cuStreamAddCallback);
+    GET_PROC(cuEventCreate);
+    GET_PROC_V2(cuEventDestroy);
+    GET_PROC(cuEventSynchronize);
+    GET_PROC(cuEventQuery);
+    GET_PROC(cuEventRecord);
+
+    GET_PROC_V2(cuGLGetDevices);
+    GET_PROC(cuGraphicsGLRegisterImage);
+    GET_PROC(cuGraphicsUnregisterResource);
+    GET_PROC(cuGraphicsMapResources);
+    GET_PROC(cuGraphicsUnmapResources);
+    GET_PROC(cuGraphicsSubResourceGetMappedArray);
+
+    GET_PROC(cuModuleLoadData);
+    GET_PROC(cuModuleLoadDataEx);
+    GET_PROC(cuModuleUnload);
+    GET_PROC(cuModuleGetFunction);
+    GET_PROC(cuModuleGetGlobal);
+    GET_PROC(cuModuleGetTexRef);
+    GET_PROC(cuLaunchKernel);
+
+    GET_PROC(cuTexObjectCreate);
+    GET_PROC(cuTexObjectDestroy);
+
+    GET_PROC(cuTexRefSetArray);
+    GET_PROC(cuTexRefSetFilterMode);
+
+    // more settings for texref
+    GET_PROC(cuTexRefSetAddressMode);
+    GET_PROC(cuTexRefSetFlags);
+
+    //graphic buffer related
+    GET_PROC_V2(cuTexRefSetAddress);
+    GET_PROC_V2(cuTexRefSetAddress2D);
+    GET_PROC(cuGraphicsGLRegisterBuffer);
+    GET_PROC_V2(cuGraphicsResourceGetMappedPointer);
+
+    //more driver info
+    GET_PROC(cuDeviceGetAttribute);
+
+    GET_PROC_OPTIONAL(cuDeviceGetUuid);
+    GET_PROC_OPTIONAL(cuImportExternalMemory);
+    GET_PROC_OPTIONAL(cuDestroyExternalMemory);
+    GET_PROC_OPTIONAL(cuExternalMemoryGetMappedBuffer);
+    GET_PROC_OPTIONAL(cuExternalMemoryGetMappedMipmappedArray);
+    GET_PROC_OPTIONAL(cuMipmappedArrayGetLevel);
+    GET_PROC_OPTIONAL(cuMipmappedArrayDestroy);
+
+    GET_PROC_OPTIONAL(cuImportExternalSemaphore);
+    GET_PROC_OPTIONAL(cuDestroyExternalSemaphore);
+    GET_PROC_OPTIONAL(cuSignalExternalSemaphoresAsync);
+    GET_PROC_OPTIONAL(cuWaitExternalSemaphoresAsync);
 
     GENERIC_LOAD_FUNC_FINALE(cuda);
 }
@@ -327,34 +382,34 @@  static inline int cuvid_load_functions(CuvidFunctions **functions, void *logctx)
 {
     GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME);
 
-    LOAD_SYMBOL_OPT(cuvidGetDecoderCaps, tcuvidGetDecoderCaps, "cuvidGetDecoderCaps");
-    LOAD_SYMBOL(cuvidCreateDecoder, tcuvidCreateDecoder, "cuvidCreateDecoder");
-    LOAD_SYMBOL(cuvidDestroyDecoder, tcuvidDestroyDecoder, "cuvidDestroyDecoder");
-    LOAD_SYMBOL(cuvidDecodePicture, tcuvidDecodePicture, "cuvidDecodePicture");
-    LOAD_SYMBOL(cuvidGetDecodeStatus, tcuvidGetDecodeStatus, "cuvidGetDecodeStatus");
-    LOAD_SYMBOL(cuvidReconfigureDecoder, tcuvidReconfigureDecoder, "cuvidReconfigureDecoder");
+    GET_PROC_OPTIONAL(cuvidGetDecoderCaps);
+    GET_PROC(cuvidCreateDecoder);
+    GET_PROC(cuvidDestroyDecoder);
+    GET_PROC(cuvidDecodePicture);
+    GET_PROC(cuvidGetDecodeStatus);
+    GET_PROC(cuvidReconfigureDecoder);
 #ifdef __CUVID_DEVPTR64
     LOAD_SYMBOL(cuvidMapVideoFrame, tcuvidMapVideoFrame, "cuvidMapVideoFrame64");
     LOAD_SYMBOL(cuvidUnmapVideoFrame, tcuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64");
 #else
-    LOAD_SYMBOL(cuvidMapVideoFrame, tcuvidMapVideoFrame, "cuvidMapVideoFrame");
-    LOAD_SYMBOL(cuvidUnmapVideoFrame, tcuvidUnmapVideoFrame, "cuvidUnmapVideoFrame");
+    GET_PROC(cuvidMapVideoFrame);
+    GET_PROC(cuvidUnmapVideoFrame);
 #endif
-    LOAD_SYMBOL(cuvidCtxLockCreate, tcuvidCtxLockCreate, "cuvidCtxLockCreate");
-    LOAD_SYMBOL(cuvidCtxLockDestroy, tcuvidCtxLockDestroy, "cuvidCtxLockDestroy");
-    LOAD_SYMBOL(cuvidCtxLock, tcuvidCtxLock, "cuvidCtxLock");
-    LOAD_SYMBOL(cuvidCtxUnlock, tcuvidCtxUnlock, "cuvidCtxUnlock");
-
-    LOAD_SYMBOL(cuvidCreateVideoSource, tcuvidCreateVideoSource, "cuvidCreateVideoSource");
-    LOAD_SYMBOL(cuvidCreateVideoSourceW, tcuvidCreateVideoSourceW, "cuvidCreateVideoSourceW");
-    LOAD_SYMBOL(cuvidDestroyVideoSource, tcuvidDestroyVideoSource, "cuvidDestroyVideoSource");
-    LOAD_SYMBOL(cuvidSetVideoSourceState, tcuvidSetVideoSourceState, "cuvidSetVideoSourceState");
-    LOAD_SYMBOL(cuvidGetVideoSourceState, tcuvidGetVideoSourceState, "cuvidGetVideoSourceState");
-    LOAD_SYMBOL(cuvidGetSourceVideoFormat, tcuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat");
-    LOAD_SYMBOL(cuvidGetSourceAudioFormat, tcuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat");
-    LOAD_SYMBOL(cuvidCreateVideoParser, tcuvidCreateVideoParser, "cuvidCreateVideoParser");
-    LOAD_SYMBOL(cuvidParseVideoData, tcuvidParseVideoData, "cuvidParseVideoData");
-    LOAD_SYMBOL(cuvidDestroyVideoParser, tcuvidDestroyVideoParser, "cuvidDestroyVideoParser");
+    GET_PROC(cuvidCtxLockCreate);
+    GET_PROC(cuvidCtxLockDestroy);
+    GET_PROC(cuvidCtxLock);
+    GET_PROC(cuvidCtxUnlock);
+
+    GET_PROC(cuvidCreateVideoSource);
+    GET_PROC(cuvidCreateVideoSourceW);
+    GET_PROC(cuvidDestroyVideoSource);
+    GET_PROC(cuvidSetVideoSourceState);
+    GET_PROC(cuvidGetVideoSourceState);
+    GET_PROC(cuvidGetSourceVideoFormat);
+    GET_PROC(cuvidGetSourceAudioFormat);
+    GET_PROC(cuvidCreateVideoParser);
+    GET_PROC(cuvidParseVideoData);
+    GET_PROC(cuvidDestroyVideoParser);
 
     GENERIC_LOAD_FUNC_FINALE(cuvid);
 }
@@ -363,15 +418,23 @@  static inline int nvenc_load_functions(NvencFunctions **functions, void *logctx)
 {
     GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);
 
-    LOAD_SYMBOL(NvEncodeAPICreateInstance, tNvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
-    LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, tNvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");
+    GET_PROC(NvEncodeAPICreateInstance);
+    GET_PROC(NvEncodeAPIGetMaxSupportedVersion);
 
     GENERIC_LOAD_FUNC_FINALE(nvenc);
 }
 
 #undef GENERIC_LOAD_FUNC_PREAMBLE
 #undef LOAD_LIBRARY
+#undef GET_PROC
+#undef GET_PROC_V2
+#undef GET_PROC_OPTIONAL
+#undef GET_PROC_REQUIRED
+#undef GET_PROC_EX
+#undef GET_PROC_EX_V2
+#undef STRINGIFY
 #undef LOAD_SYMBOL
+#undef LOAD_SYMBOL_OPT
 #undef GENERIC_LOAD_FUNC_FINALE
 #undef GENERIC_FREE_FUNC
 #undef CUDA_LIBNAME