diff mbox series

[FFmpeg-devel] nv-codec-headers: add cuMemcpy

Message ID 20201113085523.1469-1-nowerzt@gmail.com
State Superseded
Headers show
Series [FFmpeg-devel] nv-codec-headers: add cuMemcpy | expand

Checks

Context Check Description
andriy/configure warning Failed to apply patch

Commit Message

leozhang Nov. 13, 2020, 8:55 a.m. UTC
Signed-off-by: leozhang <nowerzt@gmail.com>
---
 include/ffnvcodec/dynlink_cuda.h   | 1 +
 include/ffnvcodec/dynlink_loader.h | 2 ++
 2 files changed, 3 insertions(+)

Comments

Timo Rothenpieler Nov. 13, 2020, 12:19 p.m. UTC | #1
Please also add cuMemcpyAsync while at it.

What for and where is this needed?
leozhang Nov. 16, 2020, 2:38 a.m. UTC | #2
Timo Rothenpieler <timo@rothenpieler.org> 于2020年11月13日周五 下午8:20写道:
>
> Please also add cuMemcpyAsync while at it.
Will add it.
>
> What for and where is this needed?
cuMemcpy is used to copy a 1D array between host and device. For
example, the Gaussian blur filter weights can be calculated on the CPU,
then copied to device memory, and further loaded into shared memory for
later use. This is much faster than having every GPU thread calculate
the same weights redundantly.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/include/ffnvcodec/dynlink_cuda.h b/include/ffnvcodec/dynlink_cuda.h
index 117fdc5..1686894 100644
--- a/include/ffnvcodec/dynlink_cuda.h
+++ b/include/ffnvcodec/dynlink_cuda.h
@@ -365,6 +365,7 @@  typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
 typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
 typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
 typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t bytesize);
 typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
 typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pcopy, CUstream hStream);
 typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
diff --git a/include/ffnvcodec/dynlink_loader.h b/include/ffnvcodec/dynlink_loader.h
index b9995c3..6ce3009 100644
--- a/include/ffnvcodec/dynlink_loader.h
+++ b/include/ffnvcodec/dynlink_loader.h
@@ -152,6 +152,7 @@  typedef struct CudaFunctions {
     tcuMemAllocPitch_v2 *cuMemAllocPitch;
     tcuMemsetD8Async *cuMemsetD8Async;
     tcuMemFree_v2 *cuMemFree;
+    tcuMemcpy *cuMemcpy;
     tcuMemcpy2D_v2 *cuMemcpy2D;
     tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync;
     tcuGetErrorName *cuGetErrorName;
@@ -290,6 +291,7 @@  static inline int cuda_load_functions(CudaFunctions **functions, void *logctx)
     LOAD_SYMBOL(cuMemAllocPitch, tcuMemAllocPitch_v2, "cuMemAllocPitch_v2");
     LOAD_SYMBOL(cuMemsetD8Async, tcuMemsetD8Async, "cuMemsetD8Async");
     LOAD_SYMBOL(cuMemFree, tcuMemFree_v2, "cuMemFree_v2");
+    LOAD_SYMBOL(cuMemcpy, tcuMemcpy, "cuMemcpy");
     LOAD_SYMBOL(cuMemcpy2D, tcuMemcpy2D_v2, "cuMemcpy2D_v2");
     LOAD_SYMBOL(cuMemcpy2DAsync, tcuMemcpy2DAsync_v2, "cuMemcpy2DAsync_v2");
     LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");