Message ID | 20191117145804.32642-1-olegd@anyvision.co |
---|---|
State | Superseded |
Headers | show |
On 17.11.2019 15:58, Oleg Dobkin wrote: > Add AVCUDADeviceContextFlags to control the creation of CUDA device > context for the hardware CUDA decoder. > > The current values are 0 (default behavior) - new context will be > created for each decoder, and 1 - primary CUDA context will be used. > > There are several reasons for using primary device context instead of > creating a new one: > > - This is the recommended way to handle device contexts (see > https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf) > > - Memory allocations, kernels and other state are associated with the > current device context. Currently, the context is not accessible from > FFmpeg API, so, technically, the memory created by the hardware decoder > (the video frame) can't be safely read. > > Signed-off-by: Oleg Dobkin <olegd@anyvision.co> > --- > libavutil/hwcontext_cuda.c | 20 +++++++++++++++----- > libavutil/hwcontext_cuda.h | 7 +++++++ > 2 files changed, 22 insertions(+), 5 deletions(-) > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index cca39e9fc7..608ea57569 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx) > if (hwctx->internal) { > CudaFunctions *cu = hwctx->internal->cuda_dl; > if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { > - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + if (hwctx->flags == DCF_CREATE_CONTEXT) Should actually be checking for the flag, not equality. > + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + else > + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->cuda_device)); > hwctx->cuda_ctx = NULL; > + hwctx->cuda_device = NULL; > } > cuda_free_functions(&hwctx->internal->cuda_dl); > } > @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, > { > AVCUDADeviceContext *hwctx = device_ctx->hwctx; > CudaFunctions *cu; > - CUdevice cu_device; > CUcontext dummy; > int ret, device_idx = 0; > > @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); > + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, device_idx)); > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); > + hwctx->flags = flags; > + > + if (flags == DCF_CREATE_CONTEXT) > + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device)); > + else > + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->cuda_device)); > + > if (ret < 0) > goto error; > > // Setting stream to NULL will make functions automatically use the default CUstream > hwctx->stream = NULL; > > - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > + if (flags == DCF_CREATE_CONTEXT) > + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > > hwctx->internal->is_allocated = 1; > > diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h > index 81a0552cab..bab5eefe54 100644 > --- a/libavutil/hwcontext_cuda.h > +++ b/libavutil/hwcontext_cuda.h > @@ -34,6 +34,11 @@ > * AVBufferRefs whose data pointer is a CUdeviceptr. > */ > > +enum AVCUDADeviceContextFlags { > + DCF_CREATE_CONTEXT = 0, > + DCF_USE_PRIMARY_CONTEXT = 1 > +}; I'd only define a flag for the new behavior. If it's not set, keep old behavior. > typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal; > > /** > @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext { > CUcontext cuda_ctx; > CUstream stream; > AVCUDADeviceContextInternal *internal; > + CUdevice cuda_device; Can't one just call cuCtxGetDevice on the context to get the device? > + enum AVCUDADeviceContextFlags flags; The device_create/av_hwdevice_ctx_create function already has a (at the moment unused) flags parameter. So there should be no need to add this here. If need be, the information should be stored in AVCUDADeviceContextInternal instead. > } AVCUDADeviceContext; > Also needs configure updated for the higher ffnvcodec version that's required after this patch, and probably deserved a lavu micro bump.
I've changed enum into a flag and moved it into AVCUDADeviceContextInternal. > Can't one just call cuCtxGetDevice on the context to get the device? Not sure the cuCtxGetDevice can be used for the primary context; also, according to documentation it returns device id not handle. Anyway, is it that bad to store an additional handle? > Also needs configure updated for the higher ffnvcodec version I'm looking at the configure and can't figure out how ffnvcodec version is enforced. The script seems to accept several ranges of ffnvcodec versions. I could just increment the top-level check, but I'm not sure this is the correct way. On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote: > On 17.11.2019 15:58, Oleg Dobkin wrote: > > Add AVCUDADeviceContextFlags to control the creation of CUDA device > context for the hardware CUDA decoder. > > The current values are 0 (default behavior) - new context will be > created for each decoder, and 1 - primary CUDA context will be used. > > There are several reasons for using primary device context instead of > creating a new one: > > - This is the recommended way to handle device contexts (see > https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf > ) > > - Memory allocations, kernels and other state are associated with > the > current device context. Currently, the context is not accessible from > FFmpeg API, so, technically, the memory created by the hardware > decoder > (the video frame) can't be safely read. > > Signed-off-by: Oleg Dobkin <olegd@anyvision.co> > --- > libavutil/hwcontext_cuda.c | 20 +++++++++++++++----- > libavutil/hwcontext_cuda.h | 7 +++++++ > 2 files changed, 22 insertions(+), 5 deletions(-) > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index cca39e9fc7..608ea57569 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext > *device_ctx) > if (hwctx->internal) { > CudaFunctions *cu = hwctx->internal->cuda_dl; > if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { > - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + if (hwctx->flags == DCF_CREATE_CONTEXT) > > Should actually be checking for the flag, not equality. > > > + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + else > + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx- > >cuda_device)); > hwctx->cuda_ctx = NULL; > + hwctx->cuda_device = NULL; > } > cuda_free_functions(&hwctx->internal->cuda_dl); > } > @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext > *device_ctx, > { > AVCUDADeviceContext *hwctx = device_ctx->hwctx; > CudaFunctions *cu; > - CUdevice cu_device; > CUcontext dummy; > int ret, device_idx = 0; > > @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext > *device_ctx, > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); > + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, > device_idx)); > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, > CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); > + hwctx->flags = flags; > + > + if (flags == DCF_CREATE_CONTEXT) > + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, > CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device)); > + else > + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx- > >cuda_ctx, hwctx->cuda_device)); > + > if (ret < 0) > goto error; > > // Setting stream to NULL will make functions automatically use > the default CUstream > hwctx->stream = NULL; > > - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > + if (flags == DCF_CREATE_CONTEXT) > + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > > hwctx->internal->is_allocated = 1; > > diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h > index 81a0552cab..bab5eefe54 100644 > --- a/libavutil/hwcontext_cuda.h > +++ b/libavutil/hwcontext_cuda.h > @@ -34,6 +34,11 @@ > * AVBufferRefs whose data pointer is a CUdeviceptr. > */ > > +enum AVCUDADeviceContextFlags { > + DCF_CREATE_CONTEXT = 0, > + DCF_USE_PRIMARY_CONTEXT = 1 > +}; > > I'd only define a flag for the new behavior. If it's not set, keep > old > behavior. > > > typedef struct AVCUDADeviceContextInternal > AVCUDADeviceContextInternal; > > /** > @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext { > CUcontext cuda_ctx; > CUstream stream; > AVCUDADeviceContextInternal *internal; > + CUdevice cuda_device; > > Can't one just call cuCtxGetDevice on the context to get the device? > > > + enum AVCUDADeviceContextFlags flags; > > The device_create/av_hwdevice_ctx_create function already has a (at > the > moment unused) flags parameter. So there should be no need to add > this here. > If need be, the information should be stored in > AVCUDADeviceContextInternal instead. > > > } AVCUDADeviceContext; > > > Also needs configure updated for the higher ffnvcodec version that's > required after this patch, and probably deserved a lavu micro bump. > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On 18/11/2019 11:51, Oleg Dobkin wrote: > I've changed enum into a flag and moved it into > AVCUDADeviceContextInternal. > >> Can't one just call cuCtxGetDevice on the context to get the device? > > Not sure the cuCtxGetDevice can be used for the primary context; also, > according to documentation it returns device id not handle. Anyway, is > it that bad to store an additional handle? It's public API, and once something is there, it has to stay there for quite a while. So I'm not too much a fan of adding something there, which can already easily be gotten from existing fields. The CUdevice, which you intended to store there, and which cuCtxGetDevice returns, _is_ the device ID. It's a typedef to int. I'm not aware of there being anything beyond that. Storing it in the internal struct is fine though, for the sake of not having to call cuCtxGetDevice each time. >> Also needs configure updated for the higher ffnvcodec version > > I'm looking at the configure and can't figure out how ffnvcodec version > is enforced. The script seems to accept several ranges of ffnvcodec > versions. I could just increment the top-level check, but I'm not sure > this is the correct way. FFmpeg supports multiple tracks of the Video Codec SDK, to support older drivers and legacy GPUs that way. Since the version number only tracks the Video SDK Version, and not the CUDA loader version, what needs to be done is to set the new minimum version for each supported track. So far, there was no need to add explicit checks for SDK 9.1, but that will be required for this. Also, since SDK 8.0 is effectively dead, it can be dropped. I immediately bump the version on ffnvcodec git after every release, so the correct versions to check for in configure right now are: 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 > On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote: >> On 17.11.2019 15:58, Oleg Dobkin wrote: >> >> Add AVCUDADeviceContextFlags to control the creation of CUDA device >> context for the hardware CUDA decoder. >> >> The current values are 0 (default behavior) - new context will be >> created for each decoder, and 1 - primary CUDA context will be used. >> >> There are several reasons for using primary device context instead of >> creating a new one: >> >> - This is the recommended way to handle device contexts (see >> > https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf >> ) >> >> - Memory allocations, kernels and other state are associated with >> the >> current device context. Currently, the context is not accessible from >> FFmpeg API, so, technically, the memory created by the hardware >> decoder >> (the video frame) can't be safely read. >> >> Signed-off-by: Oleg Dobkin <olegd@anyvision.co> >> --- >> libavutil/hwcontext_cuda.c | 20 +++++++++++++++----- >> libavutil/hwcontext_cuda.h | 7 +++++++ >> 2 files changed, 22 insertions(+), 5 deletions(-) >> >> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c >> index cca39e9fc7..608ea57569 100644 >> --- a/libavutil/hwcontext_cuda.c >> +++ b/libavutil/hwcontext_cuda.c >> @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext >> *device_ctx) >> if (hwctx->internal) { >> CudaFunctions *cu = hwctx->internal->cuda_dl; >> if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { >> - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); >> + if (hwctx->flags == DCF_CREATE_CONTEXT) >> >> Should actually be checking for the flag, not equality. >> >> >> + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); >> + else >> + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx- >>> cuda_device)); >> hwctx->cuda_ctx = NULL; >> + hwctx->cuda_device = NULL; >> } >> cuda_free_functions(&hwctx->internal->cuda_dl); >> } >> @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext >> *device_ctx, >> { >> AVCUDADeviceContext *hwctx = device_ctx->hwctx; >> CudaFunctions *cu; >> - CUdevice cu_device; >> CUcontext dummy; >> int ret, device_idx = 0; >> >> @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext >> *device_ctx, >> if (ret < 0) >> goto error; >> >> - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); >> + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, >> device_idx)); >> if (ret < 0) >> goto error; >> >> - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, >> CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); >> + hwctx->flags = flags; >> + >> + if (flags == DCF_CREATE_CONTEXT) >> + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, >> CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device)); >> + else >> + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx- >>> cuda_ctx, hwctx->cuda_device)); >> + >> if (ret < 0) >> goto error; >> >> // Setting stream to NULL will make functions automatically use >> the default CUstream >> hwctx->stream = NULL; >> >> - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); >> + if (flags == DCF_CREATE_CONTEXT) >> + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); >> >> hwctx->internal->is_allocated = 1; >> >> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h >> index 81a0552cab..bab5eefe54 100644 >> --- a/libavutil/hwcontext_cuda.h >> +++ b/libavutil/hwcontext_cuda.h >> @@ -34,6 +34,11 @@ >> * AVBufferRefs whose data pointer is a CUdeviceptr. >> */ >> >> +enum AVCUDADeviceContextFlags { >> + DCF_CREATE_CONTEXT = 0, >> + DCF_USE_PRIMARY_CONTEXT = 1 >> +}; >> >> I'd only define a flag for the new behavior. If it's not set, keep >> old >> behavior. >> >> >> typedef struct AVCUDADeviceContextInternal >> AVCUDADeviceContextInternal; >> >> /** >> @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext { >> CUcontext cuda_ctx; >> CUstream stream; >> AVCUDADeviceContextInternal *internal; >> + CUdevice cuda_device; >> >> Can't one just call cuCtxGetDevice on the context to get the device? >> >> >> + enum AVCUDADeviceContextFlags flags; >> >> The device_create/av_hwdevice_ctx_create function already has a (at >> the >> moment unused) flags parameter. So there should be no need to add >> this here. >> If need be, the information should be stored in >> AVCUDADeviceContextInternal instead. >> >> >> } AVCUDADeviceContext; >> >> >> Also needs configure updated for the higher ffnvcodec version that's >> required after this patch, and probably deserved a lavu micro bump. >> >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> >> To unsubscribe, visit link above, or email >> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
> FFmpeg supports multiple tracks of the Video Codec SDK, to support > older > drivers and legacy GPUs that way. > Since the version number only tracks the Video SDK Version, and not > the > CUDA loader version, what needs to be done is to set the new minimum > version for each supported track. > So far, there was no need to add explicit checks for SDK 9.1, but > that > will be required for this. > Also, since SDK 8.0 is effectively dead, it can be dropped. > > I immediately bump the version on ffnvcodec git after every release, > so > the correct versions to check for in configure right now are: > 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 I'm not sure I understand this. As I see - ffmpeg wouldn't compile without the latest version of ffnvcodec, which would be 9.1.23.2 after the change is merged, so this is the only check that needs to be performed, regardless of the supported Video SDK versions, isn't it? On Mon, 2019-11-18 at 12:09 +0100, Timo Rothenpieler wrote: > On 18/11/2019 11:51, Oleg Dobkin wrote: > > I've changed enum into a flag and moved it into > AVCUDADeviceContextInternal. > > > Can't one just call cuCtxGetDevice on the context to get the device? > > Not sure the cuCtxGetDevice can be used for the primary context; > also, > according to documentation it returns device id not handle. Anyway, > is > it that bad to store an additional handle? > > It's public API, and once something is there, it has to stay there > for > quite a while. So I'm not too much a fan of adding something there, > which can already easily be gotten from existing fields. > > The CUdevice, which you intended to store there, and which > cuCtxGetDevice returns, _is_ the device ID. It's a typedef to int. > I'm not aware of there being anything beyond that. > > Storing it in the internal struct is fine though, for the sake of > not > having to call cuCtxGetDevice each time. > > > Also needs configure updated for the higher ffnvcodec version > > I'm looking at the configure and can't figure out how ffnvcodec > version > is enforced. The script seems to accept several ranges of ffnvcodec > versions. I could just increment the top-level check, but I'm not > sure > this is the correct way. > > FFmpeg supports multiple tracks of the Video Codec SDK, to support > older > drivers and legacy GPUs that way. > Since the version number only tracks the Video SDK Version, and not > the > CUDA loader version, what needs to be done is to set the new minimum > version for each supported track. > So far, there was no need to add explicit checks for SDK 9.1, but > that > will be required for this. > Also, since SDK 8.0 is effectively dead, it can be dropped. > > I immediately bump the version on ffnvcodec git after every release, > so > the correct versions to check for in configure right now are: > 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 > > > On Sun, 2019-11-17 at 23:31 +0100, Timo Rothenpieler wrote: > > On 17.11.2019 15:58, Oleg Dobkin wrote: > > Add AVCUDADeviceContextFlags to control the creation of CUDA device > context for the hardware CUDA decoder. > > The current values are 0 (default behavior) - new context will be > created for each decoder, and 1 - primary CUDA context will be used. > > There are several reasons for using primary device context instead of > creating a new one: > > - This is the recommended way to handle device contexts (see > > https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf > > ) > > - Memory allocations, kernels and other state are associated with > the > current device context. Currently, the context is not accessible from > FFmpeg API, so, technically, the memory created by the hardware > decoder > (the video frame) can't be safely read. > > Signed-off-by: Oleg Dobkin <olegd@anyvision.co> > --- > libavutil/hwcontext_cuda.c | 20 +++++++++++++++----- > libavutil/hwcontext_cuda.h | 7 +++++++ > 2 files changed, 22 insertions(+), 5 deletions(-) > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index cca39e9fc7..608ea57569 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext > *device_ctx) > if (hwctx->internal) { > CudaFunctions *cu = hwctx->internal->cuda_dl; > if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { > - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + if (hwctx->flags == DCF_CREATE_CONTEXT) > > Should actually be checking for the flag, not equality. > > > + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); > + else > + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx- > > cuda_device)); > hwctx->cuda_ctx = NULL; > + hwctx->cuda_device = NULL; > } > cuda_free_functions(&hwctx->internal->cuda_dl); > } > @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext > *device_ctx, > { > AVCUDADeviceContext *hwctx = device_ctx->hwctx; > CudaFunctions *cu; > - CUdevice cu_device; > CUcontext dummy; > int ret, device_idx = 0; > > @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext > *device_ctx, > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); > + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, > device_idx)); > if (ret < 0) > goto error; > > - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, > CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); > + hwctx->flags = flags; > + > + if (flags == DCF_CREATE_CONTEXT) > + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, > CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device)); > + else > + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx- > > cuda_ctx, hwctx->cuda_device)); > + > if (ret < 0) > goto error; > > // Setting stream to NULL will make functions automatically > use > the default CUstream > hwctx->stream = NULL; > > - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > + if (flags == DCF_CREATE_CONTEXT) > + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); > > hwctx->internal->is_allocated = 1; > > diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h > index 81a0552cab..bab5eefe54 100644 > --- a/libavutil/hwcontext_cuda.h > +++ b/libavutil/hwcontext_cuda.h > @@ -34,6 +34,11 @@ > * AVBufferRefs whose data pointer is a CUdeviceptr. > */ > > +enum AVCUDADeviceContextFlags { > + DCF_CREATE_CONTEXT = 0, > + DCF_USE_PRIMARY_CONTEXT = 1 > +}; > > I'd only define a flag for the new behavior. If it's not set, keep > old > behavior. > > > typedef struct AVCUDADeviceContextInternal > AVCUDADeviceContextInternal; > > /** > @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext { > CUcontext cuda_ctx; > CUstream stream; > AVCUDADeviceContextInternal *internal; > + CUdevice cuda_device; > > Can't one just call cuCtxGetDevice on the context to get the device? > > > + enum AVCUDADeviceContextFlags flags; > > The device_create/av_hwdevice_ctx_create function already has a (at > the > moment unused) flags parameter. So there should be no need to add > this here. > If need be, the information should be stored in > AVCUDADeviceContextInternal instead. > > > } AVCUDADeviceContext; > > > Also needs configure updated for the higher ffnvcodec version that's > required after this patch, and probably deserved a lavu micro bump. > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On 18/11/2019 12:25, Oleg Dobkin wrote: >> FFmpeg supports multiple tracks of the Video Codec SDK, to support >> older >> drivers and legacy GPUs that way. >> Since the version number only tracks the Video SDK Version, and not >> the >> CUDA loader version, what needs to be done is to set the new minimum >> version for each supported track. >> So far, there was no need to add explicit checks for SDK 9.1, but >> that >> will be required for this. >> Also, since SDK 8.0 is effectively dead, it can be dropped. >> >> I immediately bump the version on ffnvcodec git after every release, >> so >> the correct versions to check for in configure right now are: >> 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 > > I'm not sure I understand this. As I see - ffmpeg wouldn't compile > without the latest version of ffnvcodec, which would be 9.1.23.2 after > the change is merged, so this is the only check that needs to be > performed, regardless of the supported Video SDK versions, isn't it? ffmpeg will compile with the latest version of each Video SDK Version track.
Updated On Mon, 2019-11-18 at 13:51 +0100, Timo Rothenpieler wrote: > On 18/11/2019 12:25, Oleg Dobkin wrote: > > FFmpeg supports multiple tracks of the Video Codec SDK, to support > older > drivers and legacy GPUs that way. > Since the version number only tracks the Video SDK Version, and not > the > CUDA loader version, what needs to be done is to set the new minimum > version for each supported track. > So far, there was no need to add explicit checks for SDK 9.1, but > that > will be required for this. > Also, since SDK 8.0 is effectively dead, it can be dropped. > > I immediately bump the version on ffnvcodec git after every release, > so > the correct versions to check for in configure right now are: > 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 > > I'm not sure I understand this. As I see - ffmpeg wouldn't compile > without the latest version of ffnvcodec, which would be 9.1.23.2 > after > the change is merged, so this is the only check that needs to be > performed, regardless of the supported Video SDK versions, isn't it? > > ffmpeg will compile with the latest version of each Video SDK Version > track. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Any more comments? On Mon, 2019-11-18 at 15:39 +0200, Oleg Dobkin wrote: > Updated > > On Mon, 2019-11-18 at 13:51 +0100, Timo Rothenpieler wrote: > > On 18/11/2019 12:25, Oleg Dobkin wrote: > > FFmpeg supports multiple tracks of the Video Codec SDK, to support > older > drivers and legacy GPUs that way. > Since the version number only tracks the Video SDK Version, and not > the > CUDA loader version, what needs to be done is to set the new minimum > version for each supported track. > So far, there was no need to add explicit checks for SDK 9.1, but > that > will be required for this. > Also, since SDK 8.0 is effectively dead, it can be dropped. > > I immediately bump the version on ffnvcodec git after every release, > so > the correct versions to check for in configure right now are: > 9.1.23.1, 9.0.18.3, 8.2.15.10 and 8.1.24.11 > > I'm not sure I understand this. As I see - ffmpeg wouldn't compile > without the latest version of ffnvcodec, which would be 9.1.23.2 > after > the change is merged, so this is the only check that needs to be > performed, regardless of the supported Video SDK versions, isn't it? > > ffmpeg will compile with the latest version of each Video SDK Version > track. > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > >
On 19.11.2019 17:42, Oleg Dobkin wrote: > Any more comments? > Only thing I'm still wondering is if it would make sense to use the primary context by default. I can't think of any obvious downsides, other than weakened isolation, which really shouldn't be a huge concern. And there apparently are some potential performance gains.
On Tue, 2019-11-19 at 23:58 +0100, Timo Rothenpieler wrote: > Only thing I'm still wondering is if it would make sense to use the > primary context by default. > I can't think of any obvious downsides, other than weakened > isolation, > which really shouldn't be a huge concern. > And there apparently are some potential performance gains. I think it's safer to leave the default behavior, in case of some unknown corner cases - there could be some legacy code that assumes the creation of different contexts per each decoder.
On 20/11/2019 11:29, Oleg Dobkin wrote: > On Tue, 2019-11-19 at 23:58 +0100, Timo Rothenpieler wrote: >> Only thing I'm still wondering is if it would make sense to use the >> primary context by default. >> I can't think of any obvious downsides, other than weakened >> isolation, >> which really shouldn't be a huge concern. >> And there apparently are some potential performance gains. > > I think it's safer to leave the default behavior, in case of some > unknown corner cases - there could be some legacy code that assumes the > creation of different contexts per each decoder. > Good point. I haven't found the time to give this a test run so far. Once I did, and nothing exploded, I'll do a round of releases of ffnvcodec and apply the patch.
On Wed, Nov 20, 2019 at 12:48 PM Timo Rothenpieler <timo@rothenpieler.org> wrote: > > On 20/11/2019 11:29, Oleg Dobkin wrote: > > On Tue, 2019-11-19 at 23:58 +0100, Timo Rothenpieler wrote: > >> Only thing I'm still wondering is if it would make sense to use the > >> primary context by default. > >> I can't think of any obvious downsides, other than weakened > >> isolation, > >> which really shouldn't be a huge concern. > >> And there apparently are some potential performance gains. > > > > I think it's safer to leave the default behavior, in case of some > > unknown corner cases - there could be some legacy code that assumes the > > creation of different contexts per each decoder. > > > > Good point. I haven't found the time to give this a test run so far. > Once I did, and nothing exploded, I'll do a round of releases of > ffnvcodec and apply the patch. Is there any progress on this?
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index cca39e9fc7..608ea57569 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -281,8 +281,12 @@ static void cuda_device_uninit(AVHWDeviceContext *device_ctx) if (hwctx->internal) { CudaFunctions *cu = hwctx->internal->cuda_dl; if (hwctx->internal->is_allocated && hwctx->cuda_ctx) { - CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); + if (hwctx->flags == DCF_CREATE_CONTEXT) + CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx)); + else + CHECK_CU(cu->cuDevicePrimaryCtxRelease(hwctx->cuda_device)); hwctx->cuda_ctx = NULL; + hwctx->cuda_device = NULL; } cuda_free_functions(&hwctx->internal->cuda_dl); } @@ -322,7 +326,6 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, { AVCUDADeviceContext *hwctx = device_ctx->hwctx; CudaFunctions *cu; - CUdevice cu_device; CUcontext dummy; int ret, device_idx = 0; @@ -338,18 +341,25 @@ static int cuda_device_create(AVHWDeviceContext *device_ctx, if (ret < 0) goto error; - ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx)); + ret = CHECK_CU(cu->cuDeviceGet(&hwctx->cuda_device, device_idx)); if (ret < 0) goto error; - ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device)); + hwctx->flags = flags; + + if (flags == DCF_CREATE_CONTEXT) + ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, hwctx->cuda_device)); + else + ret = CHECK_CU(cu->cuDevicePrimaryCtxRetain(&hwctx->cuda_ctx, hwctx->cuda_device)); + if (ret < 0) goto error; // Setting stream to NULL will make functions automatically use the default CUstream hwctx->stream = NULL; - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (flags == DCF_CREATE_CONTEXT) + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); hwctx->internal->is_allocated = 1; diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h index 81a0552cab..bab5eefe54 100644 --- a/libavutil/hwcontext_cuda.h +++ b/libavutil/hwcontext_cuda.h @@ -34,6 +34,11 @@ * AVBufferRefs whose data pointer is a CUdeviceptr. */ +enum AVCUDADeviceContextFlags { + DCF_CREATE_CONTEXT = 0, + DCF_USE_PRIMARY_CONTEXT = 1 +}; + typedef struct AVCUDADeviceContextInternal AVCUDADeviceContextInternal; /** @@ -43,6 +48,8 @@ typedef struct AVCUDADeviceContext { CUcontext cuda_ctx; CUstream stream; AVCUDADeviceContextInternal *internal; + CUdevice cuda_device; + enum AVCUDADeviceContextFlags flags; } AVCUDADeviceContext; /**
Add AVCUDADeviceContextFlags to control the creation of CUDA device context for the hardware CUDA decoder. The current values are 0 (default behavior) - new context will be created for each decoder, and 1 - primary CUDA context will be used. There are several reasons for using primary device context instead of creating a new one: - This is the recommended way to handle device contexts (see https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf) - Memory allocations, kernels and other state are associated with the current device context. Currently, the context is not accessible from FFmpeg API, so, technically, the memory created by the hardware decoder (the video frame) can't be safely read. Signed-off-by: Oleg Dobkin <olegd@anyvision.co> --- libavutil/hwcontext_cuda.c | 20 +++++++++++++++----- libavutil/hwcontext_cuda.h | 7 +++++++ 2 files changed, 22 insertions(+), 5 deletions(-)