diff mbox

[FFmpeg-devel] avutil/hwcontext_cuda: add AVCUDAFramesContext and AVCUDAFramesContext.flags

Message ID 20180507214648.25185-1-timo@rothenpieler.org
State Superseded
Headers show

Commit Message

Timo Rothenpieler May 7, 2018, 9:46 p.m. UTC
Frames can be mapped from nvdec/cuvid without needing any actual memory
allocation, while still needing all other features of the
hw_frames_ctx. Hence the dummy mode, which does not allocate any
(notable amounts of) memory but otherwise behaves exactly the same.
---
 doc/APIchanges             |  3 +++
 libavutil/hwcontext_cuda.c | 12 +++++++++++-
 libavutil/hwcontext_cuda.h | 22 +++++++++++++++++++++-
 libavutil/version.h        |  2 +-
 4 files changed, 36 insertions(+), 3 deletions(-)

Comments

wm4 May 8, 2018, 3:23 p.m. UTC | #1
On Mon,  7 May 2018 23:46:48 +0200
Timo Rothenpieler <timo@rothenpieler.org> wrote:

> Frames can be mapped from nvdec/cuvid, not needing any actual memory
> allocation, but all other features of the hw_frames_ctx.
> Hence the dummy-mode, which does not allocate any (notable amounts of)
> memory but otherwise behaves the exact same.
> ---
>  doc/APIchanges             |  3 +++
>  libavutil/hwcontext_cuda.c | 12 +++++++++++-
>  libavutil/hwcontext_cuda.h | 22 +++++++++++++++++++++-
>  libavutil/version.h        |  2 +-
>  4 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index ede5b186ae..82ec888fd8 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -15,6 +15,9 @@ libavutil:     2017-10-21
>  
>  API changes, most recent first:
>  
> +2018-05-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h
> +  Add AVCUDAFramesContext and AVCUDAFramesContext.flags.
> +
>  2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h
>    Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
>  
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 37827a770c..b0b4bf24ae 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -161,6 +161,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
>  
>  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>  {
> +    AVCUDAFramesContext *frctx = ctx->hwctx;
>      int aligned_width;
>      int width_in_bytes = ctx->width;
>  
> @@ -171,7 +172,11 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>      }
>      aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
>  
> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
> +    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
> +        frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0);
> +    else
> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
> +

Is this really needed? Because at least videotoolbox also lets the
decoder allocate frames, and allocates the "dummy" buffers outside of
the hwcontext. (I don't quite remember how it works.)

>      if (!frame->buf[0])
>          return AVERROR(ENOMEM);
>  
> @@ -210,6 +215,10 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>      frame->width  = ctx->width;
>      frame->height = ctx->height;
>  
> +    // they're pointing to invalid memory, dangerous to leave set
> +    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
> +        frame->data[0] = frame->data[1] = frame->data[2] = NULL;
> +
>      return 0;
>  }
>  
> @@ -402,6 +411,7 @@ const HWContextType ff_hwcontext_type_cuda = {
>      .name                 = "CUDA",
>  
>      .device_hwctx_size    = sizeof(AVCUDADeviceContext),
> +    .frames_hwctx_size    = sizeof(AVCUDAFramesContext),
>      .frames_priv_size     = sizeof(CUDAFramesContext),
>  
>      .device_create        = cuda_device_create,
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 12dae8449e..19accbd9be 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -45,7 +45,27 @@ typedef struct AVCUDADeviceContext {
>  } AVCUDADeviceContext;
>  
>  /**
> - * AVHWFramesContext.hwctx is currently not used
> + * This struct is allocated as AVHWFramesContext.hwctx
>   */
> +typedef struct AVCUDAFramesContext {
> +    /**
> +     * Special implementation-specific flags.
> +     *
> +     * May be set by the user before calling av_hwframe_ctx_init().
> +     */
> +    int flags;
> +} AVCUDAFramesContext;
> +
> +/**
> + * No actual allocation will happen, but otherwise behaves like normal.
> + *
> + * This is to be used if a AVHWFramesContext is required, but the actual
> + * allocation is happening outside of it.
> + *
> + * The resulting AVFrames will be identical to normal frames, except for
> + * their data[] pointers being NULL and the AVBufferRef in buf[0] being
> + * set but containing no notable allocation of memory.
> + */
> +#define AV_CUDA_HWFRAMES_DUMMY_MODE (1 << 0)
>  
>  #endif /* AVUTIL_HWCONTEXT_CUDA_H */
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 5185454d9b..84409b1d69 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>   */
>  
>  #define LIBAVUTIL_VERSION_MAJOR  56
> -#define LIBAVUTIL_VERSION_MINOR  18
> +#define LIBAVUTIL_VERSION_MINOR  19
>  #define LIBAVUTIL_VERSION_MICRO 100
>  
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
Timo Rothenpieler May 8, 2018, 3:43 p.m. UTC | #2
>> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
>> +    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
>> +        frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0);
>> +    else
>> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
>> +
> 
> Is this really needed? Because at least videotoolbox also lets the
> decoder allocate frames, and allocates the "dummy" buffers outside of
> the hwcontext. (I don't quite remember how it works.)

You mean compared to just leaving buf[0] empty?
wm4 May 8, 2018, 3:49 p.m. UTC | #3
On Tue, 8 May 2018 17:43:49 +0200
Timo Rothenpieler <timo@rothenpieler.org> wrote:

> >> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
> >> +    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
> >> +        frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0);
> >> +    else
> >> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
> >> +  
> > 
> > Is this really needed? Because at least videotoolbox also lets the
> > decoder allocate frames, and allocates the "dummy" buffers outside of
> > the hwcontext. (I don't quite remember how it works.)  
> 
> You mean compared to just leaving buf[0] empty?
> 

No, compared to how the videotoolbox code does things.
Timo Rothenpieler May 8, 2018, 4:30 p.m. UTC | #4
Am 08.05.2018 um 17:49 schrieb wm4:
> On Tue, 8 May 2018 17:43:49 +0200
> Timo Rothenpieler <timo@rothenpieler.org> wrote:
> 
>>>> -    frame->buf[0] = av_buffer_pool_get(ctx->pool);
>>>> +    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
>>>> +        frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0);
>>>> +    else
>>>> +        frame->buf[0] = av_buffer_pool_get(ctx->pool);
>>>> +
>>>
>>> Is this really needed? Because at least videotoolbox also lets the
>>> decoder allocate frames, and allocates the "dummy" buffers outside of
>>> the hwcontext. (I don't quite remember how it works.)
>>
>> You mean compared to just leaving buf[0] empty?
>>
> 
> No, compared to how the videotoolbox code does things.

videotoolbox seems to use an externally supplied AVHWFramesContext::pool,
which should be possible to do for cuda as well, i.e. a custom buffer
pool that returns mostly empty data, so the new API would be unneeded.
I'll have a look if it works out.
diff mbox

Patch

diff --git a/doc/APIchanges b/doc/APIchanges
index ede5b186ae..82ec888fd8 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@  libavutil:     2017-10-21
 
 API changes, most recent first:
 
+2018-05-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext_cuda.h
+  Add AVCUDAFramesContext and AVCUDAFramesContext.flags.
+
 2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h
   Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
 
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 37827a770c..b0b4bf24ae 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -161,6 +161,7 @@  static int cuda_frames_init(AVHWFramesContext *ctx)
 
 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
 {
+    AVCUDAFramesContext *frctx = ctx->hwctx;
     int aligned_width;
     int width_in_bytes = ctx->width;
 
@@ -171,7 +172,11 @@  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
     }
     aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
 
-    frame->buf[0] = av_buffer_pool_get(ctx->pool);
+    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
+        frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0);
+    else
+        frame->buf[0] = av_buffer_pool_get(ctx->pool);
+
     if (!frame->buf[0])
         return AVERROR(ENOMEM);
 
@@ -210,6 +215,10 @@  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
     frame->width  = ctx->width;
     frame->height = ctx->height;
 
+    // they're pointing to invalid memory, dangerous to leave set
+    if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
+        frame->data[0] = frame->data[1] = frame->data[2] = NULL;
+
     return 0;
 }
 
@@ -402,6 +411,7 @@  const HWContextType ff_hwcontext_type_cuda = {
     .name                 = "CUDA",
 
     .device_hwctx_size    = sizeof(AVCUDADeviceContext),
+    .frames_hwctx_size    = sizeof(AVCUDAFramesContext),
     .frames_priv_size     = sizeof(CUDAFramesContext),
 
     .device_create        = cuda_device_create,
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
index 12dae8449e..19accbd9be 100644
--- a/libavutil/hwcontext_cuda.h
+++ b/libavutil/hwcontext_cuda.h
@@ -45,7 +45,27 @@  typedef struct AVCUDADeviceContext {
 } AVCUDADeviceContext;
 
 /**
- * AVHWFramesContext.hwctx is currently not used
+ * This struct is allocated as AVHWFramesContext.hwctx
  */
+typedef struct AVCUDAFramesContext {
+    /**
+     * Special implementation-specific flags.
+     *
+     * May be set by the user before calling av_hwframe_ctx_init().
+     */
+    int flags;
+} AVCUDAFramesContext;
+
+/**
+ * No actual allocation will happen, but otherwise behaves like normal.
+ *
+ * This is to be used if an AVHWFramesContext is required, but the actual
+ * allocation is happening outside of it.
+ *
+ * The resulting AVFrames will be identical to normal frames, except for
+ * their data[] pointers being NULL and the AVBufferRef in buf[0] being
+ * set but containing no notable allocation of memory.
+ */
+#define AV_CUDA_HWFRAMES_DUMMY_MODE (1 << 0)
 
 #endif /* AVUTIL_HWCONTEXT_CUDA_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 5185454d9b..84409b1d69 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@ 
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  18
+#define LIBAVUTIL_VERSION_MINOR  19
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \