Message ID | 20180507214648.25185-1-timo@rothenpieler.org |
---|---|
State | Superseded |
Headers | show |
On Mon, 7 May 2018 23:46:48 +0200 Timo Rothenpieler <timo@rothenpieler.org> wrote: > Frames can be mapped from nvdec/cuvid, not needing any actual memory > allocation, but all other features of the hw_frames_ctx. > Hence the dummy-mode, which does not allocate any (notable amounts of) > memory but otherwise behaves the exact same. > --- > doc/APIchanges | 3 +++ > libavutil/hwcontext_cuda.c | 12 +++++++++++- > libavutil/hwcontext_cuda.h | 22 +++++++++++++++++++++- > libavutil/version.h | 2 +- > 4 files changed, 36 insertions(+), 3 deletions(-) > > diff --git a/doc/APIchanges b/doc/APIchanges > index ede5b186ae..82ec888fd8 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -15,6 +15,9 @@ libavutil: 2017-10-21 > > API changes, most recent first: > > +2018-05-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h > + Add AVCUDAFramesContext and AVCUDAFramesContext.flags. > + > 2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h > Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8. > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index 37827a770c..b0b4bf24ae 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -161,6 +161,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx) > > static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > { > + AVCUDAFramesContext *frctx = ctx->hwctx; > int aligned_width; > int width_in_bytes = ctx->width; > > @@ -171,7 +172,11 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > } > aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT); > > - frame->buf[0] = av_buffer_pool_get(ctx->pool); > + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) > + frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0); > + else > + frame->buf[0] = av_buffer_pool_get(ctx->pool); > + Is this really needed? Because at least videotoolbox also lets the decoder allocate frames, and allocates the "dummy" buffers outside of the hwcontext. (I don't quite remember how it works.) 
> if (!frame->buf[0]) > return AVERROR(ENOMEM); > > @@ -210,6 +215,10 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > frame->width = ctx->width; > frame->height = ctx->height; > > + // they're pointing to invalid memory, dangerous to leave set > + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) > + frame->data[0] = frame->data[1] = frame->data[2] = NULL; > + > return 0; > } > > @@ -402,6 +411,7 @@ const HWContextType ff_hwcontext_type_cuda = { > .name = "CUDA", > > .device_hwctx_size = sizeof(AVCUDADeviceContext), > + .frames_hwctx_size = sizeof(AVCUDAFramesContext), > .frames_priv_size = sizeof(CUDAFramesContext), > > .device_create = cuda_device_create, > diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h > index 12dae8449e..19accbd9be 100644 > --- a/libavutil/hwcontext_cuda.h > +++ b/libavutil/hwcontext_cuda.h > @@ -45,7 +45,27 @@ typedef struct AVCUDADeviceContext { > } AVCUDADeviceContext; > > /** > - * AVHWFramesContext.hwctx is currently not used > + * This struct is allocated as AVHWFramesContext.hwctx > */ > +typedef struct AVCUDAFramesContext { > + /** > + * Special implementation-specific flags. > + * > + * May be set by the user before calling av_hwframe_ctx_init(). > + */ > + int flags; > +} AVCUDAFramesContext; > + > +/** > + * No actual allocation will happen, but otherwise behaves like normal. > + * > + * This is to be used if a AVHWFramesContext is required, but the actual > + * allocation is happening outside of it. > + * > + * The resulting AVFrames will be identical to normal frames, except for > + * their data[] pointers being NULL and the AVBufferRef in buf[0] being > + * set but containing no notable allocation of memory. 
> + */ > +#define AV_CUDA_HWFRAMES_DUMMY_MODE (1 << 0) > > #endif /* AVUTIL_HWCONTEXT_CUDA_H */ > diff --git a/libavutil/version.h b/libavutil/version.h > index 5185454d9b..84409b1d69 100644 > --- a/libavutil/version.h > +++ b/libavutil/version.h > @@ -79,7 +79,7 @@ > */ > > #define LIBAVUTIL_VERSION_MAJOR 56 > -#define LIBAVUTIL_VERSION_MINOR 18 > +#define LIBAVUTIL_VERSION_MINOR 19 > #define LIBAVUTIL_VERSION_MICRO 100 > > #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
>> - frame->buf[0] = av_buffer_pool_get(ctx->pool); >> + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) >> + frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0); >> + else >> + frame->buf[0] = av_buffer_pool_get(ctx->pool); >> + > > Is this really needed? Because at least videotoolbox also lets the > decoder allocate frames, and allocates the "dummy" buffers outside of > the hwcontext. (I don't quite remember how it works.) You mean compared to just leaving buf[0] empty?
On Tue, 8 May 2018 17:43:49 +0200 Timo Rothenpieler <timo@rothenpieler.org> wrote: > >> - frame->buf[0] = av_buffer_pool_get(ctx->pool); > >> + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) > >> + frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0); > >> + else > >> + frame->buf[0] = av_buffer_pool_get(ctx->pool); > >> + > > > > Is this really needed? Because at least videotoolbox also lets the > > decoder allocate frames, and allocates the "dummy" buffers outside of > > the hwcontext. (I don't quite remember how it works.) > > You mean compared to just leaving buf[0] empty? > No, compared to how the videotoolbox code does things.
Am 08.05.2018 um 17:49 schrieb wm4: > On Tue, 8 May 2018 17:43:49 +0200 > Timo Rothenpieler <timo@rothenpieler.org> wrote: > >>>> - frame->buf[0] = av_buffer_pool_get(ctx->pool); >>>> + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) >>>> + frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0); >>>> + else >>>> + frame->buf[0] = av_buffer_pool_get(ctx->pool); >>>> + >>> >>> Is this really needed? Because at least videotoolbox also lets the >>> decoder allocate frames, and allocates the "dummy" buffers outside of >>> the hwcontext. (I don't quite remember how it works.) >> >> You mean compared to just leaving buf[0] empty? >> > > No, compared to how the videotoolbox code does things. videotoolbox seems to use an externally supplied AVHWFramesContext::pool, which should be possible to do for cuda as well, i.e. a custom buffer pool that returns mostly empty data, so the new API would be unneeded. I'll have a look to see if it works out.
diff --git a/doc/APIchanges b/doc/APIchanges index ede5b186ae..82ec888fd8 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2017-10-21 API changes, most recent first: +2018-05-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h + Add AVCUDAFramesContext and AVCUDAFramesContext.flags. + 2018-04-xx - xxxxxxxxxx - lavu 56.18.100 - pixdesc.h Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8. diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 37827a770c..b0b4bf24ae 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -161,6 +161,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx) static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) { + AVCUDAFramesContext *frctx = ctx->hwctx; int aligned_width; int width_in_bytes = ctx->width; @@ -171,7 +172,11 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) } aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT); - frame->buf[0] = av_buffer_pool_get(ctx->pool); + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) + frame->buf[0] = av_buffer_create(NULL, 0, NULL, NULL, 0); + else + frame->buf[0] = av_buffer_pool_get(ctx->pool); + if (!frame->buf[0]) return AVERROR(ENOMEM); @@ -210,6 +215,10 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) frame->width = ctx->width; frame->height = ctx->height; + // they're pointing to invalid memory, dangerous to leave set + if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE) + frame->data[0] = frame->data[1] = frame->data[2] = NULL; + return 0; } @@ -402,6 +411,7 @@ const HWContextType ff_hwcontext_type_cuda = { .name = "CUDA", .device_hwctx_size = sizeof(AVCUDADeviceContext), + .frames_hwctx_size = sizeof(AVCUDAFramesContext), .frames_priv_size = sizeof(CUDAFramesContext), .device_create = cuda_device_create, diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h index 12dae8449e..19accbd9be 100644 --- a/libavutil/hwcontext_cuda.h +++ 
b/libavutil/hwcontext_cuda.h @@ -45,7 +45,27 @@ typedef struct AVCUDADeviceContext { } AVCUDADeviceContext; /** - * AVHWFramesContext.hwctx is currently not used + * This struct is allocated as AVHWFramesContext.hwctx */ +typedef struct AVCUDAFramesContext { + /** + * Special implementation-specific flags. + * + * May be set by the user before calling av_hwframe_ctx_init(). + */ + int flags; +} AVCUDAFramesContext; + +/** + * No actual allocation will happen, but otherwise behaves like normal. + * + * This is to be used if an AVHWFramesContext is required, but the actual + * allocation is happening outside of it. + * + * The resulting AVFrames will be identical to normal frames, except for + * their data[] pointers being NULL and the AVBufferRef in buf[0] being + * set but containing no notable allocation of memory. + */ +#define AV_CUDA_HWFRAMES_DUMMY_MODE (1 << 0) #endif /* AVUTIL_HWCONTEXT_CUDA_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 5185454d9b..84409b1d69 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 56 -#define LIBAVUTIL_VERSION_MINOR 18 +#define LIBAVUTIL_VERSION_MINOR 19 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \