Message ID | 20230615021631.9139-1-sapharow@gmail.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] avcodec: Align AVFrame memory to page size for access via Apple Metal | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
yinshiyou/make_loongarch64 | warning | New warnings during build |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_x86 | warning | New warnings during build |
On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com> wrote: > > --- > libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 51 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c > index a04fd878de..b18af3be4a 100644 > --- a/libavcodec/get_buffer.c > +++ b/libavcodec/get_buffer.c > @@ -33,6 +33,11 @@ > #include "avcodec.h" > #include "internal.h" > > +#if __APPLE__ > +#import <mach/mach_init.h> > +#import <mach/vm_map.h> > +#endif > + > typedef struct FramePool { > /** > * Pools for each data plane. For audio all the planes have the same size, > @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void) > return buf; > } > > +#if __APPLE__ > +/* > + When compiling for Apple platform the frame buffer data pointers need to be > + page-aligned for cases when in-place GPU processing may be required > + https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy > + */ > +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz > + > +static void aapl_buffer_free(void *opaque, uint8_t *data) > +{ > + vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, (size_t)opaque); > +} > + > +static AVBufferRef *aapl_buffer_alloc(size_t size) > +{ > + AVBufferRef *ret = NULL; > + uint8_t *data = NULL; > + > + kern_return_t err; > + err = vm_allocate( (vm_map_t) mach_task_self(), > + (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE); > + if (err != KERN_SUCCESS || !data) > + return NULL; > + > + ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, 0); > + if (!ret) > + free(data); > + > + return ret; > +} > + > +static AVBufferRef *aapl_buffer_allocz(size_t size) > +{ > + AVBufferRef *ret = aapl_buffer_alloc(size); > + if (!ret) > + return NULL; > + > + memset(ret->data, 0, size); > + return ret; > +} > + > +#else > +#define POOL_BUFFER_ALLOCZ av_buffer_allocz > +#endif > + > static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) > { > FramePool *pool = avctx->internal->pool ? > @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS > pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, > CONFIG_MEMORY_POISONING ? > NULL : > - av_buffer_allocz); > + POOL_BUFFER_ALLOCZ); > if (!pool->pools[i]) { > ret = AVERROR(ENOMEM); > goto fail; > -- > 2.39.2 (Apple Git-143) > This is most definitely the wrong place to do this. Frames can be allocated through various means and in various locations, and randomly sprinkling new allocators all over is not how this should be approached. I don't believe FFmpeg itself shares this requirement, so maybe your application should just use a custom get_buffer2 callback to fullfill it? If others agree that FFmpeg should create frames with this property by default (which I can't answer without knowing if those special allocation functions have any other downsides etc), it should be done more centrally, rather then only in the avcodec pool. - Hendrik
Thanks for looking into this, The idea is to page-align the entire software-backed video pool allocation where video frames are being taken from – both for decoding and/or encoding (when done in software only). The default get_buffer2 (avcodec_default_get_buffer2) implementation contains code for both hardware-backed and software-backed frame allocations. Going down this path makes custom implementation a copy-paste of a large portion of the LGPL code into my app. Another thing to consider – it might not be a good idea to page-align every single AVFrame allocation because the alignment is not tiny – 16KB (for M1 machine). I found that this patch is minimum required change to allow in-memory GPU post-processing of the AVFrame between software encode/decode cycles. Please advise On Thu, 15 Jun 2023 at 19:40, Hendrik Leppkes <h.leppkes@gmail.com> wrote: > On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com> > wrote: > > > > --- > > libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 51 insertions(+), 1 deletion(-) > > > > diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c > > index a04fd878de..b18af3be4a 100644 > > --- a/libavcodec/get_buffer.c > > +++ b/libavcodec/get_buffer.c > > @@ -33,6 +33,11 @@ > > #include "avcodec.h" > > #include "internal.h" > > > > +#if __APPLE__ > > +#import <mach/mach_init.h> > > +#import <mach/vm_map.h> > > +#endif > > + > > typedef struct FramePool { > > /** > > * Pools for each data plane. For audio all the planes have the > same size, > > @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void) > > return buf; > > } > > > > +#if __APPLE__ > > +/* > > + When compiling for Apple platform the frame buffer data pointers > need to be > > + page-aligned for cases when in-place GPU processing may be required > > + > https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy > > + */ > > +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz > > + > > +static void aapl_buffer_free(void *opaque, uint8_t *data) > > +{ > > + vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, > (size_t)opaque); > > +} > > + > > +static AVBufferRef *aapl_buffer_alloc(size_t size) > > +{ > > + AVBufferRef *ret = NULL; > > + uint8_t *data = NULL; > > + > > + kern_return_t err; > > + err = vm_allocate( (vm_map_t) mach_task_self(), > > + (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE); > > + if (err != KERN_SUCCESS || !data) > > + return NULL; > > + > > + ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, > 0); > > + if (!ret) > > + free(data); > > + > > + return ret; > > +} > > + > > +static AVBufferRef *aapl_buffer_allocz(size_t size) > > +{ > > + AVBufferRef *ret = aapl_buffer_alloc(size); > > + if (!ret) > > + return NULL; > > + > > + memset(ret->data, 0, size); > > + return ret; > > +} > > + > > +#else > > +#define POOL_BUFFER_ALLOCZ av_buffer_allocz > > +#endif > > + > > static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) > > { > > FramePool *pool = avctx->internal->pool ? > > @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS > > pool->pools[i] = av_buffer_pool_init(size[i] + 16 + > STRIDE_ALIGN - 1, > > > CONFIG_MEMORY_POISONING ? > > NULL : > > - > av_buffer_allocz); > > + > POOL_BUFFER_ALLOCZ); > > if (!pool->pools[i]) { > > ret = AVERROR(ENOMEM); > > goto fail; > > -- > > 2.39.2 (Apple Git-143) > > > > This is most definitely the wrong place to do this. Frames can be > allocated through various means and in various locations, and randomly > sprinkling new allocators all over is not how this should be > approached. > > I don't believe FFmpeg itself shares this requirement, so maybe your > application should just use a custom get_buffer2 callback to fullfill > it? > If others agree that FFmpeg should create frames with this property by > default (which I can't answer without knowing if those special > allocation functions have any other downsides etc), it should be done > more centrally, rather then only in the avcodec pool. > > - Hendrik > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Thu, Jun 15, 2023 at 12:34 PM Iskandar Safarov <sapharow@gmail.com> wrote: > Thanks for looking into this, > > The idea is to page-align the entire software-backed video pool allocation > where video frames are being taken from – both for decoding and/or encoding > (when done in software only). > > The default get_buffer2 (avcodec_default_get_buffer2) implementation > contains code for both hardware-backed and software-backed frame > allocations. Going down this path makes custom implementation a copy-paste > of a large portion of the LGPL code into my app. > > Another thing to consider – it might not be a good idea to page-align every > single AVFrame allocation because the alignment is not tiny – 16KB (for M1 > machine). I found that this patch is minimum required change to allow > in-memory GPU post-processing of the AVFrame between software encode/decode > cycles. > > Please advise > Top posting is forbidden on this mailing list. As already advised use custom get_buffer2 calls, and not default ones. > > On Thu, 15 Jun 2023 at 19:40, Hendrik Leppkes <h.leppkes@gmail.com> wrote: > > > On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com> > > wrote: > > > > > > --- > > > libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++- > > > 1 file changed, 51 insertions(+), 1 deletion(-) > > > > > > diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c > > > index a04fd878de..b18af3be4a 100644 > > > --- a/libavcodec/get_buffer.c > > > +++ b/libavcodec/get_buffer.c > > > @@ -33,6 +33,11 @@ > > > #include "avcodec.h" > > > #include "internal.h" > > > > > > +#if __APPLE__ > > > +#import <mach/mach_init.h> > > > +#import <mach/vm_map.h> > > > +#endif > > > + > > > typedef struct FramePool { > > > /** > > > * Pools for each data plane. For audio all the planes have the > > same size, > > > @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void) > > > return buf; > > > } > > > > > > +#if __APPLE__ > > > +/* > > > + When compiling for Apple platform the frame buffer data pointers > > need to be > > > + page-aligned for cases when in-place GPU processing may be > required > > > + > > > https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy > > > + */ > > > +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz > > > + > > > +static void aapl_buffer_free(void *opaque, uint8_t *data) > > > +{ > > > + vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, > > (size_t)opaque); > > > +} > > > + > > > +static AVBufferRef *aapl_buffer_alloc(size_t size) > > > +{ > > > + AVBufferRef *ret = NULL; > > > + uint8_t *data = NULL; > > > + > > > + kern_return_t err; > > > + err = vm_allocate( (vm_map_t) mach_task_self(), > > > + (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE); > > > + if (err != KERN_SUCCESS || !data) > > > + return NULL; > > > + > > > + ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, > > 0); > > > + if (!ret) > > > + free(data); > > > + > > > + return ret; > > > +} > > > + > > > +static AVBufferRef *aapl_buffer_allocz(size_t size) > > > +{ > > > + AVBufferRef *ret = aapl_buffer_alloc(size); > > > + if (!ret) > > > + return NULL; > > > + > > > + memset(ret->data, 0, size); > > > + return ret; > > > +} > > > + > > > +#else > > > +#define POOL_BUFFER_ALLOCZ av_buffer_allocz > > > +#endif > > > + > > > static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) > > > { > > > FramePool *pool = avctx->internal->pool ? > > > @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS > > > pool->pools[i] = av_buffer_pool_init(size[i] + 16 + > > STRIDE_ALIGN - 1, > > > > > CONFIG_MEMORY_POISONING ? > > > NULL : > > > - > > av_buffer_allocz); > > > + > > POOL_BUFFER_ALLOCZ); > > > if (!pool->pools[i]) { > > > ret = AVERROR(ENOMEM); > > > goto fail; > > > -- > > > 2.39.2 (Apple Git-143) > > > > > > > This is most definitely the wrong place to do this. Frames can be > > allocated through various means and in various locations, and randomly > > sprinkling new allocators all over is not how this should be > > approached. > > > > I don't believe FFmpeg itself shares this requirement, so maybe your > > application should just use a custom get_buffer2 callback to fullfill > > it? > > If others agree that FFmpeg should create frames with this property by > > default (which I can't answer without knowing if those special > > allocation functions have any other downsides etc), it should be done > > more centrally, rather then only in the avcodec pool. > > > > - Hendrik > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > > > > > -- > Regards, > Iskandar Safarov > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c index a04fd878de..b18af3be4a 100644 --- a/libavcodec/get_buffer.c +++ b/libavcodec/get_buffer.c @@ -33,6 +33,11 @@ #include "avcodec.h" #include "internal.h" +#if __APPLE__ +#import <mach/mach_init.h> +#import <mach/vm_map.h> +#endif + typedef struct FramePool { /** * Pools for each data plane. For audio all the planes have the same size, @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void) return buf; } +#if __APPLE__ +/* + When compiling for Apple platform the frame buffer data pointers need to be + page-aligned for cases when in-place GPU processing may be required + https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy + */ +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz + +static void aapl_buffer_free(void *opaque, uint8_t *data) +{ + vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, (size_t)opaque); +} + +static AVBufferRef *aapl_buffer_alloc(size_t size) +{ + AVBufferRef *ret = NULL; + uint8_t *data = NULL; + + kern_return_t err; + err = vm_allocate( (vm_map_t) mach_task_self(), + (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE); + if (err != KERN_SUCCESS || !data) + return NULL; + + ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, 0); + if (!ret) + free(data); + + return ret; +} + +static AVBufferRef *aapl_buffer_allocz(size_t size) +{ + AVBufferRef *ret = aapl_buffer_alloc(size); + if (!ret) + return NULL; + + memset(ret->data, 0, size); + return ret; +} + +#else +#define POOL_BUFFER_ALLOCZ av_buffer_allocz +#endif + static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) { FramePool *pool = avctx->internal->pool ? @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, CONFIG_MEMORY_POISONING ? NULL : - av_buffer_allocz); + POOL_BUFFER_ALLOCZ); if (!pool->pools[i]) { ret = AVERROR(ENOMEM); goto fail;