diff mbox series

[FFmpeg-devel] avcodec: Align AVFrame memory to page size for access via Apple Metal

Message ID 20230615021631.9139-1-sapharow@gmail.com
State New
Headers show
Series [FFmpeg-devel] avcodec: Align AVFrame memory to page size for access via Apple Metal | expand

Checks

Context Check Description
yinshiyou/make_fate_loongarch64 success Make fate finished
yinshiyou/make_loongarch64 warning New warnings during build
andriy/make_fate_x86 success Make fate finished
andriy/make_x86 warning New warnings during build

Commit Message

Iskandar Safarov June 15, 2023, 2:16 a.m. UTC
---
 libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

Comments

Hendrik Leppkes June 15, 2023, 9:39 a.m. UTC | #1
On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com> wrote:
>
> ---
>  libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c
> index a04fd878de..b18af3be4a 100644
> --- a/libavcodec/get_buffer.c
> +++ b/libavcodec/get_buffer.c
> @@ -33,6 +33,11 @@
>  #include "avcodec.h"
>  #include "internal.h"
>
> +#if __APPLE__
> +#import <mach/mach_init.h>
> +#import <mach/vm_map.h>
> +#endif
> +
>  typedef struct FramePool {
>      /**
>       * Pools for each data plane. For audio all the planes have the same size,
> @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void)
>      return buf;
>  }
>
> +#if __APPLE__
> +/*
> +    When compiling for Apple platform the frame buffer data pointers need to be
> +    page-aligned for cases when in-place GPU processing may be required
> +    https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy
> + */
> +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz
> +
> +static void aapl_buffer_free(void *opaque, uint8_t *data)
> +{
> +    vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, (size_t)opaque);
> +}
> +
> +static AVBufferRef *aapl_buffer_alloc(size_t size)
> +{
> +    AVBufferRef *ret = NULL;
> +    uint8_t    *data = NULL;
> +
> +    kern_return_t   err;
> +    err = vm_allocate(  (vm_map_t) mach_task_self(),
> +        (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE);
> +    if (err != KERN_SUCCESS || !data)
> +        return NULL;
> +
> +    ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, 0);
> +    if (!ret)
> +        free(data);
> +
> +    return ret;
> +}
> +
> +static AVBufferRef *aapl_buffer_allocz(size_t size)
> +{
> +    AVBufferRef *ret = aapl_buffer_alloc(size);
> +    if (!ret)
> +        return NULL;
> +
> +    memset(ret->data, 0, size);
> +    return ret;
> +}
> +
> +#else
> +#define POOL_BUFFER_ALLOCZ av_buffer_allocz
> +#endif
> +
>  static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
>  {
>      FramePool *pool = avctx->internal->pool ?
> @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
>                  pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
>                                                       CONFIG_MEMORY_POISONING ?
>                                                          NULL :
> -                                                        av_buffer_allocz);
> +                                                        POOL_BUFFER_ALLOCZ);
>                  if (!pool->pools[i]) {
>                      ret = AVERROR(ENOMEM);
>                      goto fail;
> --
> 2.39.2 (Apple Git-143)
>

This is most definitely the wrong place to do this. Frames can be
allocated through various means and in various locations, and randomly
sprinkling new allocators all over is not how this should be
approached.

I don't believe FFmpeg itself shares this requirement, so maybe your
application should just use a custom get_buffer2 callback to fullfill
it?
If others agree that FFmpeg should create frames with this property by
default (which I can't answer without knowing if those special
allocation functions have any other downsides etc), it should be done
more centrally, rather then only in the avcodec pool.

- Hendrik
Iskandar Safarov June 15, 2023, 10:33 a.m. UTC | #2
Thanks for looking into this,

The idea is to page-align the entire software-backed video pool allocation
where video frames are being taken from – both for decoding and/or encoding
(when done in software only).

The default get_buffer2 (avcodec_default_get_buffer2) implementation
contains code for both hardware-backed and software-backed frame
allocations. Going down this path makes custom implementation a copy-paste
of a large portion of the LGPL code into my app.

Another thing to consider – it might not be a good idea to page-align every
single AVFrame allocation because the alignment is not tiny – 16KB (for M1
machine). I found that this patch is minimum required change to allow
in-memory GPU post-processing of the AVFrame between software encode/decode
cycles.

Please advise

On Thu, 15 Jun 2023 at 19:40, Hendrik Leppkes <h.leppkes@gmail.com> wrote:

> On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com>
> wrote:
> >
> > ---
> >  libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 51 insertions(+), 1 deletion(-)
> >
> > diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c
> > index a04fd878de..b18af3be4a 100644
> > --- a/libavcodec/get_buffer.c
> > +++ b/libavcodec/get_buffer.c
> > @@ -33,6 +33,11 @@
> >  #include "avcodec.h"
> >  #include "internal.h"
> >
> > +#if __APPLE__
> > +#import <mach/mach_init.h>
> > +#import <mach/vm_map.h>
> > +#endif
> > +
> >  typedef struct FramePool {
> >      /**
> >       * Pools for each data plane. For audio all the planes have the
> same size,
> > @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void)
> >      return buf;
> >  }
> >
> > +#if __APPLE__
> > +/*
> > +    When compiling for Apple platform the frame buffer data pointers
> need to be
> > +    page-aligned for cases when in-place GPU processing may be required
> > +
> https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy
> > + */
> > +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz
> > +
> > +static void aapl_buffer_free(void *opaque, uint8_t *data)
> > +{
> > +    vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data,
> (size_t)opaque);
> > +}
> > +
> > +static AVBufferRef *aapl_buffer_alloc(size_t size)
> > +{
> > +    AVBufferRef *ret = NULL;
> > +    uint8_t    *data = NULL;
> > +
> > +    kern_return_t   err;
> > +    err = vm_allocate(  (vm_map_t) mach_task_self(),
> > +        (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE);
> > +    if (err != KERN_SUCCESS || !data)
> > +        return NULL;
> > +
> > +    ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size,
> 0);
> > +    if (!ret)
> > +        free(data);
> > +
> > +    return ret;
> > +}
> > +
> > +static AVBufferRef *aapl_buffer_allocz(size_t size)
> > +{
> > +    AVBufferRef *ret = aapl_buffer_alloc(size);
> > +    if (!ret)
> > +        return NULL;
> > +
> > +    memset(ret->data, 0, size);
> > +    return ret;
> > +}
> > +
> > +#else
> > +#define POOL_BUFFER_ALLOCZ av_buffer_allocz
> > +#endif
> > +
> >  static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
> >  {
> >      FramePool *pool = avctx->internal->pool ?
> > @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
> >                  pool->pools[i] = av_buffer_pool_init(size[i] + 16 +
> STRIDE_ALIGN - 1,
> >
>  CONFIG_MEMORY_POISONING ?
> >                                                          NULL :
> > -
> av_buffer_allocz);
> > +
> POOL_BUFFER_ALLOCZ);
> >                  if (!pool->pools[i]) {
> >                      ret = AVERROR(ENOMEM);
> >                      goto fail;
> > --
> > 2.39.2 (Apple Git-143)
> >
>
> This is most definitely the wrong place to do this. Frames can be
> allocated through various means and in various locations, and randomly
> sprinkling new allocators all over is not how this should be
> approached.
>
> I don't believe FFmpeg itself shares this requirement, so maybe your
> application should just use a custom get_buffer2 callback to fullfill
> it?
> If others agree that FFmpeg should create frames with this property by
> default (which I can't answer without knowing if those special
> allocation functions have any other downsides etc), it should be done
> more centrally, rather then only in the avcodec pool.
>
> - Hendrik
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Paul B Mahol June 17, 2023, 6:20 a.m. UTC | #3
On Thu, Jun 15, 2023 at 12:34 PM Iskandar Safarov <sapharow@gmail.com>
wrote:

> Thanks for looking into this,
>
> The idea is to page-align the entire software-backed video pool allocation
> where video frames are being taken from – both for decoding and/or encoding
> (when done in software only).
>
> The default get_buffer2 (avcodec_default_get_buffer2) implementation
> contains code for both hardware-backed and software-backed frame
> allocations. Going down this path makes custom implementation a copy-paste
> of a large portion of the LGPL code into my app.
>
> Another thing to consider – it might not be a good idea to page-align every
> single AVFrame allocation because the alignment is not tiny – 16KB (for M1
> machine). I found that this patch is minimum required change to allow
> in-memory GPU post-processing of the AVFrame between software encode/decode
> cycles.
>
> Please advise
>

Top posting is forbidden on this mailing list.
As already advised use custom get_buffer2 calls, and not default ones.


>
> On Thu, 15 Jun 2023 at 19:40, Hendrik Leppkes <h.leppkes@gmail.com> wrote:
>
> > On Thu, Jun 15, 2023 at 4:16 AM Iskandar Safarov <sapharow@gmail.com>
> > wrote:
> > >
> > > ---
> > >  libavcodec/get_buffer.c | 52 ++++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 51 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c
> > > index a04fd878de..b18af3be4a 100644
> > > --- a/libavcodec/get_buffer.c
> > > +++ b/libavcodec/get_buffer.c
> > > @@ -33,6 +33,11 @@
> > >  #include "avcodec.h"
> > >  #include "internal.h"
> > >
> > > +#if __APPLE__
> > > +#import <mach/mach_init.h>
> > > +#import <mach/vm_map.h>
> > > +#endif
> > > +
> > >  typedef struct FramePool {
> > >      /**
> > >       * Pools for each data plane. For audio all the planes have the
> > same size,
> > > @@ -81,6 +86,51 @@ static AVBufferRef *frame_pool_alloc(void)
> > >      return buf;
> > >  }
> > >
> > > +#if __APPLE__
> > > +/*
> > > +    When compiling for Apple platform the frame buffer data pointers
> > need to be
> > > +    page-aligned for cases when in-place GPU processing may be
> required
> > > +
> >
> https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy
> > > + */
> > > +#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz
> > > +
> > > +static void aapl_buffer_free(void *opaque, uint8_t *data)
> > > +{
> > > +    vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data,
> > (size_t)opaque);
> > > +}
> > > +
> > > +static AVBufferRef *aapl_buffer_alloc(size_t size)
> > > +{
> > > +    AVBufferRef *ret = NULL;
> > > +    uint8_t    *data = NULL;
> > > +
> > > +    kern_return_t   err;
> > > +    err = vm_allocate(  (vm_map_t) mach_task_self(),
> > > +        (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE);
> > > +    if (err != KERN_SUCCESS || !data)
> > > +        return NULL;
> > > +
> > > +    ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size,
> > 0);
> > > +    if (!ret)
> > > +        free(data);
> > > +
> > > +    return ret;
> > > +}
> > > +
> > > +static AVBufferRef *aapl_buffer_allocz(size_t size)
> > > +{
> > > +    AVBufferRef *ret = aapl_buffer_alloc(size);
> > > +    if (!ret)
> > > +        return NULL;
> > > +
> > > +    memset(ret->data, 0, size);
> > > +    return ret;
> > > +}
> > > +
> > > +#else
> > > +#define POOL_BUFFER_ALLOCZ av_buffer_allocz
> > > +#endif
> > > +
> > >  static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
> > >  {
> > >      FramePool *pool = avctx->internal->pool ?
> > > @@ -155,7 +205,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
> > >                  pool->pools[i] = av_buffer_pool_init(size[i] + 16 +
> > STRIDE_ALIGN - 1,
> > >
> >  CONFIG_MEMORY_POISONING ?
> > >                                                          NULL :
> > > -
> > av_buffer_allocz);
> > > +
> > POOL_BUFFER_ALLOCZ);
> > >                  if (!pool->pools[i]) {
> > >                      ret = AVERROR(ENOMEM);
> > >                      goto fail;
> > > --
> > > 2.39.2 (Apple Git-143)
> > >
> >
> > This is most definitely the wrong place to do this. Frames can be
> > allocated through various means and in various locations, and randomly
> > sprinkling new allocators all over is not how this should be
> > approached.
> >
> > I don't believe FFmpeg itself shares this requirement, so maybe your
> > application should just use a custom get_buffer2 callback to fullfill
> > it?
> > If others agree that FFmpeg should create frames with this property by
> > default (which I can't answer without knowing if those special
> > allocation functions have any other downsides etc), it should be done
> > more centrally, rather then only in the avcodec pool.
> >
> > - Hendrik
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
>
>
> --
> Regards,
> Iskandar Safarov
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/get_buffer.c b/libavcodec/get_buffer.c
index a04fd878de..b18af3be4a 100644
--- a/libavcodec/get_buffer.c
+++ b/libavcodec/get_buffer.c
@@ -33,6 +33,11 @@ 
 #include "avcodec.h"
 #include "internal.h"
 
+#if __APPLE__
+#import <mach/mach_init.h>
+#import <mach/vm_map.h>
+#endif
+
 typedef struct FramePool {
     /**
      * Pools for each data plane. For audio all the planes have the same size,
@@ -81,6 +86,51 @@  static AVBufferRef *frame_pool_alloc(void)
     return buf;
 }
 
+#if __APPLE__
+/* 
+    When compiling for Apple platform the frame buffer data pointers need to be
+    page-aligned for cases when in-place GPU processing may be required
+    https://developer.apple.com/documentation/metal/mtldevice/1433382-newbufferwithbytesnocopy
+ */
+#define POOL_BUFFER_ALLOCZ aapl_buffer_allocz
+
+static void aapl_buffer_free(void *opaque, uint8_t *data)
+{
+    vm_deallocate((vm_map_t) mach_task_self(), (vm_address_t)data, (size_t)opaque);
+}
+
+static AVBufferRef *aapl_buffer_alloc(size_t size)
+{
+    AVBufferRef *ret = NULL;
+    uint8_t    *data = NULL;
+
+    kern_return_t   err;
+    err = vm_allocate(  (vm_map_t) mach_task_self(),
+        (vm_address_t*) &data, size, VM_FLAGS_ANYWHERE);
+    if (err != KERN_SUCCESS || !data)
+        return NULL;
+
+    ret = av_buffer_create(data, size, aapl_buffer_free, (void*)size, 0);
+    if (!ret)
+        free(data);
+
+    return ret;
+}
+
+static AVBufferRef *aapl_buffer_allocz(size_t size)
+{
+    AVBufferRef *ret = aapl_buffer_alloc(size);
+    if (!ret)
+        return NULL;
+
+    memset(ret->data, 0, size);
+    return ret;
+}
+
+#else
+#define POOL_BUFFER_ALLOCZ av_buffer_allocz
+#endif
+
 static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
 {
     FramePool *pool = avctx->internal->pool ?
@@ -155,7 +205,7 @@  FF_ENABLE_DEPRECATION_WARNINGS
                 pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
                                                      CONFIG_MEMORY_POISONING ?
                                                         NULL :
-                                                        av_buffer_allocz);
+                                                        POOL_BUFFER_ALLOCZ);
                 if (!pool->pools[i]) {
                     ret = AVERROR(ENOMEM);
                     goto fail;