diff mbox series

[FFmpeg-devel] avutil/mem: use C11 aligned_malloc()

Message ID 20240218161636.15649-1-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel] avutil/mem: use C11 aligned_malloc() | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 fail Make failed
andriy/make_x86 fail Make failed

Commit Message

James Almer Feb. 18, 2024, 4:16 p.m. UTC
Save for the Microsoft C Runtime library, where free() can't handle aligned
buffers, aligned_malloc() should be available and working on all supported
targets.
Also, malloc() alone may be sufficient if alignment requirement is low, so add
a check for it.

Signed-off-by: James Almer <jamrial@gmail.com>
---
 configure       |  2 --
 libavutil/mem.c | 42 ++++++------------------------------------
 2 files changed, 6 insertions(+), 38 deletions(-)

Comments

Andreas Rheinhardt Feb. 18, 2024, 4:27 p.m. UTC | #1
James Almer:
> Save for the Microsoft C Runtime library, where free() can't handle aligned
> buffers, aligned_malloc() should be available and working on all supported
> targets.
> Also, malloc() alone may be sufficient if alignment requirement is low, so add
> a check for it.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  configure       |  2 --
>  libavutil/mem.c | 42 ++++++------------------------------------
>  2 files changed, 6 insertions(+), 38 deletions(-)
> 
> diff --git a/configure b/configure
> index 7c45ac25c8..8fd2895ac2 100755
> --- a/configure
> +++ b/configure
> @@ -6450,8 +6450,6 @@ if test -n "$custom_allocator"; then
>  fi
>  
>  check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
> -check_func  ${malloc_prefix}memalign            && enable memalign
> -check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
>  
>  check_func  access
>  check_func_headers stdlib.h arc4random_buf
> diff --git a/libavutil/mem.c b/libavutil/mem.c
> index 36b8940a0c..a72981d1ab 100644
> --- a/libavutil/mem.c
> +++ b/libavutil/mem.c
> @@ -100,44 +100,14 @@ void *av_malloc(size_t size)
>      if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
>          return NULL;
>  
> -#if HAVE_POSIX_MEMALIGN
> -    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
> -    if (posix_memalign(&ptr, ALIGN, size))
> -        ptr = NULL;
> -#elif HAVE_ALIGNED_MALLOC
> +#if HAVE_ALIGNED_MALLOC
>      ptr = _aligned_malloc(size, ALIGN);
> -#elif HAVE_MEMALIGN
> -#ifndef __DJGPP__
> -    ptr = memalign(ALIGN, size);
> -#else
> -    ptr = memalign(size, ALIGN);
> -#endif
> -    /* Why 64?
> -     * Indeed, we should align it:
> -     *   on  4 for 386
> -     *   on 16 for 486
> -     *   on 32 for 586, PPro - K6-III
> -     *   on 64 for K7 (maybe for P3 too).
> -     * Because L1 and L2 caches are aligned on those values.
> -     * But I don't want to code such logic here!
> -     */
> -    /* Why 32?
> -     * For AVX ASM. SSE / NEON needs only 16.
> -     * Why not larger? Because I did not see a difference in benchmarks ...
> -     */
> -    /* benchmarks with P3
> -     * memalign(64) + 1          3071, 3051, 3032
> -     * memalign(64) + 2          3051, 3032, 3041
> -     * memalign(64) + 4          2911, 2896, 2915
> -     * memalign(64) + 8          2545, 2554, 2550
> -     * memalign(64) + 16         2543, 2572, 2563
> -     * memalign(64) + 32         2546, 2545, 2571
> -     * memalign(64) + 64         2570, 2533, 2558
> -     *
> -     * BTW, malloc seems to do 8-byte alignment by default here.
> -     */
>  #else
> -    ptr = malloc(size);
> +    // malloc may already allocate sufficiently aligned buffers
> +    if (ALIGN > _Alignof(max_align_t))
> +        ptr = aligned_malloc(size, ALIGN);
> +    else
> +        ptr = malloc(size);
>  #endif
>      if(!ptr && !size) {
>          size = 1;

1. The function is called aligned_alloc (how did you test this?).
2. C11: "The value of alignment shall be a valid alignment supported by
the implementation and the value of size shall be an integral multiple
of alignment."
a) To use this, you would have to round size upwards; but this will make
sanitiziers more lenient.
b) If ALIGN is just not supported by the implementation, then everything
is UB in C11.
3. What's the advantage of this patch anyway?

- Andreas
James Almer Feb. 18, 2024, 4:29 p.m. UTC | #2
On 2/18/2024 1:27 PM, Andreas Rheinhardt wrote:
> James Almer:
>> Save for the Microsoft C Runtime library, where free() can't handle aligned
>> buffers, aligned_malloc() should be available and working on all supported
>> targets.
>> Also, malloc() alone may be sufficient if alignment requirement is low, so add
>> a check for it.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   configure       |  2 --
>>   libavutil/mem.c | 42 ++++++------------------------------------
>>   2 files changed, 6 insertions(+), 38 deletions(-)
>>
>> diff --git a/configure b/configure
>> index 7c45ac25c8..8fd2895ac2 100755
>> --- a/configure
>> +++ b/configure
>> @@ -6450,8 +6450,6 @@ if test -n "$custom_allocator"; then
>>   fi
>>   
>>   check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
>> -check_func  ${malloc_prefix}memalign            && enable memalign
>> -check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
>>   
>>   check_func  access
>>   check_func_headers stdlib.h arc4random_buf
>> diff --git a/libavutil/mem.c b/libavutil/mem.c
>> index 36b8940a0c..a72981d1ab 100644
>> --- a/libavutil/mem.c
>> +++ b/libavutil/mem.c
>> @@ -100,44 +100,14 @@ void *av_malloc(size_t size)
>>       if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
>>           return NULL;
>>   
>> -#if HAVE_POSIX_MEMALIGN
>> -    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
>> -    if (posix_memalign(&ptr, ALIGN, size))
>> -        ptr = NULL;
>> -#elif HAVE_ALIGNED_MALLOC
>> +#if HAVE_ALIGNED_MALLOC
>>       ptr = _aligned_malloc(size, ALIGN);
>> -#elif HAVE_MEMALIGN
>> -#ifndef __DJGPP__
>> -    ptr = memalign(ALIGN, size);
>> -#else
>> -    ptr = memalign(size, ALIGN);
>> -#endif
>> -    /* Why 64?
>> -     * Indeed, we should align it:
>> -     *   on  4 for 386
>> -     *   on 16 for 486
>> -     *   on 32 for 586, PPro - K6-III
>> -     *   on 64 for K7 (maybe for P3 too).
>> -     * Because L1 and L2 caches are aligned on those values.
>> -     * But I don't want to code such logic here!
>> -     */
>> -    /* Why 32?
>> -     * For AVX ASM. SSE / NEON needs only 16.
>> -     * Why not larger? Because I did not see a difference in benchmarks ...
>> -     */
>> -    /* benchmarks with P3
>> -     * memalign(64) + 1          3071, 3051, 3032
>> -     * memalign(64) + 2          3051, 3032, 3041
>> -     * memalign(64) + 4          2911, 2896, 2915
>> -     * memalign(64) + 8          2545, 2554, 2550
>> -     * memalign(64) + 16         2543, 2572, 2563
>> -     * memalign(64) + 32         2546, 2545, 2571
>> -     * memalign(64) + 64         2570, 2533, 2558
>> -     *
>> -     * BTW, malloc seems to do 8-byte alignment by default here.
>> -     */
>>   #else
>> -    ptr = malloc(size);
>> +    // malloc may already allocate sufficiently aligned buffers
>> +    if (ALIGN > _Alignof(max_align_t))
>> +        ptr = aligned_malloc(size, ALIGN);
>> +    else
>> +        ptr = malloc(size);
>>   #endif
>>       if(!ptr && !size) {
>>           size = 1;
> 
> 1. The function is called aligned_alloc (how did you test this?).

By compiling on a target with _aligned_malloc(), which hid my mistake.

> 2. C11: "The value of alignment shall be a valid alignment supported by
> the implementation and the value of size shall be an integral multiple
> of alignment."

Well, that sure is not very useful...

> a) To use this, you would have to round size upwards; but this will make
> sanitiziers more lenient.
> b) If ALIGN is just not supported by the implementation, then everything
> is UB in C11.
> 3. What's the advantage of this patch anyway?

Removing all the different target specific allocation functions in favor 
of the standard one. But your second point makes it moot, so patch 
withdrawn.

> 
> - Andreas
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Rémi Denis-Courmont Feb. 18, 2024, 6:35 p.m. UTC | #3
Le sunnuntaina 18. helmikuuta 2024, 18.27.35 EET Andreas Rheinhardt a écrit :
> 1. The function is called aligned_alloc (how did you test this?).
> 2. C11: "The value of alignment shall be a valid alignment supported by
> the implementation and the value of size shall be an integral multiple
> of alignment."
> a) To use this, you would have to round size upwards; but this will make
> sanitiziers more lenient.
> b) If ALIGN is just not supported by the implementation, then everything
> is UB in C11.

The letter of the specification is that all alignments of types defined in the 
specification must be supported and other "may" be supported. The intent is 
clearly that all relevant alignments on the target platform are supported.

FFmpeg assumes that alignment 16, 32 and 64 are supported already anyhow, so 
this would not be introducing any *new* UB. In this respect, FFmpeg is doing 
UB on practically all platforms other than x86, which seems to be the only 
platform to need alignment of 32 and 64 bytes for anything.

IMO, FFmpeg should not use custom alignments beyond `alignof(max_align_t)` 
unless they are specifically needed on the given platform, but that's a 
potentially tedious clean-up task with zero practical gains.

> 3. What's the advantage of this patch anyway?

In theory, `aligned_alloc()` (not `aligned_malloc()`) supports alignment of 1 
and any legal value until `sizeof(void*)`, *unlike* `posix_memalign()`. But 
since you can just as well use `malloc()` for that purpose, that is not a real 
advantage.
Rémi Denis-Courmont Feb. 18, 2024, 6:37 p.m. UTC | #4
Le sunnuntaina 18. helmikuuta 2024, 18.16.36 EET James Almer a écrit :
> Save for the Microsoft C Runtime library, where free() can't handle aligned
> buffers, aligned_malloc() should be available and working on all supported
> targets.
> Also, malloc() alone may be sufficient if alignment requirement is low, so
> add a check for it.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  configure       |  2 --
>  libavutil/mem.c | 42 ++++++------------------------------------
>  2 files changed, 6 insertions(+), 38 deletions(-)
> 
> diff --git a/configure b/configure
> index 7c45ac25c8..8fd2895ac2 100755
> --- a/configure
> +++ b/configure
> @@ -6450,8 +6450,6 @@ if test -n "$custom_allocator"; then
>  fi
> 
>  check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
> -check_func  ${malloc_prefix}memalign            && enable memalign
> -check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
> 
>  check_func  access
>  check_func_headers stdlib.h arc4random_buf
> diff --git a/libavutil/mem.c b/libavutil/mem.c
> index 36b8940a0c..a72981d1ab 100644
> --- a/libavutil/mem.c
> +++ b/libavutil/mem.c
> @@ -100,44 +100,14 @@ void *av_malloc(size_t size)
>      if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
> return NULL;
> 
> -#if HAVE_POSIX_MEMALIGN
> -    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
> -    if (posix_memalign(&ptr, ALIGN, size))
> -        ptr = NULL;
> -#elif HAVE_ALIGNED_MALLOC
> +#if HAVE_ALIGNED_MALLOC
>      ptr = _aligned_malloc(size, ALIGN);
> -#elif HAVE_MEMALIGN
> -#ifndef __DJGPP__
> -    ptr = memalign(ALIGN, size);
> -#else
> -    ptr = memalign(size, ALIGN);
> -#endif
> -    /* Why 64?
> -     * Indeed, we should align it:
> -     *   on  4 for 386
> -     *   on 16 for 486
> -     *   on 32 for 586, PPro - K6-III
> -     *   on 64 for K7 (maybe for P3 too).
> -     * Because L1 and L2 caches are aligned on those values.
> -     * But I don't want to code such logic here!
> -     */
> -    /* Why 32?
> -     * For AVX ASM. SSE / NEON needs only 16.
> -     * Why not larger? Because I did not see a difference in benchmarks ...
> -     */
> -    /* benchmarks with P3
> -     * memalign(64) + 1          3071, 3051, 3032
> -     * memalign(64) + 2          3051, 3032, 3041
> -     * memalign(64) + 4          2911, 2896, 2915
> -     * memalign(64) + 8          2545, 2554, 2550
> -     * memalign(64) + 16         2543, 2572, 2563
> -     * memalign(64) + 32         2546, 2545, 2571
> -     * memalign(64) + 64         2570, 2533, 2558
> -     *
> -     * BTW, malloc seems to do 8-byte alignment by default here.
> -     */
>  #else
> -    ptr = malloc(size);
> +    // malloc may already allocate sufficiently aligned buffers
> +    if (ALIGN > _Alignof(max_align_t))

If you ever try to reintroduce something like this, you would need 
<stdalign.h> here, and thus you should use alignof rather than _Alignof (which 
was already deprecated by C23 deprecated).

> +        ptr = aligned_malloc(size, ALIGN);
> +    else
> +        ptr = malloc(size);
>  #endif
>      if(!ptr && !size) {
>          size = 1;
Rémi Denis-Courmont Feb. 18, 2024, 6:39 p.m. UTC | #5
Le sunnuntaina 18. helmikuuta 2024, 18.29.32 EET James Almer a écrit :
> Removing all the different target specific allocation functions in favor
> of the standard one. But your second point makes it moot, so patch
> withdrawn.

If you want to get code closer to standards for dealing with alignment, I 
would argue that using alignas() instead of nonstandard constructs comes first.
Michael Niedermayer Feb. 19, 2024, 12:08 a.m. UTC | #6
On Sun, Feb 18, 2024 at 01:16:36PM -0300, James Almer wrote:
> Save for the Microsoft C Runtime library, where free() can't handle aligned
> buffers, aligned_malloc() should be available and working on all supported
> targets.
> Also, malloc() alone may be sufficient if alignment requirement is low, so add
> a check for it.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  configure       |  2 --
>  libavutil/mem.c | 42 ++++++------------------------------------
>  2 files changed, 6 insertions(+), 38 deletions(-)

This breaks build here

libavutil/mem.c: In function ‘av_malloc’:
libavutil/mem.c:108:15: error: implicit declaration of function ‘aligned_malloc’; did you mean ‘aligned_alloc’? [-Werror=implicit-function-declaration]
         ptr = aligned_malloc(size, ALIGN);
               ^~~~~~~~~~~~~~
               aligned_alloc
libavutil/mem.c:108:13: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
         ptr = aligned_malloc(size, ALIGN);
             ^
cc1: some warnings being treated as errors
ffbuild/common.mak:81: recipe for target 'libavutil/mem.o' failed

[...]
James Almer Feb. 19, 2024, 1:37 a.m. UTC | #7
On 2/18/2024 9:08 PM, Michael Niedermayer wrote:
> On Sun, Feb 18, 2024 at 01:16:36PM -0300, James Almer wrote:
>> Save for the Microsoft C Runtime library, where free() can't handle aligned
>> buffers, aligned_malloc() should be available and working on all supported
>> targets.
>> Also, malloc() alone may be sufficient if alignment requirement is low, so add
>> a check for it.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   configure       |  2 --
>>   libavutil/mem.c | 42 ++++++------------------------------------
>>   2 files changed, 6 insertions(+), 38 deletions(-)
> 
> This breaks build here
> 
> libavutil/mem.c: In function ‘av_malloc’:
> libavutil/mem.c:108:15: error: implicit declaration of function ‘aligned_malloc’; did you mean ‘aligned_alloc’? [-Werror=implicit-function-declaration]
>           ptr = aligned_malloc(size, ALIGN);
>                 ^~~~~~~~~~~~~~
>                 aligned_alloc
> libavutil/mem.c:108:13: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
>           ptr = aligned_malloc(size, ALIGN);
>               ^
> cc1: some warnings being treated as errors
> ffbuild/common.mak:81: recipe for target 'libavutil/mem.o' failed

Yes, i mistyped aligned_alloc as aligned_malloc.
diff mbox series

Patch

diff --git a/configure b/configure
index 7c45ac25c8..8fd2895ac2 100755
--- a/configure
+++ b/configure
@@ -6450,8 +6450,6 @@  if test -n "$custom_allocator"; then
 fi
 
 check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
-check_func  ${malloc_prefix}memalign            && enable memalign
-check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
 
 check_func  access
 check_func_headers stdlib.h arc4random_buf
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 36b8940a0c..a72981d1ab 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -100,44 +100,14 @@  void *av_malloc(size_t size)
     if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
         return NULL;
 
-#if HAVE_POSIX_MEMALIGN
-    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
-    if (posix_memalign(&ptr, ALIGN, size))
-        ptr = NULL;
-#elif HAVE_ALIGNED_MALLOC
+#if HAVE_ALIGNED_MALLOC
     ptr = _aligned_malloc(size, ALIGN);
-#elif HAVE_MEMALIGN
-#ifndef __DJGPP__
-    ptr = memalign(ALIGN, size);
-#else
-    ptr = memalign(size, ALIGN);
-#endif
-    /* Why 64?
-     * Indeed, we should align it:
-     *   on  4 for 386
-     *   on 16 for 486
-     *   on 32 for 586, PPro - K6-III
-     *   on 64 for K7 (maybe for P3 too).
-     * Because L1 and L2 caches are aligned on those values.
-     * But I don't want to code such logic here!
-     */
-    /* Why 32?
-     * For AVX ASM. SSE / NEON needs only 16.
-     * Why not larger? Because I did not see a difference in benchmarks ...
-     */
-    /* benchmarks with P3
-     * memalign(64) + 1          3071, 3051, 3032
-     * memalign(64) + 2          3051, 3032, 3041
-     * memalign(64) + 4          2911, 2896, 2915
-     * memalign(64) + 8          2545, 2554, 2550
-     * memalign(64) + 16         2543, 2572, 2563
-     * memalign(64) + 32         2546, 2545, 2571
-     * memalign(64) + 64         2570, 2533, 2558
-     *
-     * BTW, malloc seems to do 8-byte alignment by default here.
-     */
 #else
-    ptr = malloc(size);
+    // malloc may already allocate sufficiently aligned buffers
+    if (ALIGN > _Alignof(max_align_t))
+        ptr = aligned_malloc(size, ALIGN);
+    else
+        ptr = malloc(size);
 #endif
     if(!ptr && !size) {
         size = 1;