diff mbox series

[FFmpeg-devel,v2] avutil/mem: limit alignment to maximum simd align

Message ID 20240113154600.23366-1-timo@rothenpieler.org
State Accepted
Commit 6154137b186734961726ae538ab5cbe287bab163
Headers show
Series [FFmpeg-devel,v2] avutil/mem: limit alignment to maximum simd align | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Timo Rothenpieler Jan. 13, 2024, 3:46 p.m. UTC
FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
which then end up heap-allocated.
By declaring any variable in a struct, or tree of structs, to be 32 byte
aligned, it allows the compiler to safely assume the entire struct
itself is also 32 byte aligned.

This might make the compiler emit code which straight up crashes or
misbehaves in other ways, and in at least one instance it is now
documented to actually do so (see ticket 10549 on trac).
The issue there is that an unrelated variable in SingleChannelElement is
declared to have an alignment of 32 bytes. So if the compiler does a copy
in decode_cpe() with avx instructions, but ffmpeg is built with
--disable-avx, this results in a crash, since the memory is only 16 byte
aligned.

Mind you, even if the compiler does not emit avx instructions, the code
is still invalid and could misbehave. It just happens not to. Declaring
any variable in a struct with a 32 byte alignment promises 32 byte
alignment of the whole struct to the compiler.

This patch limits the maximum alignment to the maximum possible simd
alignment according to configure.
While not perfect, it at the very least gets rid of a lot of UB, by
matching up the maximum DECLARE_ALIGNED value with the alignment of heap
allocations done by lavu.
---
 libavutil/mem.c          |  8 +++++++-
 libavutil/mem_internal.h | 14 ++++++++------
 2 files changed, 15 insertions(+), 7 deletions(-)

Comments

Timo Rothenpieler Feb. 9, 2024, 7:22 p.m. UTC | #1
On 13.01.2024 16:46, Timo Rothenpieler wrote:
> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
> which then end up heap-allocated.
> By declaring any variable in a struct, or tree of structs, to be 32 byte
> aligned, it allows the compiler to safely assume the entire struct
> itself is also 32 byte aligned.
> 
> This might make the compiler emit code which straight up crashes or
> misbehaves in other ways, and at least in one instances is now
> documented to actually do (see ticket 10549 on trac).
> The issue there is that an unrelated variable in SingleChannelElement is
> declared to have an alignment of 32 bytes. So if the compiler does a copy
> in decode_cpe() with avx instructions, but ffmpeg is built with
> --disable-avx, this results in a crash, since the memory is only 16 byte
> aligned.
> 
> Mind you, even if the compiler does not emit avx instructions, the code
> is still invalid and could misbehave. It just happens not to. Declaring
> any variable in a struct with a 32 byte alignment promises 32 byte
> alignment of the whole struct to the compiler.
> 
> This patch limits the maximum alignment to the maximum possible simd
> alignment according to configure.
> While not perfect, it at the very least gets rid of a lot of UB, by
> matching up the maximum DECLARE_ALIGNED value with the alignment of heap
> allocations done by lavu.
> ---
>   libavutil/mem.c          |  8 +++++++-
>   libavutil/mem_internal.h | 14 ++++++++------
>   2 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/libavutil/mem.c b/libavutil/mem.c
> index 36b8940a0c..b5bcaab164 100644
> --- a/libavutil/mem.c
> +++ b/libavutil/mem.c
> @@ -62,7 +62,13 @@ void  free(void *ptr);
>   
>   #endif /* MALLOC_PREFIX */
>   
> -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
> +#if defined(_MSC_VER)
> +/* MSVC does not support conditionally limiting alignment.
> +   Set minimum value here to maximum used throughout the codebase. */
> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
> +#else
> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
> +#endif
>   
>   /* NOTE: if you want to override these functions with your own
>    * implementations (not recommended) you have to link libav* as
> diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
> index 2448c606f1..e2911b5610 100644
> --- a/libavutil/mem_internal.h
> +++ b/libavutil/mem_internal.h
> @@ -75,18 +75,20 @@
>    * @param v Name of the variable
>    */
>   
> +#define MAX_ALIGNMENT (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
> +
>   #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>   #elif defined(__DJGPP__)
>       #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
>       #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>       #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>   #elif defined(__GNUC__) || defined(__clang__)
> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>   #elif defined(_MSC_VER)
>       #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
>       #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v

ping

We really should fix this before 7.0 (and probably also backport it, 
since UB is UB).

I'm fine with whatever approach, as long as the UB is gone.
Andreas Rheinhardt Feb. 11, 2024, 2 p.m. UTC | #2
Timo Rothenpieler:
> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
> which then end up heap-allocated.
> By declaring any variable in a struct, or tree of structs, to be 32 byte
> aligned, it allows the compiler to safely assume the entire struct
> itself is also 32 byte aligned.
> 
> This might make the compiler emit code which straight up crashes or
> misbehaves in other ways, and at least in one instances is now
> documented to actually do (see ticket 10549 on trac).
> The issue there is that an unrelated variable in SingleChannelElement is
> declared to have an alignment of 32 bytes. So if the compiler does a copy
> in decode_cpe() with avx instructions, but ffmpeg is built with
> --disable-avx, this results in a crash, since the memory is only 16 byte
> aligned.
> 
> Mind you, even if the compiler does not emit avx instructions, the code
> is still invalid and could misbehave. It just happens not to. Declaring
> any variable in a struct with a 32 byte alignment promises 32 byte
> alignment of the whole struct to the compiler.
> 
> This patch limits the maximum alignment to the maximum possible simd
> alignment according to configure.
> While not perfect, it at the very least gets rid of a lot of UB, by
> matching up the maximum DECLARE_ALIGNED value with the alignment of heap
> allocations done by lavu.
> ---
>  libavutil/mem.c          |  8 +++++++-
>  libavutil/mem_internal.h | 14 ++++++++------
>  2 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/libavutil/mem.c b/libavutil/mem.c
> index 36b8940a0c..b5bcaab164 100644
> --- a/libavutil/mem.c
> +++ b/libavutil/mem.c
> @@ -62,7 +62,13 @@ void  free(void *ptr);
>  
>  #endif /* MALLOC_PREFIX */
>  
> -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
> +#if defined(_MSC_VER)
> +/* MSVC does not support conditionally limiting alignment.
> +   Set minimum value here to maximum used throughout the codebase. */
> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
> +#else
> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
> +#endif
>  
>  /* NOTE: if you want to override these functions with your own
>   * implementations (not recommended) you have to link libav* as
> diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
> index 2448c606f1..e2911b5610 100644
> --- a/libavutil/mem_internal.h
> +++ b/libavutil/mem_internal.h
> @@ -75,18 +75,20 @@
>   * @param v Name of the variable
>   */
>  
> +#define MAX_ALIGNMENT (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
> +
>  #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>  #elif defined(__DJGPP__)
>      #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
>      #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>      #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>  #elif defined(__GNUC__) || defined(__clang__)
> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
> -    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
> +    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>  #elif defined(_MSC_VER)
>      #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
>      #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v

We use alignment for three different use cases: a) variables on the
stack; b) variables in structs; and c) static data. If we limit
alignment, we should only limit it for b). But unfortunately b) uses
the same macro as c), so someone would need to untangle this by adding
new macros. In the meantime, your original patch seems like the way to go.

- Andreas

One can probably make MSVC happy by avoiding FFMIN like this:
#if HAVE_SIMD_ALIGN_32
#define ALIGN_32 32
#else
#define ALIGN_32 16
#endif
#define DECLARE_VAR_ALIGNED_32(t, v) DECLARE_ALIGNED(ALIGN_32, t, v)
Sam James Feb. 11, 2024, 2:05 p.m. UTC | #3
Timo Rothenpieler <timo@rothenpieler.org> writes:

> On 13.01.2024 16:46, Timo Rothenpieler wrote:
>> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
>> which then end up heap-allocated.
>> By declaring any variable in a struct, or tree of structs, to be 32 byte
>> aligned, it allows the compiler to safely assume the entire struct
>> itself is also 32 byte aligned.
>> This might make the compiler emit code which straight up crashes or
>> misbehaves in other ways, and at least in one instances is now
>> documented to actually do (see ticket 10549 on trac).
>> The issue there is that an unrelated variable in SingleChannelElement is
>> declared to have an alignment of 32 bytes. So if the compiler does a copy
>> in decode_cpe() with avx instructions, but ffmpeg is built with
>> --disable-avx, this results in a crash, since the memory is only 16 byte
>> aligned.
>> Mind you, even if the compiler does not emit avx instructions, the
>> code
>> is still invalid and could misbehave. It just happens not to. Declaring
>> any variable in a struct with a 32 byte alignment promises 32 byte
>> alignment of the whole struct to the compiler.
>> This patch limits the maximum alignment to the maximum possible simd
>> alignment according to configure.
>> While not perfect, it at the very least gets rid of a lot of UB, by
>> matching up the maximum DECLARE_ALIGNED value with the alignment of heap
>> allocations done by lavu.
>> ---
>>   libavutil/mem.c          |  8 +++++++-
>>   libavutil/mem_internal.h | 14 ++++++++------
>>   2 files changed, 15 insertions(+), 7 deletions(-)
>> diff --git a/libavutil/mem.c b/libavutil/mem.c
>> index 36b8940a0c..b5bcaab164 100644
>> --- a/libavutil/mem.c
>> +++ b/libavutil/mem.c
>> @@ -62,7 +62,13 @@ void  free(void *ptr);
>>     #endif /* MALLOC_PREFIX */
>>   -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
>> +#if defined(_MSC_VER)
>> +/* MSVC does not support conditionally limiting alignment.
>> +   Set minimum value here to maximum used throughout the codebase. */
>> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
>> +#else
>> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
>> +#endif
>>     /* NOTE: if you want to override these functions with your own
>>    * implementations (not recommended) you have to link libav* as
>> diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
>> index 2448c606f1..e2911b5610 100644
>> --- a/libavutil/mem_internal.h
>> +++ b/libavutil/mem_internal.h
>> @@ -75,18 +75,20 @@
>>    * @param v Name of the variable
>>    */
>>   +#define MAX_ALIGNMENT (HAVE_SIMD_ALIGN_64 ? 64 :
>> (HAVE_SIMD_ALIGN_32 ? 32 : 16))
>> +
>>   #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
>> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
>> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>>   #elif defined(__DJGPP__)
>>       #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
>>       #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>>       #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>>   #elif defined(__GNUC__) || defined(__clang__)
>> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
>> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>>   #elif defined(_MSC_VER)
>>       #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
>>       #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v
>
> ping
>
> We really should fix this before 7.0 (and probably also backport it,
> since UB is UB).
>
> I'm fine with whatever approach, as long as the UB is gone.

Yes please, we keep getting users hitting this.

(There's a packaging improvement we can make which Timo has suggested
and I need to implement, but the issue is there nonetheless.)
Rémi Denis-Courmont Feb. 11, 2024, 2:22 p.m. UTC | #4
Le perjantaina 9. helmikuuta 2024, 21.22.17 EET Timo Rothenpieler a écrit :
> On 13.01.2024 16:46, Timo Rothenpieler wrote:
> > FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
> > which then end up heap-allocated.
> > By declaring any variable in a struct, or tree of structs, to be 32 byte
> > aligned, it allows the compiler to safely assume the entire struct
> > itself is also 32 byte aligned.
> > 
> > This might make the compiler emit code which straight up crashes or
> > misbehaves in other ways, and at least in one instances is now
> > documented to actually do (see ticket 10549 on trac).
> > The issue there is that an unrelated variable in SingleChannelElement is
> > declared to have an alignment of 32 bytes. So if the compiler does a copy
> > in decode_cpe() with avx instructions, but ffmpeg is built with
> > --disable-avx, this results in a crash, since the memory is only 16 byte
> > aligned.
> > 
> > Mind you, even if the compiler does not emit avx instructions, the code
> > is still invalid and could misbehave. It just happens not to. Declaring
> > any variable in a struct with a 32 byte alignment promises 32 byte
> > alignment of the whole struct to the compiler.
> > 
> > This patch limits the maximum alignment to the maximum possible simd
> > alignment according to configure.
> > While not perfect, it at the very least gets rid of a lot of UB, by
> > matching up the maximum DECLARE_ALIGNED value with the alignment of heap
> > allocations done by lavu.
> > ---
> > 
> >   libavutil/mem.c          |  8 +++++++-
> >   libavutil/mem_internal.h | 14 ++++++++------
> >   2 files changed, 15 insertions(+), 7 deletions(-)
> > 
> > diff --git a/libavutil/mem.c b/libavutil/mem.c
> > index 36b8940a0c..b5bcaab164 100644
> > --- a/libavutil/mem.c
> > +++ b/libavutil/mem.c
> > @@ -62,7 +62,13 @@ void  free(void *ptr);
> > 
> >   #endif /* MALLOC_PREFIX */
> > 
> > -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
> > +#if defined(_MSC_VER)
> > +/* MSVC does not support conditionally limiting alignment.
> > +   Set minimum value here to maximum used throughout the codebase. */
> > +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)

Not that I care whatsoever, but are we assuming that MSVC supports only x86? 
Otherwise, this conditional definition does not make much sense and seems very 
sketchy. In fact, I don't see the point in making this distinction at all 
(*unlike* below).
Timo Rothenpieler Feb. 11, 2024, 3:47 p.m. UTC | #5
On 11.02.2024 15:22, Rémi Denis-Courmont wrote:
> Le perjantaina 9. helmikuuta 2024, 21.22.17 EET Timo Rothenpieler a écrit :
>> On 13.01.2024 16:46, Timo Rothenpieler wrote:
>>> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
>>> which then end up heap-allocated.
>>> By declaring any variable in a struct, or tree of structs, to be 32 byte
>>> aligned, it allows the compiler to safely assume the entire struct
>>> itself is also 32 byte aligned.
>>>
>>> This might make the compiler emit code which straight up crashes or
>>> misbehaves in other ways, and at least in one instances is now
>>> documented to actually do (see ticket 10549 on trac).
>>> The issue there is that an unrelated variable in SingleChannelElement is
>>> declared to have an alignment of 32 bytes. So if the compiler does a copy
>>> in decode_cpe() with avx instructions, but ffmpeg is built with
>>> --disable-avx, this results in a crash, since the memory is only 16 byte
>>> aligned.
>>>
>>> Mind you, even if the compiler does not emit avx instructions, the code
>>> is still invalid and could misbehave. It just happens not to. Declaring
>>> any variable in a struct with a 32 byte alignment promises 32 byte
>>> alignment of the whole struct to the compiler.
>>>
>>> This patch limits the maximum alignment to the maximum possible simd
>>> alignment according to configure.
>>> While not perfect, it at the very least gets rid of a lot of UB, by
>>> matching up the maximum DECLARE_ALIGNED value with the alignment of heap
>>> allocations done by lavu.
>>> ---
>>>
>>>    libavutil/mem.c          |  8 +++++++-
>>>    libavutil/mem_internal.h | 14 ++++++++------
>>>    2 files changed, 15 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/libavutil/mem.c b/libavutil/mem.c
>>> index 36b8940a0c..b5bcaab164 100644
>>> --- a/libavutil/mem.c
>>> +++ b/libavutil/mem.c
>>> @@ -62,7 +62,13 @@ void  free(void *ptr);
>>>
>>>    #endif /* MALLOC_PREFIX */
>>>
>>> -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
>>> +#if defined(_MSC_VER)
>>> +/* MSVC does not support conditionally limiting alignment.
>>> +   Set minimum value here to maximum used throughout the codebase. */
>>> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
> 
> Not that I care whatsoever, but are we assuming that MSVC supports only x86?
> Otherwise, this conditional definition does not make much sense and seems very
> sketchy. In fact, I don't see the point in making this distinction at all
> (*unlike* below).
> 

MSVC straight up _does not support_ putting conditionals into its 
alignment macros.
It initially had the same treatment, but failed with compile errors.
Timo Rothenpieler Feb. 11, 2024, 4:06 p.m. UTC | #6
On 11.02.2024 15:00, Andreas Rheinhardt wrote:
> Timo Rothenpieler:
>> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs,
>> which then end up heap-allocated.
>> By declaring any variable in a struct, or tree of structs, to be 32 byte
>> aligned, it allows the compiler to safely assume the entire struct
>> itself is also 32 byte aligned.
>>
>> This might make the compiler emit code which straight up crashes or
>> misbehaves in other ways, and at least in one instances is now
>> documented to actually do (see ticket 10549 on trac).
>> The issue there is that an unrelated variable in SingleChannelElement is
>> declared to have an alignment of 32 bytes. So if the compiler does a copy
>> in decode_cpe() with avx instructions, but ffmpeg is built with
>> --disable-avx, this results in a crash, since the memory is only 16 byte
>> aligned.
>>
>> Mind you, even if the compiler does not emit avx instructions, the code
>> is still invalid and could misbehave. It just happens not to. Declaring
>> any variable in a struct with a 32 byte alignment promises 32 byte
>> alignment of the whole struct to the compiler.
>>
>> This patch limits the maximum alignment to the maximum possible simd
>> alignment according to configure.
>> While not perfect, it at the very least gets rid of a lot of UB, by
>> matching up the maximum DECLARE_ALIGNED value with the alignment of heap
>> allocations done by lavu.
>> ---
>>   libavutil/mem.c          |  8 +++++++-
>>   libavutil/mem_internal.h | 14 ++++++++------
>>   2 files changed, 15 insertions(+), 7 deletions(-)
>>
>> diff --git a/libavutil/mem.c b/libavutil/mem.c
>> index 36b8940a0c..b5bcaab164 100644
>> --- a/libavutil/mem.c
>> +++ b/libavutil/mem.c
>> @@ -62,7 +62,13 @@ void  free(void *ptr);
>>   
>>   #endif /* MALLOC_PREFIX */
>>   
>> -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
>> +#if defined(_MSC_VER)
>> +/* MSVC does not support conditionally limiting alignment.
>> +   Set minimum value here to maximum used throughout the codebase. */
>> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
>> +#else
>> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
>> +#endif
>>   
>>   /* NOTE: if you want to override these functions with your own
>>    * implementations (not recommended) you have to link libav* as
>> diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
>> index 2448c606f1..e2911b5610 100644
>> --- a/libavutil/mem_internal.h
>> +++ b/libavutil/mem_internal.h
>> @@ -75,18 +75,20 @@
>>    * @param v Name of the variable
>>    */
>>   
>> +#define MAX_ALIGNMENT (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
>> +
>>   #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
>> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
>> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>>   #elif defined(__DJGPP__)
>>       #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
>>       #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>>       #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
>>   #elif defined(__GNUC__) || defined(__clang__)
>> -    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
>> -    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
>> +    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>> +    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
>>   #elif defined(_MSC_VER)
>>       #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
>>       #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v
> 
> We use alignment for three different usecases: a) Variables on the
> stack; b) variables in structs and c) static data. If we limit
> alignment, we should only limit it for b). But unfortunately they use
> the same macro as c), so someone would need to untangle this by adding
> new macros. In the meantime, your original patch seems like the way to go.

Is it really such an issue to limit the alignment to less than some of
those requests, if there are no SIMD instructions that would ever need a
higher alignment on that platform?

I can't think of many situations where you'd need alignment other than 
SIMD, outside of crazy page alignment stuff, for which 32/64 bytes are 
far from enough anyway.


If there are no further objections, I'll push a simple bump to 32 bytes
now, as per the original patch.
And then we can figure out how to make it a bit nicer.
Because as it is now, it unnecessarily forces double the alignment
size onto a whole bunch of arches.

> - Andreas
> 
> One can probably make MSVC happy by avoiding FFMIN like this:
> #if HAVE_SIMD_ALIGN_32
> #define ALIGN_32 32
> #else
> #define ALIGN_32 16
> #endif
> #define DECLARE_VAR_ALIGNED_32(t, v) DECLARE_ALIGNED(ALIGN_32, t, v)
diff mbox series

Patch

diff --git a/libavutil/mem.c b/libavutil/mem.c
index 36b8940a0c..b5bcaab164 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -62,7 +62,13 @@  void  free(void *ptr);
 
 #endif /* MALLOC_PREFIX */
 
-#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
+#if defined(_MSC_VER)
+/* MSVC does not support conditionally limiting alignment.
+   Set minimum value here to maximum used throughout the codebase. */
+#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : 32)
+#else
+#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
+#endif
 
 /* NOTE: if you want to override these functions with your own
  * implementations (not recommended) you have to link libav* as
diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
index 2448c606f1..e2911b5610 100644
--- a/libavutil/mem_internal.h
+++ b/libavutil/mem_internal.h
@@ -75,18 +75,20 @@ 
  * @param v Name of the variable
  */
 
+#define MAX_ALIGNMENT (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
+
 #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
-    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
-    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
-    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
+    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
 #elif defined(__DJGPP__)
     #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
     #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
 #elif defined(__GNUC__) || defined(__clang__)
-    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
-    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
-    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
+    #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
+    #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, MAX_ALIGNMENT)))) v
 #elif defined(_MSC_VER)
     #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
     #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v