Message ID | 20240211174055.1659320-1-timo@rothenpieler.org |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] avutil/mem: limit alignment to maximum simd align | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
On 11/02/2024 18:40, Timo Rothenpieler wrote: > FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs, > which then end up heap-allocated. > By declaring any variable in a struct, or tree of structs, to be 32 byte > aligned, it allows the compiler to safely assume the entire struct > itself is also 32 byte aligned. > > This might make the compiler emit code which straight up crashes or > misbehaves in other ways, and at least in one instances is now > documented to actually do (see ticket 10549 on trac). > The issue there is that an unrelated variable in SingleChannelElement is > declared to have an alignment of 32 bytes. So if the compiler does a copy > in decode_cpe() with avx instructions, but ffmpeg is built with > --disable-avx, this results in a crash, since the memory is only 16 byte > aligned. > > Mind you, even if the compiler does not emit avx instructions, the code > is still invalid and could misbehave. It just happens not to. Declaring > any variable in a struct with a 32 byte alignment promises 32 byte > alignment of the whole struct to the compiler. > > This patch limits the maximum alignment to the maximum possible simd > alignment according to configure. > While not perfect, it at the very least gets rid of a lot of UB, by > matching up the maximum DECLARE_ALIGNED value with the alignment of heap > allocations done by lavu. > --- > libavutil/mem.c | 2 +- > libavutil/mem_internal.h | 33 ++++++++++++++++++++++++++++----- > 2 files changed, 29 insertions(+), 6 deletions(-) > > diff --git a/libavutil/mem.c b/libavutil/mem.c > index 36b8940a0c..62163b4cb3 100644 > --- a/libavutil/mem.c > +++ b/libavutil/mem.c > @@ -62,7 +62,7 @@ void free(void *ptr); > > #endif /* MALLOC_PREFIX */ > > -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16)) > +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 
32 : 16)) > > /* NOTE: if you want to override these functions with your own > * implementations (not recommended) you have to link libav* as > diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h > index 2448c606f1..b1d89a0605 100644 > --- a/libavutil/mem_internal.h > +++ b/libavutil/mem_internal.h > @@ -76,27 +76,50 @@ > */ > > #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C) > - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v > + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned (n))) v > #define DECLARE_ASM_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v > #define DECLARE_ASM_CONST(n,t,v) const t __attribute__ ((aligned (n))) v > #elif defined(__DJGPP__) > - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (FFMIN(n, 16)))) v > + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned (FFMIN(n, 16)))) v > #define DECLARE_ASM_ALIGNED(n,t,v) t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v > #define DECLARE_ASM_CONST(n,t,v) static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v > #elif defined(__GNUC__) || defined(__clang__) > - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned (n))) v > + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned (n))) v > #define DECLARE_ASM_ALIGNED(n,t,v) t av_used __attribute__ ((aligned (n))) v > #define DECLARE_ASM_CONST(n,t,v) static const t av_used __attribute__ ((aligned (n))) v > #elif defined(_MSC_VER) > - #define DECLARE_ALIGNED(n,t,v) __declspec(align(n)) t v > + #define DECLARE_ALIGNED_T(n,t,v) __declspec(align(n)) t v > #define DECLARE_ASM_ALIGNED(n,t,v) __declspec(align(n)) t v > #define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static const t v > #else > - #define DECLARE_ALIGNED(n,t,v) t v > + #define DECLARE_ALIGNED_T(n,t,v) t v > #define DECLARE_ASM_ALIGNED(n,t,v) t v > #define DECLARE_ASM_CONST(n,t,v) static const t v > #endif > > +#if HAVE_SIMD_ALIGN_64 > + #define ALIGN_64 64 > + #define ALIGN_32 32 > 
+#elif HAVE_SIMD_ALIGN_32 > + #define ALIGN_64 32 > + #define ALIGN_32 32 > +#else > + #define ALIGN_64 16 > + #define ALIGN_32 16 > +#endif > + > +#define DECLARE_ALIGNED(n,t,v) DECLARE_ALIGNED_V(n,t,v) > + > +// Macro needs to be double-wrapped in order to expand > +// possible other macros being passed for n. > +#define DECLARE_ALIGNED_V(n,t,v) DECLARE_ALIGNED_##n(t,v) > + > +#define DECLARE_ALIGNED_4(t,v) DECLARE_ALIGNED_T( 4, t, v) > +#define DECLARE_ALIGNED_8(t,v) DECLARE_ALIGNED_T( 8, t, v) > +#define DECLARE_ALIGNED_16(t,v) DECLARE_ALIGNED_T( 16, t, v) > +#define DECLARE_ALIGNED_32(t,v) DECLARE_ALIGNED_T(ALIGN_32, t, v) > +#define DECLARE_ALIGNED_64(t,v) DECLARE_ALIGNED_T(ALIGN_64, t, v) > + > // Some broken preprocessors need a second expansion > // to be forced to tokenize __VA_ARGS__ > #define E1(x) x I intend to push this patch soon.
On 26.02.2024 17:58, Timo Rothenpieler wrote: > On 11/02/2024 18:40, Timo Rothenpieler wrote: >> FFmpeg has instances of DECLARE_ALIGNED(32, ...) in a lot of structs, >> which then end up heap-allocated. >> By declaring any variable in a struct, or tree of structs, to be 32 byte >> aligned, it allows the compiler to safely assume the entire struct >> itself is also 32 byte aligned. >> >> This might make the compiler emit code which straight up crashes or >> misbehaves in other ways, and at least in one instances is now >> documented to actually do (see ticket 10549 on trac). >> The issue there is that an unrelated variable in SingleChannelElement is >> declared to have an alignment of 32 bytes. So if the compiler does a copy >> in decode_cpe() with avx instructions, but ffmpeg is built with >> --disable-avx, this results in a crash, since the memory is only 16 byte >> aligned. >> >> Mind you, even if the compiler does not emit avx instructions, the code >> is still invalid and could misbehave. It just happens not to. Declaring >> any variable in a struct with a 32 byte alignment promises 32 byte >> alignment of the whole struct to the compiler. >> >> This patch limits the maximum alignment to the maximum possible simd >> alignment according to configure. >> While not perfect, it at the very least gets rid of a lot of UB, by >> matching up the maximum DECLARE_ALIGNED value with the alignment of heap >> allocations done by lavu. >> --- >> libavutil/mem.c | 2 +- >> libavutil/mem_internal.h | 33 ++++++++++++++++++++++++++++----- >> 2 files changed, 29 insertions(+), 6 deletions(-) >> >> diff --git a/libavutil/mem.c b/libavutil/mem.c >> index 36b8940a0c..62163b4cb3 100644 >> --- a/libavutil/mem.c >> +++ b/libavutil/mem.c >> @@ -62,7 +62,7 @@ void free(void *ptr); >> #endif /* MALLOC_PREFIX */ >> -#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16)) >> +#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 
32 : 16)) >> /* NOTE: if you want to override these functions with your own >> * implementations (not recommended) you have to link libav* as >> diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h >> index 2448c606f1..b1d89a0605 100644 >> --- a/libavutil/mem_internal.h >> +++ b/libavutil/mem_internal.h >> @@ -76,27 +76,50 @@ >> */ >> #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || >> defined(__SUNPRO_C) >> - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned >> (n))) v >> + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned >> (n))) v >> #define DECLARE_ASM_ALIGNED(n,t,v) t __attribute__ ((aligned >> (n))) v >> #define DECLARE_ASM_CONST(n,t,v) const t __attribute__ >> ((aligned (n))) v >> #elif defined(__DJGPP__) >> - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned >> (FFMIN(n, 16)))) v >> + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned >> (FFMIN(n, 16)))) v >> #define DECLARE_ASM_ALIGNED(n,t,v) t av_used __attribute__ >> ((aligned (FFMIN(n, 16)))) v >> #define DECLARE_ASM_CONST(n,t,v) static const t av_used >> __attribute__ ((aligned (FFMIN(n, 16)))) v >> #elif defined(__GNUC__) || defined(__clang__) >> - #define DECLARE_ALIGNED(n,t,v) t __attribute__ ((aligned >> (n))) v >> + #define DECLARE_ALIGNED_T(n,t,v) t __attribute__ ((aligned >> (n))) v >> #define DECLARE_ASM_ALIGNED(n,t,v) t av_used __attribute__ >> ((aligned (n))) v >> #define DECLARE_ASM_CONST(n,t,v) static const t av_used >> __attribute__ ((aligned (n))) v >> #elif defined(_MSC_VER) >> - #define DECLARE_ALIGNED(n,t,v) __declspec(align(n)) t v >> + #define DECLARE_ALIGNED_T(n,t,v) __declspec(align(n)) t v >> #define DECLARE_ASM_ALIGNED(n,t,v) __declspec(align(n)) t v >> #define DECLARE_ASM_CONST(n,t,v) __declspec(align(n)) static >> const t v >> #else >> - #define DECLARE_ALIGNED(n,t,v) t v >> + #define DECLARE_ALIGNED_T(n,t,v) t v >> #define DECLARE_ASM_ALIGNED(n,t,v) t v >> #define DECLARE_ASM_CONST(n,t,v) static const t v >> #endif >> +#if 
HAVE_SIMD_ALIGN_64 >> + #define ALIGN_64 64 >> + #define ALIGN_32 32 >> +#elif HAVE_SIMD_ALIGN_32 >> + #define ALIGN_64 32 >> + #define ALIGN_32 32 >> +#else >> + #define ALIGN_64 16 >> + #define ALIGN_32 16 >> +#endif >> + >> +#define DECLARE_ALIGNED(n,t,v) DECLARE_ALIGNED_V(n,t,v) >> + >> +// Macro needs to be double-wrapped in order to expand >> +// possible other macros being passed for n. >> +#define DECLARE_ALIGNED_V(n,t,v) DECLARE_ALIGNED_##n(t,v) >> + >> +#define DECLARE_ALIGNED_4(t,v) DECLARE_ALIGNED_T( 4, t, v) >> +#define DECLARE_ALIGNED_8(t,v) DECLARE_ALIGNED_T( 8, t, v) >> +#define DECLARE_ALIGNED_16(t,v) DECLARE_ALIGNED_T( 16, t, v) >> +#define DECLARE_ALIGNED_32(t,v) DECLARE_ALIGNED_T(ALIGN_32, t, v) >> +#define DECLARE_ALIGNED_64(t,v) DECLARE_ALIGNED_T(ALIGN_64, t, v) >> + >> // Some broken preprocessors need a second expansion >> // to be forced to tokenize __VA_ARGS__ >> #define E1(x) x > > I intend to push this patch soon. applied
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 36b8940a0c..62163b4cb3 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -62,7 +62,7 @@ void free(void *ptr);
 
 #endif /* MALLOC_PREFIX */
 
-#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
+#define ALIGN (HAVE_SIMD_ALIGN_64 ? 64 : (HAVE_SIMD_ALIGN_32 ? 32 : 16))
 
 /* NOTE: if you want to override these functions with your own
  * implementations (not recommended) you have to link libav* as
diff --git a/libavutil/mem_internal.h b/libavutil/mem_internal.h
index 2448c606f1..b1d89a0605 100644
--- a/libavutil/mem_internal.h
+++ b/libavutil/mem_internal.h
@@ -76,27 +76,50 @@
  */
 
 #if defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1110 || defined(__SUNPRO_C)
-    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ALIGNED_T(n,t,v)    t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_ALIGNED(n,t,v)  t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
 #elif defined(__DJGPP__)
-    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (FFMIN(n, 16)))) v
+    #define DECLARE_ALIGNED_T(n,t,v)    t __attribute__ ((aligned (FFMIN(n, 16)))) v
     #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (FFMIN(n, 16)))) v
 #elif defined(__GNUC__) || defined(__clang__)
-    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ALIGNED_T(n,t,v)    t __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_ALIGNED(n,t,v)  t av_used __attribute__ ((aligned (n))) v
     #define DECLARE_ASM_CONST(n,t,v)    static const t av_used __attribute__ ((aligned (n))) v
 #elif defined(_MSC_VER)
-    #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
+    #define DECLARE_ALIGNED_T(n,t,v)    __declspec(align(n)) t v
     #define DECLARE_ASM_ALIGNED(n,t,v)  __declspec(align(n)) t v
     #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v
 #else
-    #define DECLARE_ALIGNED(n,t,v)      t v
+    #define DECLARE_ALIGNED_T(n,t,v)    t v
     #define DECLARE_ASM_ALIGNED(n,t,v)  t v
     #define DECLARE_ASM_CONST(n,t,v)    static const t v
 #endif
 
+#if HAVE_SIMD_ALIGN_64
+    #define ALIGN_64 64
+    #define ALIGN_32 32
+#elif HAVE_SIMD_ALIGN_32
+    #define ALIGN_64 32
+    #define ALIGN_32 32
+#else
+    #define ALIGN_64 16
+    #define ALIGN_32 16
+#endif
+
+#define DECLARE_ALIGNED(n,t,v)      DECLARE_ALIGNED_V(n,t,v)
+
+// Macro needs to be double-wrapped in order to expand
+// possible other macros being passed for n.
+#define DECLARE_ALIGNED_V(n,t,v)    DECLARE_ALIGNED_##n(t,v)
+
+#define DECLARE_ALIGNED_4(t,v)      DECLARE_ALIGNED_T(       4, t, v)
+#define DECLARE_ALIGNED_8(t,v)      DECLARE_ALIGNED_T(       8, t, v)
+#define DECLARE_ALIGNED_16(t,v)     DECLARE_ALIGNED_T(      16, t, v)
+#define DECLARE_ALIGNED_32(t,v)     DECLARE_ALIGNED_T(ALIGN_32, t, v)
+#define DECLARE_ALIGNED_64(t,v)     DECLARE_ALIGNED_T(ALIGN_64, t, v)
+
 // Some broken preprocessors need a second expansion
 // to be forced to tokenize __VA_ARGS__
 #define E1(x) x