Message ID | 20170903004738.8928-1-jamrial@gmail.com |
---|---|
State | New |
Headers | show |
On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: > From: Anton Khirnov <anton@khirnov.net> > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) > Signed-off-by: James Almer <jamrial@gmail.com> > --- > This is (afaics) the last API introduced to libav before the major bump. > > Now checking all the x86 flags that would require aligment of 16 bytes > or more. > > doc/APIchanges | 3 +++ > libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ > libavutil/cpu.h | 13 +++++++++++++ > libavutil/version.h | 2 +- > 4 files changed, 52 insertions(+), 1 deletion(-) > > diff --git a/doc/APIchanges b/doc/APIchanges > index 4effbf9364..6a57c210a9 100644 > --- a/doc/APIchanges > +++ b/doc/APIchanges > @@ -15,6 +15,9 @@ libavutil: 2015-08-28 > > API changes, most recent first: > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h > + Add av_cpu_max_align() for querying maximum required data alignment. > + > 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h > Add avio_read_partial(). > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > index a22da0fa8c..4f04da2460 100644 > --- a/libavutil/cpu.c > +++ b/libavutil/cpu.c > @@ -16,9 +16,11 @@ > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > */ > > +#include <stddef.h> > #include <stdint.h> > #include <stdatomic.h> > > +#include "attributes.h" > #include "cpu.h" > #include "cpu_internal.h" > #include "config.h" > @@ -299,3 +301,36 @@ int av_cpu_count(void) > > return nb_cpus; > } > + > +size_t av_cpu_max_align(void) > +{ > + int av_unused flags = av_get_cpu_flags(); > + > +#if ARCH_ARM || ARCH_AARCH64 > + if (flags & AV_CPU_FLAG_NEON) > + return 16; > +#elif ARCH_PPC > + if (flags & AV_CPU_FLAG_ALTIVEC) > + return 16; > +#elif ARCH_X86 > + if (flags & (AV_CPU_FLAG_AVX2 | > + AV_CPU_FLAG_AVX | > + AV_CPU_FLAG_FMA4 | > + AV_CPU_FLAG_FMA3)) > + return 32; > + if (flags & (AV_CPU_FLAG_XOP | > + AV_CPU_FLAG_AESNI | > + AV_CPU_FLAG_SSE42 | > + AV_CPU_FLAG_SSE4 | > + AV_CPU_FLAG_SSSE3 | > + AV_CPU_FLAG_SSE3 | > + AV_CPU_FLAG_SSE2 | > + AV_CPU_FLAG_SSE | > + AV_CPU_FLAG_AVXSLOW | > + AV_CPU_FLAG_SSE3SLOW | > + AV_CPU_FLAG_SSE2SLOW)) > + return 16; > +#endif > + > + return 8; > +} > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > index de05593446..9e5d40affe 100644 > --- a/libavutil/cpu.h > +++ b/libavutil/cpu.h > @@ -21,6 +21,8 @@ > #ifndef AVUTIL_CPU_H > #define AVUTIL_CPU_H > > +#include <stddef.h> > + > #include "attributes.h" > > #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); > */ > int av_cpu_count(void); > > +/** > + * Get the maximum data alignment that may be required by FFmpeg. > + * > + * Note that this is affected by the build configuration and the CPU flags mask, > + * so e.g. if the CPU supports AVX, but libavutil has been built with > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through > + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > + * present. > + */ > +size_t av_cpu_max_align(void); This might interact badly with runtime cpu flags/mask changes If its used to choose the alignment for allocated frames and after some are allocated the cpu flags are changed there could be still frames in queues that may not have sufficient alignment for the new flags [...]
On Mon, 4 Sep 2017 21:18:35 +0200 Michael Niedermayer <michael@niedermayer.cc> wrote: > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: > > From: Anton Khirnov <anton@khirnov.net> > > > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) > > Signed-off-by: James Almer <jamrial@gmail.com> > > --- > > This is (afaics) the last API introduced to libav before the major bump. > > > > Now checking all the x86 flags that would require aligment of 16 bytes > > or more. > > > > doc/APIchanges | 3 +++ > > libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ > > libavutil/cpu.h | 13 +++++++++++++ > > libavutil/version.h | 2 +- > > 4 files changed, 52 insertions(+), 1 deletion(-) > > > > diff --git a/doc/APIchanges b/doc/APIchanges > > index 4effbf9364..6a57c210a9 100644 > > --- a/doc/APIchanges > > +++ b/doc/APIchanges > > @@ -15,6 +15,9 @@ libavutil: 2015-08-28 > > > > API changes, most recent first: > > > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h > > + Add av_cpu_max_align() for querying maximum required data alignment. > > + > > 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h > > Add avio_read_partial(). > > > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > > index a22da0fa8c..4f04da2460 100644 > > --- a/libavutil/cpu.c > > +++ b/libavutil/cpu.c > > @@ -16,9 +16,11 @@ > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > */ > > > > +#include <stddef.h> > > #include <stdint.h> > > #include <stdatomic.h> > > > > +#include "attributes.h" > > #include "cpu.h" > > #include "cpu_internal.h" > > #include "config.h" > > @@ -299,3 +301,36 @@ int av_cpu_count(void) > > > > return nb_cpus; > > } > > + > > +size_t av_cpu_max_align(void) > > +{ > > + int av_unused flags = av_get_cpu_flags(); > > + > > +#if ARCH_ARM || ARCH_AARCH64 > > + if (flags & AV_CPU_FLAG_NEON) > > + return 16; > > +#elif ARCH_PPC > > + if (flags & AV_CPU_FLAG_ALTIVEC) > > + return 16; > > +#elif ARCH_X86 > > + if (flags & (AV_CPU_FLAG_AVX2 | > > + AV_CPU_FLAG_AVX | > > + AV_CPU_FLAG_FMA4 | > > + AV_CPU_FLAG_FMA3)) > > + return 32; > > + if (flags & (AV_CPU_FLAG_XOP | > > + AV_CPU_FLAG_AESNI | > > + AV_CPU_FLAG_SSE42 | > > + AV_CPU_FLAG_SSE4 | > > + AV_CPU_FLAG_SSSE3 | > > + AV_CPU_FLAG_SSE3 | > > + AV_CPU_FLAG_SSE2 | > > + AV_CPU_FLAG_SSE | > > + AV_CPU_FLAG_AVXSLOW | > > + AV_CPU_FLAG_SSE3SLOW | > > + AV_CPU_FLAG_SSE2SLOW)) > > + return 16; > > +#endif > > + > > + return 8; > > +} > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > > index de05593446..9e5d40affe 100644 > > --- a/libavutil/cpu.h > > +++ b/libavutil/cpu.h > > @@ -21,6 +21,8 @@ > > #ifndef AVUTIL_CPU_H > > #define AVUTIL_CPU_H > > > > +#include <stddef.h> > > + > > #include "attributes.h" > > > > #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ > > > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); > > */ > > int av_cpu_count(void); > > > > +/** > > + * Get the maximum data alignment that may be required by FFmpeg. > > + * > > + * Note that this is affected by the build configuration and the CPU flags mask, > > + * so e.g. if the CPU supports AVX, but libavutil has been built with > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through > > + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > > + * present. > > + */ > > +size_t av_cpu_max_align(void); > > This might interact badly with runtime cpu flags/mask changes > > If its used to choose the alignment for allocated frames and > after some are allocated the cpu flags are changed there could be > still frames in queues that may not have sufficient alignment for the > new flags There's no such thing as runtime CPU flag changes.
On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: > On Mon, 4 Sep 2017 21:18:35 +0200 > Michael Niedermayer <michael@niedermayer.cc> wrote: > > > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: > > > From: Anton Khirnov <anton@khirnov.net> > > > > > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) > > > Signed-off-by: James Almer <jamrial@gmail.com> > > > --- > > > This is (afaics) the last API introduced to libav before the major bump. > > > > > > Now checking all the x86 flags that would require aligment of 16 bytes > > > or more. > > > > > > doc/APIchanges | 3 +++ > > > libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ > > > libavutil/cpu.h | 13 +++++++++++++ > > > libavutil/version.h | 2 +- > > > 4 files changed, 52 insertions(+), 1 deletion(-) > > > > > > diff --git a/doc/APIchanges b/doc/APIchanges > > > index 4effbf9364..6a57c210a9 100644 > > > --- a/doc/APIchanges > > > +++ b/doc/APIchanges > > > @@ -15,6 +15,9 @@ libavutil: 2015-08-28 > > > > > > API changes, most recent first: > > > > > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h > > > + Add av_cpu_max_align() for querying maximum required data alignment. > > > + > > > 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h > > > Add avio_read_partial(). > > > > > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > > > index a22da0fa8c..4f04da2460 100644 > > > --- a/libavutil/cpu.c > > > +++ b/libavutil/cpu.c > > > @@ -16,9 +16,11 @@ > > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > > */ > > > > > > +#include <stddef.h> > > > #include <stdint.h> > > > #include <stdatomic.h> > > > > > > +#include "attributes.h" > > > #include "cpu.h" > > > #include "cpu_internal.h" > > > #include "config.h" > > > @@ -299,3 +301,36 @@ int av_cpu_count(void) > > > > > > return nb_cpus; > > > } > > > + > > > +size_t av_cpu_max_align(void) > > > +{ > > > + int av_unused flags = av_get_cpu_flags(); > > > + > > > +#if ARCH_ARM || ARCH_AARCH64 > > > + if (flags & AV_CPU_FLAG_NEON) > > > + return 16; > > > +#elif ARCH_PPC > > > + if (flags & AV_CPU_FLAG_ALTIVEC) > > > + return 16; > > > +#elif ARCH_X86 > > > + if (flags & (AV_CPU_FLAG_AVX2 | > > > + AV_CPU_FLAG_AVX | > > > + AV_CPU_FLAG_FMA4 | > > > + AV_CPU_FLAG_FMA3)) > > > + return 32; > > > + if (flags & (AV_CPU_FLAG_XOP | > > > + AV_CPU_FLAG_AESNI | > > > + AV_CPU_FLAG_SSE42 | > > > + AV_CPU_FLAG_SSE4 | > > > + AV_CPU_FLAG_SSSE3 | > > > + AV_CPU_FLAG_SSE3 | > > > + AV_CPU_FLAG_SSE2 | > > > + AV_CPU_FLAG_SSE | > > > + AV_CPU_FLAG_AVXSLOW | > > > + AV_CPU_FLAG_SSE3SLOW | > > > + AV_CPU_FLAG_SSE2SLOW)) > > > + return 16; > > > +#endif > > > + > > > + return 8; > > > +} > > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > > > index de05593446..9e5d40affe 100644 > > > --- a/libavutil/cpu.h > > > +++ b/libavutil/cpu.h > > > @@ -21,6 +21,8 @@ > > > #ifndef AVUTIL_CPU_H > > > #define AVUTIL_CPU_H > > > > > > +#include <stddef.h> > > > + > > > #include "attributes.h" > > > > > > #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ > > > > > > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); > > > */ > > > int av_cpu_count(void); > > > > > > +/** > > > + * Get the maximum data alignment that may be required by FFmpeg. > > > + * > > > + * Note that this is affected by the build configuration and the CPU flags mask, > > > + * so e.g. if the CPU supports AVX, but libavutil has been built with > > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through > > > + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > > > + * present. > > > + */ > > > +size_t av_cpu_max_align(void); > > > > This might interact badly with runtime cpu flags/mask changes > > > > If its used to choose the alignment for allocated frames and > > after some are allocated the cpu flags are changed there could be > > still frames in queues that may not have sufficient alignment for the > > new flags > > There's no such thing as runtime CPU flag changes. We even have an API to change the cpu flags at runtime. av_set_cpu_flags_mask() and av_force_cpu_flags() There is no restriction in the API on when they can be called. you can call av_force_cpu_flags(0) then open a decoder then call av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) then open a filter or encoder then run the code and it could crash as the allocated frames from earlier are not aligned enough for the 2nd filter or encoder There also may be other scenarios where this can occur [...]
On Mon, Sep 4, 2017 at 10:53 PM, Michael Niedermayer <michael@niedermayer.cc> wrote: > On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: >> On Mon, 4 Sep 2017 21:18:35 +0200 >> Michael Niedermayer <michael@niedermayer.cc> wrote: >> >> > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: >> > > From: Anton Khirnov <anton@khirnov.net> >> > > >> > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) >> > > Signed-off-by: James Almer <jamrial@gmail.com> >> > > --- >> > > This is (afaics) the last API introduced to libav before the major bump. >> > > >> > > Now checking all the x86 flags that would require aligment of 16 bytes >> > > or more. >> > > >> > > doc/APIchanges | 3 +++ >> > > libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ >> > > libavutil/cpu.h | 13 +++++++++++++ >> > > libavutil/version.h | 2 +- >> > > 4 files changed, 52 insertions(+), 1 deletion(-) >> > > >> > > diff --git a/doc/APIchanges b/doc/APIchanges >> > > index 4effbf9364..6a57c210a9 100644 >> > > --- a/doc/APIchanges >> > > +++ b/doc/APIchanges >> > > @@ -15,6 +15,9 @@ libavutil: 2015-08-28 >> > > >> > > API changes, most recent first: >> > > >> > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h >> > > + Add av_cpu_max_align() for querying maximum required data alignment. >> > > + >> > > 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h >> > > Add avio_read_partial(). >> > > >> > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c >> > > index a22da0fa8c..4f04da2460 100644 >> > > --- a/libavutil/cpu.c >> > > +++ b/libavutil/cpu.c >> > > @@ -16,9 +16,11 @@ >> > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA >> > > */ >> > > >> > > +#include <stddef.h> >> > > #include <stdint.h> >> > > #include <stdatomic.h> >> > > >> > > +#include "attributes.h" >> > > #include "cpu.h" >> > > #include "cpu_internal.h" >> > > #include "config.h" >> > > @@ -299,3 +301,36 @@ int av_cpu_count(void) >> > > >> > > return nb_cpus; >> > > } >> > > + >> > > +size_t av_cpu_max_align(void) >> > > +{ >> > > + int av_unused flags = av_get_cpu_flags(); >> > > + >> > > +#if ARCH_ARM || ARCH_AARCH64 >> > > + if (flags & AV_CPU_FLAG_NEON) >> > > + return 16; >> > > +#elif ARCH_PPC >> > > + if (flags & AV_CPU_FLAG_ALTIVEC) >> > > + return 16; >> > > +#elif ARCH_X86 >> > > + if (flags & (AV_CPU_FLAG_AVX2 | >> > > + AV_CPU_FLAG_AVX | >> > > + AV_CPU_FLAG_FMA4 | >> > > + AV_CPU_FLAG_FMA3)) >> > > + return 32; >> > > + if (flags & (AV_CPU_FLAG_XOP | >> > > + AV_CPU_FLAG_AESNI | >> > > + AV_CPU_FLAG_SSE42 | >> > > + AV_CPU_FLAG_SSE4 | >> > > + AV_CPU_FLAG_SSSE3 | >> > > + AV_CPU_FLAG_SSE3 | >> > > + AV_CPU_FLAG_SSE2 | >> > > + AV_CPU_FLAG_SSE | >> > > + AV_CPU_FLAG_AVXSLOW | >> > > + AV_CPU_FLAG_SSE3SLOW | >> > > + AV_CPU_FLAG_SSE2SLOW)) >> > > + return 16; >> > > +#endif >> > > + >> > > + return 8; >> > > +} >> > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h >> > > index de05593446..9e5d40affe 100644 >> > > --- a/libavutil/cpu.h >> > > +++ b/libavutil/cpu.h >> > > @@ -21,6 +21,8 @@ >> > > #ifndef AVUTIL_CPU_H >> > > #define AVUTIL_CPU_H >> > > >> > > +#include <stddef.h> >> > > + >> > > #include "attributes.h" >> > > >> > > #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ >> > >> > >> > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); >> > > */ >> > > int av_cpu_count(void); >> > > >> > > +/** >> > > + * Get the maximum data alignment that may be required by FFmpeg. >> > > + * >> > > + * Note that this is affected by the build configuration and the CPU flags mask, >> > > + * so e.g. if the CPU supports AVX, but libavutil has been built with >> > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through >> > > + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not >> > > + * present. >> > > + */ >> > > +size_t av_cpu_max_align(void); >> > >> > This might interact badly with runtime cpu flags/mask changes >> > >> > If its used to choose the alignment for allocated frames and >> > after some are allocated the cpu flags are changed there could be >> > still frames in queues that may not have sufficient alignment for the >> > new flags >> >> There's no such thing as runtime CPU flag changes. > > We even have an API to change the cpu flags at runtime. > > av_set_cpu_flags_mask() and av_force_cpu_flags() > > There is no restriction in the API on when they can be called. > > you can call av_force_cpu_flags(0) then open a decoder then call > av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) > then open a filter or encoder > then run the code and it could crash as the allocated frames from > earlier are not aligned enough for the 2nd filter or encoder > > There also may be other scenarios where this can occur > Thats why API to change flags is a dumb feature to have user-facing. It should correspond to hardware support and nothing else. - Hendrik
On Mon, 4 Sep 2017 22:53:43 +0200 Michael Niedermayer <michael@niedermayer.cc> wrote: > On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: > > On Mon, 4 Sep 2017 21:18:35 +0200 > > Michael Niedermayer <michael@niedermayer.cc> wrote: > > > > > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: > > > > From: Anton Khirnov <anton@khirnov.net> > > > > > > > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) > > > > Signed-off-by: James Almer <jamrial@gmail.com> > > > > --- > > > > This is (afaics) the last API introduced to libav before the major bump. > > > > > > > > Now checking all the x86 flags that would require aligment of 16 bytes > > > > or more. > > > > > > > > doc/APIchanges | 3 +++ > > > > libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ > > > > libavutil/cpu.h | 13 +++++++++++++ > > > > libavutil/version.h | 2 +- > > > > 4 files changed, 52 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/doc/APIchanges b/doc/APIchanges > > > > index 4effbf9364..6a57c210a9 100644 > > > > --- a/doc/APIchanges > > > > +++ b/doc/APIchanges > > > > @@ -15,6 +15,9 @@ libavutil: 2015-08-28 > > > > > > > > API changes, most recent first: > > > > > > > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h > > > > + Add av_cpu_max_align() for querying maximum required data alignment. > > > > + > > > > 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h > > > > Add avio_read_partial(). > > > > > > > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > > > > index a22da0fa8c..4f04da2460 100644 > > > > --- a/libavutil/cpu.c > > > > +++ b/libavutil/cpu.c > > > > @@ -16,9 +16,11 @@ > > > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > > > */ > > > > > > > > +#include <stddef.h> > > > > #include <stdint.h> > > > > #include <stdatomic.h> > > > > > > > > +#include "attributes.h" > > > > #include "cpu.h" > > > > #include "cpu_internal.h" > > > > #include "config.h" > > > > @@ -299,3 +301,36 @@ int av_cpu_count(void) > > > > > > > > return nb_cpus; > > > > } > > > > + > > > > +size_t av_cpu_max_align(void) > > > > +{ > > > > + int av_unused flags = av_get_cpu_flags(); > > > > + > > > > +#if ARCH_ARM || ARCH_AARCH64 > > > > + if (flags & AV_CPU_FLAG_NEON) > > > > + return 16; > > > > +#elif ARCH_PPC > > > > + if (flags & AV_CPU_FLAG_ALTIVEC) > > > > + return 16; > > > > +#elif ARCH_X86 > > > > + if (flags & (AV_CPU_FLAG_AVX2 | > > > > + AV_CPU_FLAG_AVX | > > > > + AV_CPU_FLAG_FMA4 | > > > > + AV_CPU_FLAG_FMA3)) > > > > + return 32; > > > > + if (flags & (AV_CPU_FLAG_XOP | > > > > + AV_CPU_FLAG_AESNI | > > > > + AV_CPU_FLAG_SSE42 | > > > > + AV_CPU_FLAG_SSE4 | > > > > + AV_CPU_FLAG_SSSE3 | > > > > + AV_CPU_FLAG_SSE3 | > > > > + AV_CPU_FLAG_SSE2 | > > > > + AV_CPU_FLAG_SSE | > > > > + AV_CPU_FLAG_AVXSLOW | > > > > + AV_CPU_FLAG_SSE3SLOW | > > > > + AV_CPU_FLAG_SSE2SLOW)) > > > > + return 16; > > > > +#endif > > > > + > > > > + return 8; > > > > +} > > > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > > > > index de05593446..9e5d40affe 100644 > > > > --- a/libavutil/cpu.h > > > > +++ b/libavutil/cpu.h > > > > @@ -21,6 +21,8 @@ > > > > #ifndef AVUTIL_CPU_H > > > > #define AVUTIL_CPU_H > > > > > > > > +#include <stddef.h> > > > > + > > > > #include "attributes.h" > > > > > > > > #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ > > > > > > > > > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); > > > > */ > > > > int av_cpu_count(void); > > > > > > > > +/** > > > > + * Get the maximum data alignment that may be required by FFmpeg. > > > > + * > > > > + * Note that this is affected by the build configuration and the CPU flags mask, > > > > + * so e.g. if the CPU supports AVX, but libavutil has been built with > > > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through > > > > + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > > > > + * present. > > > > + */ > > > > +size_t av_cpu_max_align(void); > > > > > > This might interact badly with runtime cpu flags/mask changes > > > > > > If its used to choose the alignment for allocated frames and > > > after some are allocated the cpu flags are changed there could be > > > still frames in queues that may not have sufficient alignment for the > > > new flags > > > > There's no such thing as runtime CPU flag changes. > > We even have an API to change the cpu flags at runtime. > > av_set_cpu_flags_mask() and av_force_cpu_flags() > > There is no restriction in the API on when they can be called. > > you can call av_force_cpu_flags(0) then open a decoder then call > av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) > then open a filter or encoder > then run the code and it could crash as the allocated frames from > earlier are not aligned enough for the 2nd filter or encoder > > There also may be other scenarios where this can occur > > [...] I'm looking forward to your patches that make the h264 decoder transparently change the used DSP functions when calling this function.
On 9/4/2017 5:53 PM, Michael Niedermayer wrote: > On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: >> On Mon, 4 Sep 2017 21:18:35 +0200 >> Michael Niedermayer <michael@niedermayer.cc> wrote: >> >>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: >>>> From: Anton Khirnov <anton@khirnov.net> >>>> >>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) >>>> Signed-off-by: James Almer <jamrial@gmail.com> >>>> --- >>>> This is (afaics) the last API introduced to libav before the major bump. >>>> >>>> Now checking all the x86 flags that would require aligment of 16 bytes >>>> or more. >>>> >>>> doc/APIchanges | 3 +++ >>>> libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ >>>> libavutil/cpu.h | 13 +++++++++++++ >>>> libavutil/version.h | 2 +- >>>> 4 files changed, 52 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/doc/APIchanges b/doc/APIchanges >>>> index 4effbf9364..6a57c210a9 100644 >>>> --- a/doc/APIchanges >>>> +++ b/doc/APIchanges >>>> @@ -15,6 +15,9 @@ libavutil: 2015-08-28 >>>> >>>> API changes, most recent first: >>>> >>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h >>>> + Add av_cpu_max_align() for querying maximum required data alignment. >>>> + >>>> 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h >>>> Add avio_read_partial(). >>>> >>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c >>>> index a22da0fa8c..4f04da2460 100644 >>>> --- a/libavutil/cpu.c >>>> +++ b/libavutil/cpu.c >>>> @@ -16,9 +16,11 @@ >>>> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA >>>> */ >>>> >>>> +#include <stddef.h> >>>> #include <stdint.h> >>>> #include <stdatomic.h> >>>> >>>> +#include "attributes.h" >>>> #include "cpu.h" >>>> #include "cpu_internal.h" >>>> #include "config.h" >>>> @@ -299,3 +301,36 @@ int av_cpu_count(void) >>>> >>>> return nb_cpus; >>>> } >>>> + >>>> +size_t av_cpu_max_align(void) >>>> +{ >>>> + int av_unused flags = av_get_cpu_flags(); >>>> + >>>> +#if ARCH_ARM || ARCH_AARCH64 >>>> + if (flags & AV_CPU_FLAG_NEON) >>>> + return 16; >>>> +#elif ARCH_PPC >>>> + if (flags & AV_CPU_FLAG_ALTIVEC) >>>> + return 16; >>>> +#elif ARCH_X86 >>>> + if (flags & (AV_CPU_FLAG_AVX2 | >>>> + AV_CPU_FLAG_AVX | >>>> + AV_CPU_FLAG_FMA4 | >>>> + AV_CPU_FLAG_FMA3)) >>>> + return 32; >>>> + if (flags & (AV_CPU_FLAG_XOP | >>>> + AV_CPU_FLAG_AESNI | >>>> + AV_CPU_FLAG_SSE42 | >>>> + AV_CPU_FLAG_SSE4 | >>>> + AV_CPU_FLAG_SSSE3 | >>>> + AV_CPU_FLAG_SSE3 | >>>> + AV_CPU_FLAG_SSE2 | >>>> + AV_CPU_FLAG_SSE | >>>> + AV_CPU_FLAG_AVXSLOW | >>>> + AV_CPU_FLAG_SSE3SLOW | >>>> + AV_CPU_FLAG_SSE2SLOW)) >>>> + return 16; >>>> +#endif >>>> + >>>> + return 8; >>>> +} >>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h >>>> index de05593446..9e5d40affe 100644 >>>> --- a/libavutil/cpu.h >>>> +++ b/libavutil/cpu.h >>>> @@ -21,6 +21,8 @@ >>>> #ifndef AVUTIL_CPU_H >>>> #define AVUTIL_CPU_H >>>> >>>> +#include <stddef.h> >>>> + >>>> #include "attributes.h" >>>> >>>> #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ >>> >>> >>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); >>>> */ >>>> int av_cpu_count(void); >>>> >>>> +/** >>>> + * Get the maximum data alignment that may be required by FFmpeg. >>>> + * >>>> + * Note that this is affected by the build configuration and the CPU flags mask, >>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with >>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through >>>> + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not >>>> + * present. >>>> + */ >>>> +size_t av_cpu_max_align(void); >>> >>> This might interact badly with runtime cpu flags/mask changes >>> >>> If its used to choose the alignment for allocated frames and >>> after some are allocated the cpu flags are changed there could be >>> still frames in queues that may not have sufficient alignment for the >>> new flags >> >> There's no such thing as runtime CPU flag changes. > > We even have an API to change the cpu flags at runtime. > > av_set_cpu_flags_mask() and av_force_cpu_flags() > > There is no restriction in the API on when they can be called. > > you can call av_force_cpu_flags(0) then open a decoder then call > av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) > then open a filter or encoder > then run the code and it could crash as the allocated frames from > earlier are not aligned enough for the 2nd filter or encoder > > There also may be other scenarios where this can occur Alright, I just arrived to this cherry pick during merges. So my question is, do i apply it and skip the commit that makes use of it to choose alignment within ffmpeg? Even if unused at first, I'd rather not skip its addition.
On Wed, 27 Sep 2017 21:15:39 -0300 James Almer <jamrial@gmail.com> wrote: > On 9/4/2017 5:53 PM, Michael Niedermayer wrote: > > On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: > >> On Mon, 4 Sep 2017 21:18:35 +0200 > >> Michael Niedermayer <michael@niedermayer.cc> wrote: > >> > >>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: > >>>> From: Anton Khirnov <anton@khirnov.net> > >>>> > >>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) > >>>> Signed-off-by: James Almer <jamrial@gmail.com> > >>>> --- > >>>> This is (afaics) the last API introduced to libav before the major bump. > >>>> > >>>> Now checking all the x86 flags that would require aligment of 16 bytes > >>>> or more. > >>>> > >>>> doc/APIchanges | 3 +++ > >>>> libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ > >>>> libavutil/cpu.h | 13 +++++++++++++ > >>>> libavutil/version.h | 2 +- > >>>> 4 files changed, 52 insertions(+), 1 deletion(-) > >>>> > >>>> diff --git a/doc/APIchanges b/doc/APIchanges > >>>> index 4effbf9364..6a57c210a9 100644 > >>>> --- a/doc/APIchanges > >>>> +++ b/doc/APIchanges > >>>> @@ -15,6 +15,9 @@ libavutil: 2015-08-28 > >>>> > >>>> API changes, most recent first: > >>>> > >>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h > >>>> + Add av_cpu_max_align() for querying maximum required data alignment. > >>>> + > >>>> 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h > >>>> Add avio_read_partial(). > >>>> > >>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c > >>>> index a22da0fa8c..4f04da2460 100644 > >>>> --- a/libavutil/cpu.c > >>>> +++ b/libavutil/cpu.c > >>>> @@ -16,9 +16,11 @@ > >>>> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > >>>> */ > >>>> > >>>> +#include <stddef.h> > >>>> #include <stdint.h> > >>>> #include <stdatomic.h> > >>>> > >>>> +#include "attributes.h" > >>>> #include "cpu.h" > >>>> #include "cpu_internal.h" > >>>> #include "config.h" > >>>> @@ -299,3 +301,36 @@ int av_cpu_count(void) > >>>> > >>>> return nb_cpus; > >>>> } > >>>> + > >>>> +size_t av_cpu_max_align(void) > >>>> +{ > >>>> + int av_unused flags = av_get_cpu_flags(); > >>>> + > >>>> +#if ARCH_ARM || ARCH_AARCH64 > >>>> + if (flags & AV_CPU_FLAG_NEON) > >>>> + return 16; > >>>> +#elif ARCH_PPC > >>>> + if (flags & AV_CPU_FLAG_ALTIVEC) > >>>> + return 16; > >>>> +#elif ARCH_X86 > >>>> + if (flags & (AV_CPU_FLAG_AVX2 | > >>>> + AV_CPU_FLAG_AVX | > >>>> + AV_CPU_FLAG_FMA4 | > >>>> + AV_CPU_FLAG_FMA3)) > >>>> + return 32; > >>>> + if (flags & (AV_CPU_FLAG_XOP | > >>>> + AV_CPU_FLAG_AESNI | > >>>> + AV_CPU_FLAG_SSE42 | > >>>> + AV_CPU_FLAG_SSE4 | > >>>> + AV_CPU_FLAG_SSSE3 | > >>>> + AV_CPU_FLAG_SSE3 | > >>>> + AV_CPU_FLAG_SSE2 | > >>>> + AV_CPU_FLAG_SSE | > >>>> + AV_CPU_FLAG_AVXSLOW | > >>>> + AV_CPU_FLAG_SSE3SLOW | > >>>> + AV_CPU_FLAG_SSE2SLOW)) > >>>> + return 16; > >>>> +#endif > >>>> + > >>>> + return 8; > >>>> +} > >>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h > >>>> index de05593446..9e5d40affe 100644 > >>>> --- a/libavutil/cpu.h > >>>> +++ b/libavutil/cpu.h > >>>> @@ -21,6 +21,8 @@ > >>>> #ifndef AVUTIL_CPU_H > >>>> #define AVUTIL_CPU_H > >>>> > >>>> +#include <stddef.h> > >>>> + > >>>> #include "attributes.h" > >>>> > >>>> #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ > >>> > >>> > >>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); > >>>> */ > >>>> int av_cpu_count(void); > >>>> > >>>> +/** > >>>> + * Get the maximum data alignment that may be required by FFmpeg. > >>>> + * > >>>> + * Note that this is affected by the build configuration and the CPU flags mask, > >>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with > >>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through > >>>> + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not > >>>> + * present. > >>>> + */ > >>>> +size_t av_cpu_max_align(void); > >>> > >>> This might interact badly with runtime cpu flags/mask changes > >>> > >>> If its used to choose the alignment for allocated frames and > >>> after some are allocated the cpu flags are changed there could be > >>> still frames in queues that may not have sufficient alignment for the > >>> new flags > >> > >> There's no such thing as runtime CPU flag changes. > > > > We even have an API to change the cpu flags at runtime. > > > > av_set_cpu_flags_mask() and av_force_cpu_flags() > > > > There is no restriction in the API on when they can be called. > > > > you can call av_force_cpu_flags(0) then open a decoder then call > > av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) > > then open a filter or encoder > > then run the code and it could crash as the allocated frames from > > earlier are not aligned enough for the 2nd filter or encoder > > > > There also may be other scenarios where this can occur > > Alright, I just arrived to this cherry pick during merges. So my > question is, do i apply it and skip the commit that makes use of it to > choose alignment within ffmpeg? > > Even if unused at first, I'd rather not skip its addition. API users can rely on it. So you better make sure that can't break. Other aspects don't really matter.
On 9/27/2017 9:19 PM, wm4 wrote: > On Wed, 27 Sep 2017 21:15:39 -0300 > James Almer <jamrial@gmail.com> wrote: > >> On 9/4/2017 5:53 PM, Michael Niedermayer wrote: >>> On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote: >>>> On Mon, 4 Sep 2017 21:18:35 +0200 >>>> Michael Niedermayer <michael@niedermayer.cc> wrote: >>>> >>>>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote: >>>>>> From: Anton Khirnov <anton@khirnov.net> >>>>>> >>>>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e) >>>>>> Signed-off-by: James Almer <jamrial@gmail.com> >>>>>> --- >>>>>> This is (afaics) the last API introduced to libav before the major bump. >>>>>> >>>>>> Now checking all the x86 flags that would require aligment of 16 bytes >>>>>> or more. >>>>>> >>>>>> doc/APIchanges | 3 +++ >>>>>> libavutil/cpu.c | 35 +++++++++++++++++++++++++++++++++++ >>>>>> libavutil/cpu.h | 13 +++++++++++++ >>>>>> libavutil/version.h | 2 +- >>>>>> 4 files changed, 52 insertions(+), 1 deletion(-) >>>>>> >>>>>> diff --git a/doc/APIchanges b/doc/APIchanges >>>>>> index 4effbf9364..6a57c210a9 100644 >>>>>> --- a/doc/APIchanges >>>>>> +++ b/doc/APIchanges >>>>>> @@ -15,6 +15,9 @@ libavutil: 2015-08-28 >>>>>> >>>>>> API changes, most recent first: >>>>>> >>>>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h >>>>>> + Add av_cpu_max_align() for querying maximum required data alignment. >>>>>> + >>>>>> 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h >>>>>> Add avio_read_partial(). >>>>>> >>>>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c >>>>>> index a22da0fa8c..4f04da2460 100644 >>>>>> --- a/libavutil/cpu.c >>>>>> +++ b/libavutil/cpu.c >>>>>> @@ -16,9 +16,11 @@ >>>>>> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA >>>>>> */ >>>>>> >>>>>> +#include <stddef.h> >>>>>> #include <stdint.h> >>>>>> #include <stdatomic.h> >>>>>> >>>>>> +#include "attributes.h" >>>>>> #include "cpu.h" >>>>>> #include "cpu_internal.h" >>>>>> #include "config.h" >>>>>> @@ -299,3 +301,36 @@ int av_cpu_count(void) >>>>>> >>>>>> return nb_cpus; >>>>>> } >>>>>> + >>>>>> +size_t av_cpu_max_align(void) >>>>>> +{ >>>>>> + int av_unused flags = av_get_cpu_flags(); >>>>>> + >>>>>> +#if ARCH_ARM || ARCH_AARCH64 >>>>>> + if (flags & AV_CPU_FLAG_NEON) >>>>>> + return 16; >>>>>> +#elif ARCH_PPC >>>>>> + if (flags & AV_CPU_FLAG_ALTIVEC) >>>>>> + return 16; >>>>>> +#elif ARCH_X86 >>>>>> + if (flags & (AV_CPU_FLAG_AVX2 | >>>>>> + AV_CPU_FLAG_AVX | >>>>>> + AV_CPU_FLAG_FMA4 | >>>>>> + AV_CPU_FLAG_FMA3)) >>>>>> + return 32; >>>>>> + if (flags & (AV_CPU_FLAG_XOP | >>>>>> + AV_CPU_FLAG_AESNI | >>>>>> + AV_CPU_FLAG_SSE42 | >>>>>> + AV_CPU_FLAG_SSE4 | >>>>>> + AV_CPU_FLAG_SSSE3 | >>>>>> + AV_CPU_FLAG_SSE3 | >>>>>> + AV_CPU_FLAG_SSE2 | >>>>>> + AV_CPU_FLAG_SSE | >>>>>> + AV_CPU_FLAG_AVXSLOW | >>>>>> + AV_CPU_FLAG_SSE3SLOW | >>>>>> + AV_CPU_FLAG_SSE2SLOW)) >>>>>> + return 16; >>>>>> +#endif >>>>>> + >>>>>> + return 8; >>>>>> +} >>>>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h >>>>>> index de05593446..9e5d40affe 100644 >>>>>> --- a/libavutil/cpu.h >>>>>> +++ b/libavutil/cpu.h >>>>>> @@ -21,6 +21,8 @@ >>>>>> #ifndef AVUTIL_CPU_H >>>>>> #define AVUTIL_CPU_H >>>>>> >>>>>> +#include <stddef.h> >>>>>> + >>>>>> #include "attributes.h" >>>>>> >>>>>> #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ >>>>> >>>>> >>>>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); >>>>>> */ >>>>>> int av_cpu_count(void); >>>>>> >>>>>> +/** >>>>>> + * Get the maximum data alignment that may be required by FFmpeg. >>>>>> + * >>>>>> + * Note that this is affected by the build configuration and the CPU flags mask, >>>>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with >>>>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through >>>>>> + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not >>>>>> + * present. >>>>>> + */ >>>>>> +size_t av_cpu_max_align(void); >>>>> >>>>> This might interact badly with runtime cpu flags/mask changes >>>>> >>>>> If its used to choose the alignment for allocated frames and >>>>> after some are allocated the cpu flags are changed there could be >>>>> still frames in queues that may not have sufficient alignment for the >>>>> new flags >>>> >>>> There's no such thing as runtime CPU flag changes. >>> >>> We even have an API to change the cpu flags at runtime. >>> >>> av_set_cpu_flags_mask() and av_force_cpu_flags() >>> >>> There is no restriction in the API on when they can be called. >>> >>> you can call av_force_cpu_flags(0) then open a decoder then call >>> av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE) >>> then open a filter or encoder >>> then run the code and it could crash as the allocated frames from >>> earlier are not aligned enough for the 2nd filter or encoder >>> >>> There also may be other scenarios where this can occur >> >> Alright, I just arrived to this cherry pick during merges. So my >> question is, do i apply it and skip the commit that makes use of it to >> choose alignment within ffmpeg? >> >> Even if unused at first, I'd rather not skip its addition. > > API users can rely on it. So you better make sure that can't break. > Other aspects don't really matter. What do you mean with this? Do i apply it or not? The function will return a value based on runtime cpuflags. The value is guaranteed to be correct, but how the user uses it is beyond our control.
On Wed, 27 Sep 2017 21:37:54 -0300 James Almer <jamrial@gmail.com> wrote: > > API users can rely on it. So you better make sure that can't break. > > Other aspects don't really matter. > > What do you mean with this? Do i apply it or not? The function will > return a value based on runtime cpuflags. The value is guaranteed to be > correct, but how the user uses it is beyond our control. If the user uses the return value of the API to align pointers, and then passes these pointers to ffmpeg functions that process data, they must not crash or exhibit low performance. Or in other words, the alignment returned by the function must be high enough. At least as long as the global CPU flags are not changed afterwards. So if this is correct, I see no reason not to apply this patch? I'm not sure if your question was only about this patch, or potentially later Libav commits that need to be merged.
On 9/27/2017 9:59 PM, wm4 wrote: > On Wed, 27 Sep 2017 21:37:54 -0300 > James Almer <jamrial@gmail.com> wrote: > >>> API users can rely on it. So you better make sure that can't break. >>> Other aspects don't really matter. >> >> What do you mean with this? Do i apply it or not? The function will >> return a value based on runtime cpuflags. The value is guaranteed to be >> correct, but how the user uses it is beyond our control. > > If the user uses the return value of the API to align pointers, and > then passes these pointers to ffmpeg functions that process data, they > must not crash or exhibit low performance. Or in other words, the > alignment returned by the function must be high enough. At least as > long as the global CPU flags are not changed afterwards. > > So if this is correct, I see no reason not to apply this patch? > > I'm not sure if your question was only about this patch, or potentially > later Libav commits that need to be merged. Yes, i mentioned there's a commit after this one that makes use of the function internally, which based on the above (global CPU flags can be changed by the user), should probably not be applied in favor of keeping the current safe hardcoded STRIDE value. My question was, since it will not be used by ffmpeg itself, does it make sense adding it? You argued that yes (users may have use for it), so I'll merge it.
On Wed, 27 Sep 2017 22:26:48 -0300 James Almer <jamrial@gmail.com> wrote: > On 9/27/2017 9:59 PM, wm4 wrote: > > On Wed, 27 Sep 2017 21:37:54 -0300 > > James Almer <jamrial@gmail.com> wrote: > > > >>> API users can rely on it. So you better make sure that can't break. > >>> Other aspects don't really matter. > >> > >> What do you mean with this? Do i apply it or not? The function will > >> return a value based on runtime cpuflags. The value is guaranteed to be > >> correct, but how the user uses it is beyond our control. > > > > If the user uses the return value of the API to align pointers, and > > then passes these pointers to ffmpeg functions that process data, they > > must not crash or exhibit low performance. Or in other words, the > > alignment returned by the function must be high enough. At least as > > long as the global CPU flags are not changed afterwards. > > > > So if this is correct, I see no reason not to apply this patch? > > > > I'm not sure if your question was only about this patch, or potentially > > later Libav commits that need to be merged. > > Yes, i mentioned there's a commit after this one that makes use of the > function internally, which based on the above (global CPU flags can be > changed by the user), should probably not be applied in favor of keeping > the current safe hardcoded STRIDE value. I'd argue we should use this commit. But it doesn't matter too much, since it doesn't affect the API. But only if the API function is correct. If the API function were to become buggy because FFmpeg does something else internally, the outcome would be worse. > My question was, since it will not be used by ffmpeg itself, does it > make sense adding it? You argued that yes (users may have use for it), > so I'll merge it. Yes, definitely.
diff --git a/doc/APIchanges b/doc/APIchanges index 4effbf9364..6a57c210a9 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2015-08-28 API changes, most recent first: +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h + Add av_cpu_max_align() for querying maximum required data alignment. + 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h Add avio_read_partial(). diff --git a/libavutil/cpu.c b/libavutil/cpu.c index a22da0fa8c..4f04da2460 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -16,9 +16,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stddef.h> #include <stdint.h> #include <stdatomic.h> +#include "attributes.h" #include "cpu.h" #include "cpu_internal.h" #include "config.h" @@ -299,3 +301,36 @@ int av_cpu_count(void) return nb_cpus; } + +size_t av_cpu_max_align(void) +{ + int av_unused flags = av_get_cpu_flags(); + +#if ARCH_ARM || ARCH_AARCH64 + if (flags & AV_CPU_FLAG_NEON) + return 16; +#elif ARCH_PPC + if (flags & AV_CPU_FLAG_ALTIVEC) + return 16; +#elif ARCH_X86 + if (flags & (AV_CPU_FLAG_AVX2 | + AV_CPU_FLAG_AVX | + AV_CPU_FLAG_FMA4 | + AV_CPU_FLAG_FMA3)) + return 32; + if (flags & (AV_CPU_FLAG_XOP | + AV_CPU_FLAG_AESNI | + AV_CPU_FLAG_SSE42 | + AV_CPU_FLAG_SSE4 | + AV_CPU_FLAG_SSSE3 | + AV_CPU_FLAG_SSE3 | + AV_CPU_FLAG_SSE2 | + AV_CPU_FLAG_SSE | + AV_CPU_FLAG_AVXSLOW | + AV_CPU_FLAG_SSE3SLOW | + AV_CPU_FLAG_SSE2SLOW)) + return 16; +#endif + + return 8; +} diff --git a/libavutil/cpu.h b/libavutil/cpu.h index de05593446..9e5d40affe 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -21,6 +21,8 @@ #ifndef AVUTIL_CPU_H #define AVUTIL_CPU_H +#include <stddef.h> + #include "attributes.h" #define AV_CPU_FLAG_FORCE 0x80000000 /* force usage of selected flags (OR) */ @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s); */ int av_cpu_count(void); +/** + * Get the maximum data alignment that may be required by FFmpeg. + * + * Note that this is affected by the build configuration and the CPU flags mask, + * so e.g. if the CPU supports AVX, but libavutil has been built with + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through + * av_set_cpu_flags_mask(), then this function will behave as if AVX is not + * present. + */ +size_t av_cpu_max_align(void); + #endif /* AVUTIL_CPU_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 6e25b4690c..d99eff5d15 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -80,7 +80,7 @@ #define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 74 +#define LIBAVUTIL_VERSION_MINOR 75 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \