diff mbox

[FFmpeg-devel,v2] cpu: add a function for querying maximum required data alignment

Message ID 20170903004738.8928-1-jamrial@gmail.com
State New
Headers show

Commit Message

James Almer Sept. 3, 2017, 12:47 a.m. UTC
From: Anton Khirnov <anton@khirnov.net>

(cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
Signed-off-by: James Almer <jamrial@gmail.com>
---
This is (afaics) the last API introduced to libav before the major bump.

Now checking all the x86 flags that would require aligment of 16 bytes
or more.

 doc/APIchanges      |  3 +++
 libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
 libavutil/cpu.h     | 13 +++++++++++++
 libavutil/version.h |  2 +-
 4 files changed, 52 insertions(+), 1 deletion(-)

Comments

Michael Niedermayer Sept. 4, 2017, 7:18 p.m. UTC | #1
On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> This is (afaics) the last API introduced to libav before the major bump.
> 
> Now checking all the x86 flags that would require aligment of 16 bytes
> or more.
> 
>  doc/APIchanges      |  3 +++
>  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
>  libavutil/cpu.h     | 13 +++++++++++++
>  libavutil/version.h |  2 +-
>  4 files changed, 52 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index 4effbf9364..6a57c210a9 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -15,6 +15,9 @@ libavutil:     2015-08-28
>  
>  API changes, most recent first:
>  
> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
> +  Add av_cpu_max_align() for querying maximum required data alignment.
> +
>  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
>    Add avio_read_partial().
>  
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index a22da0fa8c..4f04da2460 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -16,9 +16,11 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
>  
> +#include <stddef.h>
>  #include <stdint.h>
>  #include <stdatomic.h>
>  
> +#include "attributes.h"
>  #include "cpu.h"
>  #include "cpu_internal.h"
>  #include "config.h"
> @@ -299,3 +301,36 @@ int av_cpu_count(void)
>  
>      return nb_cpus;
>  }
> +
> +size_t av_cpu_max_align(void)
> +{
> +    int av_unused flags = av_get_cpu_flags();
> +
> +#if ARCH_ARM || ARCH_AARCH64
> +    if (flags & AV_CPU_FLAG_NEON)
> +        return 16;
> +#elif ARCH_PPC
> +    if (flags & AV_CPU_FLAG_ALTIVEC)
> +        return 16;
> +#elif ARCH_X86
> +    if (flags & (AV_CPU_FLAG_AVX2     |
> +                 AV_CPU_FLAG_AVX      |
> +                 AV_CPU_FLAG_FMA4     |
> +                 AV_CPU_FLAG_FMA3))
> +        return 32;
> +    if (flags & (AV_CPU_FLAG_XOP      |
> +                 AV_CPU_FLAG_AESNI    |
> +                 AV_CPU_FLAG_SSE42    |
> +                 AV_CPU_FLAG_SSE4     |
> +                 AV_CPU_FLAG_SSSE3    |
> +                 AV_CPU_FLAG_SSE3     |
> +                 AV_CPU_FLAG_SSE2     |
> +                 AV_CPU_FLAG_SSE      |
> +                 AV_CPU_FLAG_AVXSLOW  |
> +                 AV_CPU_FLAG_SSE3SLOW |
> +                 AV_CPU_FLAG_SSE2SLOW))
> +        return 16;
> +#endif
> +
> +    return 8;
> +}
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index de05593446..9e5d40affe 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -21,6 +21,8 @@
>  #ifndef AVUTIL_CPU_H
>  #define AVUTIL_CPU_H
>  
> +#include <stddef.h>
> +
>  #include "attributes.h"
>  
>  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */


> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
>   */
>  int av_cpu_count(void);
>  
> +/**
> + * Get the maximum data alignment that may be required by FFmpeg.
> + *
> + * Note that this is affected by the build configuration and the CPU flags mask,
> + * so e.g. if the CPU supports AVX, but libavutil has been built with
> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
> + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> + *  present.
> + */
> +size_t av_cpu_max_align(void);

This might interact badly with runtime cpu flags/mask changes

If its used to choose the alignment for allocated frames and
after some are allocated the cpu flags are changed there could be
still frames in queues that may not have sufficient alignment for the
new flags


[...]
wm4 Sept. 4, 2017, 7:27 p.m. UTC | #2
On Mon, 4 Sep 2017 21:18:35 +0200
Michael Niedermayer <michael@niedermayer.cc> wrote:

> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:
> > From: Anton Khirnov <anton@khirnov.net>
> > 
> > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
> > Signed-off-by: James Almer <jamrial@gmail.com>
> > ---
> > This is (afaics) the last API introduced to libav before the major bump.
> > 
> > Now checking all the x86 flags that would require aligment of 16 bytes
> > or more.
> > 
> >  doc/APIchanges      |  3 +++
> >  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
> >  libavutil/cpu.h     | 13 +++++++++++++
> >  libavutil/version.h |  2 +-
> >  4 files changed, 52 insertions(+), 1 deletion(-)
> > 
> > diff --git a/doc/APIchanges b/doc/APIchanges
> > index 4effbf9364..6a57c210a9 100644
> > --- a/doc/APIchanges
> > +++ b/doc/APIchanges
> > @@ -15,6 +15,9 @@ libavutil:     2015-08-28
> >  
> >  API changes, most recent first:
> >  
> > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
> > +  Add av_cpu_max_align() for querying maximum required data alignment.
> > +
> >  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
> >    Add avio_read_partial().
> >  
> > diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> > index a22da0fa8c..4f04da2460 100644
> > --- a/libavutil/cpu.c
> > +++ b/libavutil/cpu.c
> > @@ -16,9 +16,11 @@
> >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >   */
> >  
> > +#include <stddef.h>
> >  #include <stdint.h>
> >  #include <stdatomic.h>
> >  
> > +#include "attributes.h"
> >  #include "cpu.h"
> >  #include "cpu_internal.h"
> >  #include "config.h"
> > @@ -299,3 +301,36 @@ int av_cpu_count(void)
> >  
> >      return nb_cpus;
> >  }
> > +
> > +size_t av_cpu_max_align(void)
> > +{
> > +    int av_unused flags = av_get_cpu_flags();
> > +
> > +#if ARCH_ARM || ARCH_AARCH64
> > +    if (flags & AV_CPU_FLAG_NEON)
> > +        return 16;
> > +#elif ARCH_PPC
> > +    if (flags & AV_CPU_FLAG_ALTIVEC)
> > +        return 16;
> > +#elif ARCH_X86
> > +    if (flags & (AV_CPU_FLAG_AVX2     |
> > +                 AV_CPU_FLAG_AVX      |
> > +                 AV_CPU_FLAG_FMA4     |
> > +                 AV_CPU_FLAG_FMA3))
> > +        return 32;
> > +    if (flags & (AV_CPU_FLAG_XOP      |
> > +                 AV_CPU_FLAG_AESNI    |
> > +                 AV_CPU_FLAG_SSE42    |
> > +                 AV_CPU_FLAG_SSE4     |
> > +                 AV_CPU_FLAG_SSSE3    |
> > +                 AV_CPU_FLAG_SSE3     |
> > +                 AV_CPU_FLAG_SSE2     |
> > +                 AV_CPU_FLAG_SSE      |
> > +                 AV_CPU_FLAG_AVXSLOW  |
> > +                 AV_CPU_FLAG_SSE3SLOW |
> > +                 AV_CPU_FLAG_SSE2SLOW))
> > +        return 16;
> > +#endif
> > +
> > +    return 8;
> > +}
> > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> > index de05593446..9e5d40affe 100644
> > --- a/libavutil/cpu.h
> > +++ b/libavutil/cpu.h
> > @@ -21,6 +21,8 @@
> >  #ifndef AVUTIL_CPU_H
> >  #define AVUTIL_CPU_H
> >  
> > +#include <stddef.h>
> > +
> >  #include "attributes.h"
> >  
> >  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */  
> 
> 
> > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
> >   */
> >  int av_cpu_count(void);
> >  
> > +/**
> > + * Get the maximum data alignment that may be required by FFmpeg.
> > + *
> > + * Note that this is affected by the build configuration and the CPU flags mask,
> > + * so e.g. if the CPU supports AVX, but libavutil has been built with
> > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
> > + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> > + *  present.
> > + */
> > +size_t av_cpu_max_align(void);  
> 
> This might interact badly with runtime cpu flags/mask changes
> 
> If its used to choose the alignment for allocated frames and
> after some are allocated the cpu flags are changed there could be
> still frames in queues that may not have sufficient alignment for the
> new flags

There's no such thing as runtime CPU flag changes.
Michael Niedermayer Sept. 4, 2017, 8:53 p.m. UTC | #3
On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:
> On Mon, 4 Sep 2017 21:18:35 +0200
> Michael Niedermayer <michael@niedermayer.cc> wrote:
> 
> > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:
> > > From: Anton Khirnov <anton@khirnov.net>
> > > 
> > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
> > > Signed-off-by: James Almer <jamrial@gmail.com>
> > > ---
> > > This is (afaics) the last API introduced to libav before the major bump.
> > > 
> > > Now checking all the x86 flags that would require aligment of 16 bytes
> > > or more.
> > > 
> > >  doc/APIchanges      |  3 +++
> > >  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
> > >  libavutil/cpu.h     | 13 +++++++++++++
> > >  libavutil/version.h |  2 +-
> > >  4 files changed, 52 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/doc/APIchanges b/doc/APIchanges
> > > index 4effbf9364..6a57c210a9 100644
> > > --- a/doc/APIchanges
> > > +++ b/doc/APIchanges
> > > @@ -15,6 +15,9 @@ libavutil:     2015-08-28
> > >  
> > >  API changes, most recent first:
> > >  
> > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
> > > +  Add av_cpu_max_align() for querying maximum required data alignment.
> > > +
> > >  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
> > >    Add avio_read_partial().
> > >  
> > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> > > index a22da0fa8c..4f04da2460 100644
> > > --- a/libavutil/cpu.c
> > > +++ b/libavutil/cpu.c
> > > @@ -16,9 +16,11 @@
> > >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > >   */
> > >  
> > > +#include <stddef.h>
> > >  #include <stdint.h>
> > >  #include <stdatomic.h>
> > >  
> > > +#include "attributes.h"
> > >  #include "cpu.h"
> > >  #include "cpu_internal.h"
> > >  #include "config.h"
> > > @@ -299,3 +301,36 @@ int av_cpu_count(void)
> > >  
> > >      return nb_cpus;
> > >  }
> > > +
> > > +size_t av_cpu_max_align(void)
> > > +{
> > > +    int av_unused flags = av_get_cpu_flags();
> > > +
> > > +#if ARCH_ARM || ARCH_AARCH64
> > > +    if (flags & AV_CPU_FLAG_NEON)
> > > +        return 16;
> > > +#elif ARCH_PPC
> > > +    if (flags & AV_CPU_FLAG_ALTIVEC)
> > > +        return 16;
> > > +#elif ARCH_X86
> > > +    if (flags & (AV_CPU_FLAG_AVX2     |
> > > +                 AV_CPU_FLAG_AVX      |
> > > +                 AV_CPU_FLAG_FMA4     |
> > > +                 AV_CPU_FLAG_FMA3))
> > > +        return 32;
> > > +    if (flags & (AV_CPU_FLAG_XOP      |
> > > +                 AV_CPU_FLAG_AESNI    |
> > > +                 AV_CPU_FLAG_SSE42    |
> > > +                 AV_CPU_FLAG_SSE4     |
> > > +                 AV_CPU_FLAG_SSSE3    |
> > > +                 AV_CPU_FLAG_SSE3     |
> > > +                 AV_CPU_FLAG_SSE2     |
> > > +                 AV_CPU_FLAG_SSE      |
> > > +                 AV_CPU_FLAG_AVXSLOW  |
> > > +                 AV_CPU_FLAG_SSE3SLOW |
> > > +                 AV_CPU_FLAG_SSE2SLOW))
> > > +        return 16;
> > > +#endif
> > > +
> > > +    return 8;
> > > +}
> > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> > > index de05593446..9e5d40affe 100644
> > > --- a/libavutil/cpu.h
> > > +++ b/libavutil/cpu.h
> > > @@ -21,6 +21,8 @@
> > >  #ifndef AVUTIL_CPU_H
> > >  #define AVUTIL_CPU_H
> > >  
> > > +#include <stddef.h>
> > > +
> > >  #include "attributes.h"
> > >  
> > >  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */  
> > 
> > 
> > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
> > >   */
> > >  int av_cpu_count(void);
> > >  
> > > +/**
> > > + * Get the maximum data alignment that may be required by FFmpeg.
> > > + *
> > > + * Note that this is affected by the build configuration and the CPU flags mask,
> > > + * so e.g. if the CPU supports AVX, but libavutil has been built with
> > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
> > > + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> > > + *  present.
> > > + */
> > > +size_t av_cpu_max_align(void);  
> > 
> > This might interact badly with runtime cpu flags/mask changes
> > 
> > If its used to choose the alignment for allocated frames and
> > after some are allocated the cpu flags are changed there could be
> > still frames in queues that may not have sufficient alignment for the
> > new flags
> 
> There's no such thing as runtime CPU flag changes.

We even have an API to change the cpu flags at runtime.

av_set_cpu_flags_mask() and av_force_cpu_flags()

There is no restriction in the API on when they can be called.

you can call av_force_cpu_flags(0) then open a decoder then call
av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
then open a filter or encoder
then run the code and it could crash as the allocated frames from
earlier are not aligned enough for the 2nd filter or encoder

There also may be other scenarios where this can occur

[...]
Hendrik Leppkes Sept. 4, 2017, 9:26 p.m. UTC | #4
On Mon, Sep 4, 2017 at 10:53 PM, Michael Niedermayer
<michael@niedermayer.cc> wrote:
> On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:
>> On Mon, 4 Sep 2017 21:18:35 +0200
>> Michael Niedermayer <michael@niedermayer.cc> wrote:
>>
>> > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:
>> > > From: Anton Khirnov <anton@khirnov.net>
>> > >
>> > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
>> > > Signed-off-by: James Almer <jamrial@gmail.com>
>> > > ---
>> > > This is (afaics) the last API introduced to libav before the major bump.
>> > >
>> > > Now checking all the x86 flags that would require aligment of 16 bytes
>> > > or more.
>> > >
>> > >  doc/APIchanges      |  3 +++
>> > >  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
>> > >  libavutil/cpu.h     | 13 +++++++++++++
>> > >  libavutil/version.h |  2 +-
>> > >  4 files changed, 52 insertions(+), 1 deletion(-)
>> > >
>> > > diff --git a/doc/APIchanges b/doc/APIchanges
>> > > index 4effbf9364..6a57c210a9 100644
>> > > --- a/doc/APIchanges
>> > > +++ b/doc/APIchanges
>> > > @@ -15,6 +15,9 @@ libavutil:     2015-08-28
>> > >
>> > >  API changes, most recent first:
>> > >
>> > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
>> > > +  Add av_cpu_max_align() for querying maximum required data alignment.
>> > > +
>> > >  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
>> > >    Add avio_read_partial().
>> > >
>> > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>> > > index a22da0fa8c..4f04da2460 100644
>> > > --- a/libavutil/cpu.c
>> > > +++ b/libavutil/cpu.c
>> > > @@ -16,9 +16,11 @@
>> > >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> > >   */
>> > >
>> > > +#include <stddef.h>
>> > >  #include <stdint.h>
>> > >  #include <stdatomic.h>
>> > >
>> > > +#include "attributes.h"
>> > >  #include "cpu.h"
>> > >  #include "cpu_internal.h"
>> > >  #include "config.h"
>> > > @@ -299,3 +301,36 @@ int av_cpu_count(void)
>> > >
>> > >      return nb_cpus;
>> > >  }
>> > > +
>> > > +size_t av_cpu_max_align(void)
>> > > +{
>> > > +    int av_unused flags = av_get_cpu_flags();
>> > > +
>> > > +#if ARCH_ARM || ARCH_AARCH64
>> > > +    if (flags & AV_CPU_FLAG_NEON)
>> > > +        return 16;
>> > > +#elif ARCH_PPC
>> > > +    if (flags & AV_CPU_FLAG_ALTIVEC)
>> > > +        return 16;
>> > > +#elif ARCH_X86
>> > > +    if (flags & (AV_CPU_FLAG_AVX2     |
>> > > +                 AV_CPU_FLAG_AVX      |
>> > > +                 AV_CPU_FLAG_FMA4     |
>> > > +                 AV_CPU_FLAG_FMA3))
>> > > +        return 32;
>> > > +    if (flags & (AV_CPU_FLAG_XOP      |
>> > > +                 AV_CPU_FLAG_AESNI    |
>> > > +                 AV_CPU_FLAG_SSE42    |
>> > > +                 AV_CPU_FLAG_SSE4     |
>> > > +                 AV_CPU_FLAG_SSSE3    |
>> > > +                 AV_CPU_FLAG_SSE3     |
>> > > +                 AV_CPU_FLAG_SSE2     |
>> > > +                 AV_CPU_FLAG_SSE      |
>> > > +                 AV_CPU_FLAG_AVXSLOW  |
>> > > +                 AV_CPU_FLAG_SSE3SLOW |
>> > > +                 AV_CPU_FLAG_SSE2SLOW))
>> > > +        return 16;
>> > > +#endif
>> > > +
>> > > +    return 8;
>> > > +}
>> > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
>> > > index de05593446..9e5d40affe 100644
>> > > --- a/libavutil/cpu.h
>> > > +++ b/libavutil/cpu.h
>> > > @@ -21,6 +21,8 @@
>> > >  #ifndef AVUTIL_CPU_H
>> > >  #define AVUTIL_CPU_H
>> > >
>> > > +#include <stddef.h>
>> > > +
>> > >  #include "attributes.h"
>> > >
>> > >  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */
>> >
>> >
>> > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
>> > >   */
>> > >  int av_cpu_count(void);
>> > >
>> > > +/**
>> > > + * Get the maximum data alignment that may be required by FFmpeg.
>> > > + *
>> > > + * Note that this is affected by the build configuration and the CPU flags mask,
>> > > + * so e.g. if the CPU supports AVX, but libavutil has been built with
>> > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
>> > > + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
>> > > + *  present.
>> > > + */
>> > > +size_t av_cpu_max_align(void);
>> >
>> > This might interact badly with runtime cpu flags/mask changes
>> >
>> > If its used to choose the alignment for allocated frames and
>> > after some are allocated the cpu flags are changed there could be
>> > still frames in queues that may not have sufficient alignment for the
>> > new flags
>>
>> There's no such thing as runtime CPU flag changes.
>
> We even have an API to change the cpu flags at runtime.
>
> av_set_cpu_flags_mask() and av_force_cpu_flags()
>
> There is no restriction in the API on when they can be called.
>
> you can call av_force_cpu_flags(0) then open a decoder then call
> av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
> then open a filter or encoder
> then run the code and it could crash as the allocated frames from
> earlier are not aligned enough for the 2nd filter or encoder
>
> There also may be other scenarios where this can occur
>

Thats why API to change flags is a dumb feature to have user-facing.
It should correspond to hardware support and nothing else.

- Hendrik
wm4 Sept. 5, 2017, 8:15 a.m. UTC | #5
On Mon, 4 Sep 2017 22:53:43 +0200
Michael Niedermayer <michael@niedermayer.cc> wrote:

> On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:
> > On Mon, 4 Sep 2017 21:18:35 +0200
> > Michael Niedermayer <michael@niedermayer.cc> wrote:
> >   
> > > On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:  
> > > > From: Anton Khirnov <anton@khirnov.net>
> > > > 
> > > > (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
> > > > Signed-off-by: James Almer <jamrial@gmail.com>
> > > > ---
> > > > This is (afaics) the last API introduced to libav before the major bump.
> > > > 
> > > > Now checking all the x86 flags that would require aligment of 16 bytes
> > > > or more.
> > > > 
> > > >  doc/APIchanges      |  3 +++
> > > >  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
> > > >  libavutil/cpu.h     | 13 +++++++++++++
> > > >  libavutil/version.h |  2 +-
> > > >  4 files changed, 52 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/doc/APIchanges b/doc/APIchanges
> > > > index 4effbf9364..6a57c210a9 100644
> > > > --- a/doc/APIchanges
> > > > +++ b/doc/APIchanges
> > > > @@ -15,6 +15,9 @@ libavutil:     2015-08-28
> > > >  
> > > >  API changes, most recent first:
> > > >  
> > > > +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
> > > > +  Add av_cpu_max_align() for querying maximum required data alignment.
> > > > +
> > > >  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
> > > >    Add avio_read_partial().
> > > >  
> > > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> > > > index a22da0fa8c..4f04da2460 100644
> > > > --- a/libavutil/cpu.c
> > > > +++ b/libavutil/cpu.c
> > > > @@ -16,9 +16,11 @@
> > > >   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > > >   */
> > > >  
> > > > +#include <stddef.h>
> > > >  #include <stdint.h>
> > > >  #include <stdatomic.h>
> > > >  
> > > > +#include "attributes.h"
> > > >  #include "cpu.h"
> > > >  #include "cpu_internal.h"
> > > >  #include "config.h"
> > > > @@ -299,3 +301,36 @@ int av_cpu_count(void)
> > > >  
> > > >      return nb_cpus;
> > > >  }
> > > > +
> > > > +size_t av_cpu_max_align(void)
> > > > +{
> > > > +    int av_unused flags = av_get_cpu_flags();
> > > > +
> > > > +#if ARCH_ARM || ARCH_AARCH64
> > > > +    if (flags & AV_CPU_FLAG_NEON)
> > > > +        return 16;
> > > > +#elif ARCH_PPC
> > > > +    if (flags & AV_CPU_FLAG_ALTIVEC)
> > > > +        return 16;
> > > > +#elif ARCH_X86
> > > > +    if (flags & (AV_CPU_FLAG_AVX2     |
> > > > +                 AV_CPU_FLAG_AVX      |
> > > > +                 AV_CPU_FLAG_FMA4     |
> > > > +                 AV_CPU_FLAG_FMA3))
> > > > +        return 32;
> > > > +    if (flags & (AV_CPU_FLAG_XOP      |
> > > > +                 AV_CPU_FLAG_AESNI    |
> > > > +                 AV_CPU_FLAG_SSE42    |
> > > > +                 AV_CPU_FLAG_SSE4     |
> > > > +                 AV_CPU_FLAG_SSSE3    |
> > > > +                 AV_CPU_FLAG_SSE3     |
> > > > +                 AV_CPU_FLAG_SSE2     |
> > > > +                 AV_CPU_FLAG_SSE      |
> > > > +                 AV_CPU_FLAG_AVXSLOW  |
> > > > +                 AV_CPU_FLAG_SSE3SLOW |
> > > > +                 AV_CPU_FLAG_SSE2SLOW))
> > > > +        return 16;
> > > > +#endif
> > > > +
> > > > +    return 8;
> > > > +}
> > > > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> > > > index de05593446..9e5d40affe 100644
> > > > --- a/libavutil/cpu.h
> > > > +++ b/libavutil/cpu.h
> > > > @@ -21,6 +21,8 @@
> > > >  #ifndef AVUTIL_CPU_H
> > > >  #define AVUTIL_CPU_H
> > > >  
> > > > +#include <stddef.h>
> > > > +
> > > >  #include "attributes.h"
> > > >  
> > > >  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */    
> > > 
> > >   
> > > > @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
> > > >   */
> > > >  int av_cpu_count(void);
> > > >  
> > > > +/**
> > > > + * Get the maximum data alignment that may be required by FFmpeg.
> > > > + *
> > > > + * Note that this is affected by the build configuration and the CPU flags mask,
> > > > + * so e.g. if the CPU supports AVX, but libavutil has been built with
> > > > + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
> > > > + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> > > > + *  present.
> > > > + */
> > > > +size_t av_cpu_max_align(void);    
> > > 
> > > This might interact badly with runtime cpu flags/mask changes
> > > 
> > > If its used to choose the alignment for allocated frames and
> > > after some are allocated the cpu flags are changed there could be
> > > still frames in queues that may not have sufficient alignment for the
> > > new flags  
> > 
> > There's no such thing as runtime CPU flag changes.  
> 
> We even have an API to change the cpu flags at runtime.
> 
> av_set_cpu_flags_mask() and av_force_cpu_flags()
> 
> There is no restriction in the API on when they can be called.
> 
> you can call av_force_cpu_flags(0) then open a decoder then call
> av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
> then open a filter or encoder
> then run the code and it could crash as the allocated frames from
> earlier are not aligned enough for the 2nd filter or encoder
> 
> There also may be other scenarios where this can occur
> 
> [...]

I'm looking forward to your patches that make the h264 decoder
transparently change the used DSP functions when calling this function.
James Almer Sept. 28, 2017, 12:15 a.m. UTC | #6
On 9/4/2017 5:53 PM, Michael Niedermayer wrote:
> On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:
>> On Mon, 4 Sep 2017 21:18:35 +0200
>> Michael Niedermayer <michael@niedermayer.cc> wrote:
>>
>>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:
>>>> From: Anton Khirnov <anton@khirnov.net>
>>>>
>>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
>>>> Signed-off-by: James Almer <jamrial@gmail.com>
>>>> ---
>>>> This is (afaics) the last API introduced to libav before the major bump.
>>>>
>>>> Now checking all the x86 flags that would require aligment of 16 bytes
>>>> or more.
>>>>
>>>>  doc/APIchanges      |  3 +++
>>>>  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
>>>>  libavutil/cpu.h     | 13 +++++++++++++
>>>>  libavutil/version.h |  2 +-
>>>>  4 files changed, 52 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/doc/APIchanges b/doc/APIchanges
>>>> index 4effbf9364..6a57c210a9 100644
>>>> --- a/doc/APIchanges
>>>> +++ b/doc/APIchanges
>>>> @@ -15,6 +15,9 @@ libavutil:     2015-08-28
>>>>  
>>>>  API changes, most recent first:
>>>>  
>>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
>>>> +  Add av_cpu_max_align() for querying maximum required data alignment.
>>>> +
>>>>  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
>>>>    Add avio_read_partial().
>>>>  
>>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>>>> index a22da0fa8c..4f04da2460 100644
>>>> --- a/libavutil/cpu.c
>>>> +++ b/libavutil/cpu.c
>>>> @@ -16,9 +16,11 @@
>>>>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>>>   */
>>>>  
>>>> +#include <stddef.h>
>>>>  #include <stdint.h>
>>>>  #include <stdatomic.h>
>>>>  
>>>> +#include "attributes.h"
>>>>  #include "cpu.h"
>>>>  #include "cpu_internal.h"
>>>>  #include "config.h"
>>>> @@ -299,3 +301,36 @@ int av_cpu_count(void)
>>>>  
>>>>      return nb_cpus;
>>>>  }
>>>> +
>>>> +size_t av_cpu_max_align(void)
>>>> +{
>>>> +    int av_unused flags = av_get_cpu_flags();
>>>> +
>>>> +#if ARCH_ARM || ARCH_AARCH64
>>>> +    if (flags & AV_CPU_FLAG_NEON)
>>>> +        return 16;
>>>> +#elif ARCH_PPC
>>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>>>> +        return 16;
>>>> +#elif ARCH_X86
>>>> +    if (flags & (AV_CPU_FLAG_AVX2     |
>>>> +                 AV_CPU_FLAG_AVX      |
>>>> +                 AV_CPU_FLAG_FMA4     |
>>>> +                 AV_CPU_FLAG_FMA3))
>>>> +        return 32;
>>>> +    if (flags & (AV_CPU_FLAG_XOP      |
>>>> +                 AV_CPU_FLAG_AESNI    |
>>>> +                 AV_CPU_FLAG_SSE42    |
>>>> +                 AV_CPU_FLAG_SSE4     |
>>>> +                 AV_CPU_FLAG_SSSE3    |
>>>> +                 AV_CPU_FLAG_SSE3     |
>>>> +                 AV_CPU_FLAG_SSE2     |
>>>> +                 AV_CPU_FLAG_SSE      |
>>>> +                 AV_CPU_FLAG_AVXSLOW  |
>>>> +                 AV_CPU_FLAG_SSE3SLOW |
>>>> +                 AV_CPU_FLAG_SSE2SLOW))
>>>> +        return 16;
>>>> +#endif
>>>> +
>>>> +    return 8;
>>>> +}
>>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
>>>> index de05593446..9e5d40affe 100644
>>>> --- a/libavutil/cpu.h
>>>> +++ b/libavutil/cpu.h
>>>> @@ -21,6 +21,8 @@
>>>>  #ifndef AVUTIL_CPU_H
>>>>  #define AVUTIL_CPU_H
>>>>  
>>>> +#include <stddef.h>
>>>> +
>>>>  #include "attributes.h"
>>>>  
>>>>  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */  
>>>
>>>
>>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
>>>>   */
>>>>  int av_cpu_count(void);
>>>>  
>>>> +/**
>>>> + * Get the maximum data alignment that may be required by FFmpeg.
>>>> + *
>>>> + * Note that this is affected by the build configuration and the CPU flags mask,
>>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with
>>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
>>>> + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
>>>> + *  present.
>>>> + */
>>>> +size_t av_cpu_max_align(void);  
>>>
>>> This might interact badly with runtime cpu flags/mask changes
>>>
>>> If its used to choose the alignment for allocated frames and
>>> after some are allocated the cpu flags are changed there could be
>>> still frames in queues that may not have sufficient alignment for the
>>> new flags
>>
>> There's no such thing as runtime CPU flag changes.
> 
> We even have an API to change the cpu flags at runtime.
> 
> av_set_cpu_flags_mask() and av_force_cpu_flags()
> 
> There is no restriction in the API on when they can be called.
> 
> you can call av_force_cpu_flags(0) then open a decoder then call
> av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
> then open a filter or encoder
> then run the code and it could crash as the allocated frames from
> earlier are not aligned enough for the 2nd filter or encoder
> 
> There also may be other scenarios where this can occur

Alright, I just arrived to this cherry pick during merges. So my
question is, do i apply it and skip the commit that makes use of it to
choose alignment within ffmpeg?

Even if unused at first, I'd rather not skip its addition.
wm4 Sept. 28, 2017, 12:19 a.m. UTC | #7
On Wed, 27 Sep 2017 21:15:39 -0300
James Almer <jamrial@gmail.com> wrote:

> On 9/4/2017 5:53 PM, Michael Niedermayer wrote:
> > On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:  
> >> On Mon, 4 Sep 2017 21:18:35 +0200
> >> Michael Niedermayer <michael@niedermayer.cc> wrote:
> >>  
> >>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:  
> >>>> From: Anton Khirnov <anton@khirnov.net>
> >>>>
> >>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
> >>>> Signed-off-by: James Almer <jamrial@gmail.com>
> >>>> ---
> >>>> This is (afaics) the last API introduced to libav before the major bump.
> >>>>
> >>>> Now checking all the x86 flags that would require aligment of 16 bytes
> >>>> or more.
> >>>>
> >>>>  doc/APIchanges      |  3 +++
> >>>>  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
> >>>>  libavutil/cpu.h     | 13 +++++++++++++
> >>>>  libavutil/version.h |  2 +-
> >>>>  4 files changed, 52 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/doc/APIchanges b/doc/APIchanges
> >>>> index 4effbf9364..6a57c210a9 100644
> >>>> --- a/doc/APIchanges
> >>>> +++ b/doc/APIchanges
> >>>> @@ -15,6 +15,9 @@ libavutil:     2015-08-28
> >>>>  
> >>>>  API changes, most recent first:
> >>>>  
> >>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
> >>>> +  Add av_cpu_max_align() for querying maximum required data alignment.
> >>>> +
> >>>>  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
> >>>>    Add avio_read_partial().
> >>>>  
> >>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> >>>> index a22da0fa8c..4f04da2460 100644
> >>>> --- a/libavutil/cpu.c
> >>>> +++ b/libavutil/cpu.c
> >>>> @@ -16,9 +16,11 @@
> >>>>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >>>>   */
> >>>>  
> >>>> +#include <stddef.h>
> >>>>  #include <stdint.h>
> >>>>  #include <stdatomic.h>
> >>>>  
> >>>> +#include "attributes.h"
> >>>>  #include "cpu.h"
> >>>>  #include "cpu_internal.h"
> >>>>  #include "config.h"
> >>>> @@ -299,3 +301,36 @@ int av_cpu_count(void)
> >>>>  
> >>>>      return nb_cpus;
> >>>>  }
> >>>> +
> >>>> +size_t av_cpu_max_align(void)
> >>>> +{
> >>>> +    int av_unused flags = av_get_cpu_flags();
> >>>> +
> >>>> +#if ARCH_ARM || ARCH_AARCH64
> >>>> +    if (flags & AV_CPU_FLAG_NEON)
> >>>> +        return 16;
> >>>> +#elif ARCH_PPC
> >>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
> >>>> +        return 16;
> >>>> +#elif ARCH_X86
> >>>> +    if (flags & (AV_CPU_FLAG_AVX2     |
> >>>> +                 AV_CPU_FLAG_AVX      |
> >>>> +                 AV_CPU_FLAG_FMA4     |
> >>>> +                 AV_CPU_FLAG_FMA3))
> >>>> +        return 32;
> >>>> +    if (flags & (AV_CPU_FLAG_XOP      |
> >>>> +                 AV_CPU_FLAG_AESNI    |
> >>>> +                 AV_CPU_FLAG_SSE42    |
> >>>> +                 AV_CPU_FLAG_SSE4     |
> >>>> +                 AV_CPU_FLAG_SSSE3    |
> >>>> +                 AV_CPU_FLAG_SSE3     |
> >>>> +                 AV_CPU_FLAG_SSE2     |
> >>>> +                 AV_CPU_FLAG_SSE      |
> >>>> +                 AV_CPU_FLAG_AVXSLOW  |
> >>>> +                 AV_CPU_FLAG_SSE3SLOW |
> >>>> +                 AV_CPU_FLAG_SSE2SLOW))
> >>>> +        return 16;
> >>>> +#endif
> >>>> +
> >>>> +    return 8;
> >>>> +}
> >>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> >>>> index de05593446..9e5d40affe 100644
> >>>> --- a/libavutil/cpu.h
> >>>> +++ b/libavutil/cpu.h
> >>>> @@ -21,6 +21,8 @@
> >>>>  #ifndef AVUTIL_CPU_H
> >>>>  #define AVUTIL_CPU_H
> >>>>  
> >>>> +#include <stddef.h>
> >>>> +
> >>>>  #include "attributes.h"
> >>>>  
> >>>>  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */    
> >>>
> >>>  
> >>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
> >>>>   */
> >>>>  int av_cpu_count(void);
> >>>>  
> >>>> +/**
> >>>> + * Get the maximum data alignment that may be required by FFmpeg.
> >>>> + *
> >>>> + * Note that this is affected by the build configuration and the CPU flags mask,
> >>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with
> >>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
> >>>> + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
> >>>> + *  present.
> >>>> + */
> >>>> +size_t av_cpu_max_align(void);    
> >>>
> >>> This might interact badly with runtime cpu flags/mask changes
> >>>
> >>> If its used to choose the alignment for allocated frames and
> >>> after some are allocated the cpu flags are changed there could be
> >>> still frames in queues that may not have sufficient alignment for the
> >>> new flags  
> >>
> >> There's no such thing as runtime CPU flag changes.  
> > 
> > We even have an API to change the cpu flags at runtime.
> > 
> > av_set_cpu_flags_mask() and av_force_cpu_flags()
> > 
> > There is no restriction in the API on when they can be called.
> > 
> > you can call av_force_cpu_flags(0) then open a decoder then call
> > av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
> > then open a filter or encoder
> > then run the code and it could crash as the allocated frames from
> > earlier are not aligned enough for the 2nd filter or encoder
> > 
> > There also may be other scenarios where this can occur  
> 
> Alright, I just arrived to this cherry pick during merges. So my
> question is, do i apply it and skip the commit that makes use of it to
> choose alignment within ffmpeg?
> 
> Even if unused at first, I'd rather not skip its addition.

API users can rely on it. So you better make sure that can't break.
Other aspects don't really matter.
James Almer Sept. 28, 2017, 12:37 a.m. UTC | #8
On 9/27/2017 9:19 PM, wm4 wrote:
> On Wed, 27 Sep 2017 21:15:39 -0300
> James Almer <jamrial@gmail.com> wrote:
> 
>> On 9/4/2017 5:53 PM, Michael Niedermayer wrote:
>>> On Mon, Sep 04, 2017 at 09:27:32PM +0200, wm4 wrote:  
>>>> On Mon, 4 Sep 2017 21:18:35 +0200
>>>> Michael Niedermayer <michael@niedermayer.cc> wrote:
>>>>  
>>>>> On Sat, Sep 02, 2017 at 09:47:38PM -0300, James Almer wrote:  
>>>>>> From: Anton Khirnov <anton@khirnov.net>
>>>>>>
>>>>>> (cherry picked from commit e6bff23f1e11aefb16a2b5d6ee72bf7469c5a66e)
>>>>>> Signed-off-by: James Almer <jamrial@gmail.com>
>>>>>> ---
>>>>>> This is (afaics) the last API introduced to libav before the major bump.
>>>>>>
>>>>>> Now checking all the x86 flags that would require aligment of 16 bytes
>>>>>> or more.
>>>>>>
>>>>>>  doc/APIchanges      |  3 +++
>>>>>>  libavutil/cpu.c     | 35 +++++++++++++++++++++++++++++++++++
>>>>>>  libavutil/cpu.h     | 13 +++++++++++++
>>>>>>  libavutil/version.h |  2 +-
>>>>>>  4 files changed, 52 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/doc/APIchanges b/doc/APIchanges
>>>>>> index 4effbf9364..6a57c210a9 100644
>>>>>> --- a/doc/APIchanges
>>>>>> +++ b/doc/APIchanges
>>>>>> @@ -15,6 +15,9 @@ libavutil:     2015-08-28
>>>>>>  
>>>>>>  API changes, most recent first:
>>>>>>  
>>>>>> +2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
>>>>>> +  Add av_cpu_max_align() for querying maximum required data alignment.
>>>>>> +
>>>>>>  2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
>>>>>>    Add avio_read_partial().
>>>>>>  
>>>>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>>>>>> index a22da0fa8c..4f04da2460 100644
>>>>>> --- a/libavutil/cpu.c
>>>>>> +++ b/libavutil/cpu.c
>>>>>> @@ -16,9 +16,11 @@
>>>>>>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>>>>>   */
>>>>>>  
>>>>>> +#include <stddef.h>
>>>>>>  #include <stdint.h>
>>>>>>  #include <stdatomic.h>
>>>>>>  
>>>>>> +#include "attributes.h"
>>>>>>  #include "cpu.h"
>>>>>>  #include "cpu_internal.h"
>>>>>>  #include "config.h"
>>>>>> @@ -299,3 +301,36 @@ int av_cpu_count(void)
>>>>>>  
>>>>>>      return nb_cpus;
>>>>>>  }
>>>>>> +
>>>>>> +size_t av_cpu_max_align(void)
>>>>>> +{
>>>>>> +    int av_unused flags = av_get_cpu_flags();
>>>>>> +
>>>>>> +#if ARCH_ARM || ARCH_AARCH64
>>>>>> +    if (flags & AV_CPU_FLAG_NEON)
>>>>>> +        return 16;
>>>>>> +#elif ARCH_PPC
>>>>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>>>>>> +        return 16;
>>>>>> +#elif ARCH_X86
>>>>>> +    if (flags & (AV_CPU_FLAG_AVX2     |
>>>>>> +                 AV_CPU_FLAG_AVX      |
>>>>>> +                 AV_CPU_FLAG_FMA4     |
>>>>>> +                 AV_CPU_FLAG_FMA3))
>>>>>> +        return 32;
>>>>>> +    if (flags & (AV_CPU_FLAG_XOP      |
>>>>>> +                 AV_CPU_FLAG_AESNI    |
>>>>>> +                 AV_CPU_FLAG_SSE42    |
>>>>>> +                 AV_CPU_FLAG_SSE4     |
>>>>>> +                 AV_CPU_FLAG_SSSE3    |
>>>>>> +                 AV_CPU_FLAG_SSE3     |
>>>>>> +                 AV_CPU_FLAG_SSE2     |
>>>>>> +                 AV_CPU_FLAG_SSE      |
>>>>>> +                 AV_CPU_FLAG_AVXSLOW  |
>>>>>> +                 AV_CPU_FLAG_SSE3SLOW |
>>>>>> +                 AV_CPU_FLAG_SSE2SLOW))
>>>>>> +        return 16;
>>>>>> +#endif
>>>>>> +
>>>>>> +    return 8;
>>>>>> +}
>>>>>> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
>>>>>> index de05593446..9e5d40affe 100644
>>>>>> --- a/libavutil/cpu.h
>>>>>> +++ b/libavutil/cpu.h
>>>>>> @@ -21,6 +21,8 @@
>>>>>>  #ifndef AVUTIL_CPU_H
>>>>>>  #define AVUTIL_CPU_H
>>>>>>  
>>>>>> +#include <stddef.h>
>>>>>> +
>>>>>>  #include "attributes.h"
>>>>>>  
>>>>>>  #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */    
>>>>>
>>>>>  
>>>>>> @@ -113,4 +115,15 @@ int av_parse_cpu_caps(unsigned *flags, const char *s);
>>>>>>   */
>>>>>>  int av_cpu_count(void);
>>>>>>  
>>>>>> +/**
>>>>>> + * Get the maximum data alignment that may be required by FFmpeg.
>>>>>> + *
>>>>>> + * Note that this is affected by the build configuration and the CPU flags mask,
>>>>>> + * so e.g. if the CPU supports AVX, but libavutil has been built with
>>>>>> + * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
>>>>>> + *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
>>>>>> + *  present.
>>>>>> + */
>>>>>> +size_t av_cpu_max_align(void);    
>>>>>
>>>>> This might interact badly with runtime cpu flags/mask changes
>>>>>
>>>>> If its used to choose the alignment for allocated frames and
>>>>> after some are allocated the cpu flags are changed there could be
>>>>> still frames in queues that may not have sufficient alignment for the
>>>>> new flags  
>>>>
>>>> There's no such thing as runtime CPU flag changes.  
>>>
>>> We even have an API to change the cpu flags at runtime.
>>>
>>> av_set_cpu_flags_mask() and av_force_cpu_flags()
>>>
>>> There is no restriction in the API on when they can be called.
>>>
>>> you can call av_force_cpu_flags(0) then open a decoder then call
>>> av_force_cpu_flags(AV_CPU_FLAG_MMX|AV_CPU_FLAG_SSE)
>>> then open a filter or encoder
>>> then run the code and it could crash as the allocated frames from
>>> earlier are not aligned enough for the 2nd filter or encoder
>>>
>>> There also may be other scenarios where this can occur  
>>
>> Alright, I just arrived to this cherry pick during merges. So my
>> question is, do i apply it and skip the commit that makes use of it to
>> choose alignment within ffmpeg?
>>
>> Even if unused at first, I'd rather not skip its addition.
> 
> API users can rely on it. So you better make sure that can't break.
> Other aspects don't really matter.

What do you mean with this? Do i apply it or not? The function will
return a value based on runtime cpuflags. The value is guaranteed to be
correct, but how the user uses it is beyond our control.
wm4 Sept. 28, 2017, 12:59 a.m. UTC | #9
On Wed, 27 Sep 2017 21:37:54 -0300
James Almer <jamrial@gmail.com> wrote:

> > API users can rely on it. So you better make sure that can't break.
> > Other aspects don't really matter.  
> 
> What do you mean with this? Do i apply it or not? The function will
> return a value based on runtime cpuflags. The value is guaranteed to be
> correct, but how the user uses it is beyond our control.

If the user uses the return value of the API to align pointers, and
then passes these pointers to ffmpeg functions that process data, they
must not crash or exhibit low performance. Or in other words, the
alignment returned by the function must be high enough. At least as
long as the global CPU flags are not changed afterwards.

So if this is correct, I see no reason not to apply this patch?

I'm not sure if your question was only about this patch, or potentially
later Libav commits that need to be merged.
James Almer Sept. 28, 2017, 1:26 a.m. UTC | #10
On 9/27/2017 9:59 PM, wm4 wrote:
> On Wed, 27 Sep 2017 21:37:54 -0300
> James Almer <jamrial@gmail.com> wrote:
> 
>>> API users can rely on it. So you better make sure that can't break.
>>> Other aspects don't really matter.  
>>
>> What do you mean with this? Do i apply it or not? The function will
>> return a value based on runtime cpuflags. The value is guaranteed to be
>> correct, but how the user uses it is beyond our control.
> 
> If the user uses the return value of the API to align pointers, and
> then passes these pointers to ffmpeg functions that process data, they
> must not crash or exhibit low performance. Or in other words, the
> alignment returned by the function must be high enough. At least as
> long as the global CPU flags are not changed afterwards.
> 
> So if this is correct, I see no reason not to apply this patch?
> 
> I'm not sure if your question was only about this patch, or potentially
> later Libav commits that need to be merged.

Yes, i mentioned there's a commit after this one that makes use of the
function internally, which based on the above (global CPU flags can be
changed by the user), should probably not be applied in favor of keeping
the current safe hardcoded STRIDE value.

My question was, since it will not be used by ffmpeg itself, does it
make sense adding it? You argued that yes (users may have use for it),
so I'll merge it.
wm4 Sept. 28, 2017, 1:43 a.m. UTC | #11
On Wed, 27 Sep 2017 22:26:48 -0300
James Almer <jamrial@gmail.com> wrote:

> On 9/27/2017 9:59 PM, wm4 wrote:
> > On Wed, 27 Sep 2017 21:37:54 -0300
> > James Almer <jamrial@gmail.com> wrote:
> >   
> >>> API users can rely on it. So you better make sure that can't break.
> >>> Other aspects don't really matter.    
> >>
> >> What do you mean with this? Do i apply it or not? The function will
> >> return a value based on runtime cpuflags. The value is guaranteed to be
> >> correct, but how the user uses it is beyond our control.  
> > 
> > If the user uses the return value of the API to align pointers, and
> > then passes these pointers to ffmpeg functions that process data, they
> > must not crash or exhibit low performance. Or in other words, the
> > alignment returned by the function must be high enough. At least as
> > long as the global CPU flags are not changed afterwards.
> > 
> > So if this is correct, I see no reason not to apply this patch?
> > 
> > I'm not sure if your question was only about this patch, or potentially
> > later Libav commits that need to be merged.  
> 
> Yes, i mentioned there's a commit after this one that makes use of the
> function internally, which based on the above (global CPU flags can be
> changed by the user), should probably not be applied in favor of keeping
> the current safe hardcoded STRIDE value.

I'd argue we should use this commit. But it doesn't matter too much,
since it doesn't affect the API. But only if the API function is
correct. If the API function were to become buggy because FFmpeg does
something else internally, the outcome would be worse.

> My question was, since it will not be used by ffmpeg itself, does it
> make sense adding it? You argued that yes (users may have use for it),
> so I'll merge it.

Yes, definitely.
diff mbox

Patch

diff --git a/doc/APIchanges b/doc/APIchanges
index 4effbf9364..6a57c210a9 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@  libavutil:     2015-08-28
 
 API changes, most recent first:
 
+2017-09-xx - xxxxxxx - lavu 55.75.100 / lavu 55.31.0 - cpu.h
+  Add av_cpu_max_align() for querying maximum required data alignment.
+
 2017-09-01 - xxxxxxx - lavf 57.81.100 - avio.h
   Add avio_read_partial().
 
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index a22da0fa8c..4f04da2460 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -16,9 +16,11 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
 #include <stdint.h>
 #include <stdatomic.h>
 
+#include "attributes.h"
 #include "cpu.h"
 #include "cpu_internal.h"
 #include "config.h"
@@ -299,3 +301,36 @@  int av_cpu_count(void)
 
     return nb_cpus;
 }
+
+size_t av_cpu_max_align(void)
+{
+    int av_unused flags = av_get_cpu_flags();
+
+#if ARCH_ARM || ARCH_AARCH64
+    if (flags & AV_CPU_FLAG_NEON)
+        return 16;
+#elif ARCH_PPC
+    if (flags & AV_CPU_FLAG_ALTIVEC)
+        return 16;
+#elif ARCH_X86
+    if (flags & (AV_CPU_FLAG_AVX2     |
+                 AV_CPU_FLAG_AVX      |
+                 AV_CPU_FLAG_FMA4     |
+                 AV_CPU_FLAG_FMA3))
+        return 32;
+    if (flags & (AV_CPU_FLAG_XOP      |
+                 AV_CPU_FLAG_AESNI    |
+                 AV_CPU_FLAG_SSE42    |
+                 AV_CPU_FLAG_SSE4     |
+                 AV_CPU_FLAG_SSSE3    |
+                 AV_CPU_FLAG_SSE3     |
+                 AV_CPU_FLAG_SSE2     |
+                 AV_CPU_FLAG_SSE      |
+                 AV_CPU_FLAG_AVXSLOW  |
+                 AV_CPU_FLAG_SSE3SLOW |
+                 AV_CPU_FLAG_SSE2SLOW))
+        return 16;
+#endif
+
+    return 8;
+}
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index de05593446..9e5d40affe 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -21,6 +21,8 @@ 
 #ifndef AVUTIL_CPU_H
 #define AVUTIL_CPU_H
 
+#include <stddef.h>
+
 #include "attributes.h"
 
 #define AV_CPU_FLAG_FORCE    0x80000000 /* force usage of selected flags (OR) */
@@ -113,4 +115,15 @@  int av_parse_cpu_caps(unsigned *flags, const char *s);
  */
 int av_cpu_count(void);
 
+/**
+ * Get the maximum data alignment that may be required by FFmpeg.
+ *
+ * Note that this is affected by the build configuration and the CPU flags mask,
+ * so e.g. if the CPU supports AVX, but libavutil has been built with
+ * --disable-avx or the AV_CPU_FLAG_AVX flag has been disabled through
+ *  av_set_cpu_flags_mask(), then this function will behave as if AVX is not
+ *  present.
+ */
+size_t av_cpu_max_align(void);
+
 #endif /* AVUTIL_CPU_H */
diff --git a/libavutil/version.h b/libavutil/version.h
index 6e25b4690c..d99eff5d15 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -80,7 +80,7 @@ 
 
 
 #define LIBAVUTIL_VERSION_MAJOR  55
-#define LIBAVUTIL_VERSION_MINOR  74
+#define LIBAVUTIL_VERSION_MINOR  75
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \