diff mbox series

[FFmpeg-devel,1/6] avutil/cpu: add AVX512 Icelake flag

Message ID 20220223085735.70854-1-jianhua.wu@intel.com
State New
Headers show
Series [FFmpeg-devel,1/6] avutil/cpu: add AVX512 Icelake flag | expand

Commit Message

Wu, Jianhua Feb. 23, 2022, 8:57 a.m. UTC
From: Wu Jianhua <jianhua.wu@intel.com>

Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
---
 configure                 | 13 +++++++---
 libavutil/cpu.c           |  1 +
 libavutil/cpu.h           |  1 +
 libavutil/x86/cpu.c       |  8 ++++--
 libavutil/x86/cpu.h       |  1 +
 libavutil/x86/x86inc.asm  | 53 ++++++++++++++++++++-------------------
 tests/checkasm/checkasm.c | 35 +++++++++++++-------------
 7 files changed, 63 insertions(+), 49 deletions(-)

Comments

Wu, Jianhua March 2, 2022, 5:33 a.m. UTC | #1
Ping.
> -----Original Message-----
> From: Wu, Jianhua <jianhua.wu@intel.com>
> Sent: Wednesday, February 23, 2022 4:58 PM
> To: ffmpeg-devel@ffmpeg.org
> Cc: Wu, Jianhua <jianhua.wu@intel.com>
> Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> 
> From: Wu Jianhua <jianhua.wu@intel.com>
> 
> Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
> ---
>  configure                 | 13 +++++++---
>  libavutil/cpu.c           |  1 +
>  libavutil/cpu.h           |  1 +
>  libavutil/x86/cpu.c       |  8 ++++--
>  libavutil/x86/cpu.h       |  1 +
>  libavutil/x86/x86inc.asm  | 53 ++++++++++++++++++++-------------------
>  tests/checkasm/checkasm.c | 35 +++++++++++++-------------
>  7 files changed, 63 insertions(+), 49 deletions(-)
> 
> diff --git a/configure b/configure
> index 1535dc3c5b..d88c2ae979 100755
> --- a/configure
> +++ b/configure
> @@ -444,6 +444,7 @@ Optimization options (experts only):
>    --disable-fma4           disable FMA4 optimizations
>    --disable-avx2           disable AVX2 optimizations
>    --disable-avx512         disable AVX-512 optimizations
> +  --disable-avx512icl      disable AVX-512ICL optimizations
>    --disable-aesni          disable AESNI optimizations
>    --disable-armv5te        disable armv5te optimizations
>    --disable-armv6          disable armv6 optimizations
> @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
>      avx
>      avx2
>      avx512
> +    avx512icl
>      fma3
>      fma4
>      mmx
> @@ -2666,6 +2668,7 @@ fma3_deps="avx"
>  fma4_deps="avx"
>  avx2_deps="avx"
>  avx512_deps="avx2"
> +avx512icl_deps="avx512"
> 
>  mmx_external_deps="x86asm"
>  mmx_inline_deps="inline_asm x86"
> @@ -6128,10 +6131,11 @@ EOF
>              elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>          esac
> 
> -        enabled avx512 && check_x86asm avx512_external "vmovdqa32
> [eax]{k1}{z}, zmm0"
> -        enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0,
> ymm0, 0"
> -        enabled xop    && check_x86asm xop_external    "vpmacsdd xmm0,
> xmm1, xmm2, xmm3"
> -        enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,
> ymm1, ymm2, ymm3"
> +        enabled avx512    && check_x86asm avx512_external    "vmovdqa32
> [eax]{k1}{z}, zmm0"
> +        enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds
> zmm31{k1}{z}, zmm29, zmm28"
> +        enabled avx2      && check_x86asm avx2_external      "vextracti128
> xmm0, ymm0, 0"
> +        enabled xop       && check_x86asm xop_external       "vpmacsdd xmm0,
> xmm1, xmm2, xmm3"
> +        enabled fma4      && check_x86asm fma4_external      "vfmaddps ymm0,
> ymm1, ymm2, ymm3"
>          check_x86asm cpunop          "CPU amdnop"
>      fi
> 
> @@ -7471,6 +7475,7 @@ if enabled x86; then
>      echo "AVX enabled               ${avx-no}"
>      echo "AVX2 enabled              ${avx2-no}"
>      echo "AVX-512 enabled           ${avx512-no}"
> +    echo "AVX-512ICL enabled        ${avx512icl-no}"
>      echo "XOP enabled               ${xop-no}"
>      echo "FMA3 enabled              ${fma3-no}"
>      echo "FMA4 enabled              ${fma4-no}"
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 1368502245..833c220192
> 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
>          { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_CMOV     },    .unit = "flags" },
>          { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AESNI    },    .unit = "flags" },
>          { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AVX512   },    .unit = "flags" },
> +        { "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
>          { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
> 
>  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
> diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> index ce9bf14bf7..9711e574c5 100644
> --- a/libavutil/cpu.h
> +++ b/libavutil/cpu.h
> @@ -54,6 +54,7 @@
>  #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction
> Set 1
>  #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction
> Set 2
>  #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions:
> requires OS support even if YMM/ZMM registers aren't used
> +#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
>  #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow
> gathers.
> 
>  #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
> diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index
> 7b13fcae91..d6cd4fab9c 100644
> --- a/libavutil/x86/cpu.c
> +++ b/libavutil/x86/cpu.c
> @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
>              rval |= AV_CPU_FLAG_AVX2;
>  #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
>          if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
> -            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) ==
> 0xd0030000)
> +            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) {
>                  rval |= AV_CPU_FLAG_AVX512;
> -
> +#if HAVE_AVX512ICL
> +                if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42)
> +                    rval |= AV_CPU_FLAG_AVX512ICL;
> +#endif /* HAVE_AVX512ICL */
> +            }
>          }
>  #endif /* HAVE_AVX512 */
>  #endif /* HAVE_AVX2 */
> diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index
> 937c697fa0..40a1eef0ab 100644
> --- a/libavutil/x86/cpu.h
> +++ b/libavutil/x86/cpu.h
> @@ -80,6 +80,7 @@
>  #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags,
> _EXTERNAL, AVX2, AVX)
>  #define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL,
> AESNI)
>  #define EXTERNAL_AVX512(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL,
> AVX512)
> +#define EXTERNAL_AVX512ICL(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL,
> AVX512ICL)
> 
>  #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE,
> AMD3DNOW)
>  #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE,
> AMD3DNOWEXT)
> diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index
> 01c35e3a4b..251ee797de 100644
> --- a/libavutil/x86/x86inc.asm
> +++ b/libavutil/x86/x86inc.asm
> @@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge,
> jng, jnge, ja, jae,
> 
>  ; cpuflags
> 
> -%assign cpuflags_mmx      (1<<0)
> -%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
> -%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
> -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
> -%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
> -%assign cpuflags_sse2     (1<<5) | cpuflags_sse
> -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
> -%assign cpuflags_lzcnt    (1<<7) | cpuflags_sse2
> -%assign cpuflags_sse3     (1<<8) | cpuflags_sse2
> -%assign cpuflags_ssse3    (1<<9) | cpuflags_sse3
> -%assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
> -%assign cpuflags_sse42    (1<<11)| cpuflags_sse4
> -%assign cpuflags_aesni    (1<<12)| cpuflags_sse42
> -%assign cpuflags_avx      (1<<13)| cpuflags_sse42
> -%assign cpuflags_xop      (1<<14)| cpuflags_avx
> -%assign cpuflags_fma4     (1<<15)| cpuflags_avx
> -%assign cpuflags_fma3     (1<<16)| cpuflags_avx
> -%assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
> -%assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
> -%assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
> -%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> -
> -%assign cpuflags_cache32  (1<<21)
> -%assign cpuflags_cache64  (1<<22)
> -%assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function variant
> -%assign cpuflags_atom     (1<<24)
> +%assign cpuflags_mmx       (1<<0)
> +%assign cpuflags_mmx2      (1<<1) | cpuflags_mmx
> +%assign cpuflags_3dnow     (1<<2) | cpuflags_mmx
> +%assign cpuflags_3dnowext  (1<<3) | cpuflags_3dnow
> +%assign cpuflags_sse       (1<<4) | cpuflags_mmx2
> +%assign cpuflags_sse2      (1<<5) | cpuflags_sse
> +%assign cpuflags_sse2slow  (1<<6) | cpuflags_sse2
> +%assign cpuflags_lzcnt     (1<<7) | cpuflags_sse2
> +%assign cpuflags_sse3      (1<<8) | cpuflags_sse2
> +%assign cpuflags_ssse3     (1<<9) | cpuflags_sse3
> +%assign cpuflags_sse4      (1<<10)| cpuflags_ssse3
> +%assign cpuflags_sse42     (1<<11)| cpuflags_sse4
> +%assign cpuflags_aesni     (1<<12)| cpuflags_sse42
> +%assign cpuflags_avx       (1<<13)| cpuflags_sse42
> +%assign cpuflags_xop       (1<<14)| cpuflags_avx
> +%assign cpuflags_fma4      (1<<15)| cpuflags_avx
> +%assign cpuflags_fma3      (1<<16)| cpuflags_avx
> +%assign cpuflags_bmi1      (1<<17)| cpuflags_avx|cpuflags_lzcnt
> +%assign cpuflags_bmi2      (1<<18)| cpuflags_bmi1
> +%assign cpuflags_avx2      (1<<19)| cpuflags_fma3|cpuflags_bmi2
> +%assign cpuflags_avx512    (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> +%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512
> +
> +%assign cpuflags_cache32   (1<<21)
> +%assign cpuflags_cache64   (1<<22)
> +%assign cpuflags_aligned   (1<<23) ; not a cpu feature, but a function variant
> +%assign cpuflags_atom      (1<<24)
> 
>  ; Returns a boolean value expressing whether or not the specified cpuflag is
> enabled.
>  %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) -
> 1) >> 31) & 1)
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index
> f74125e810..e77b4ec20f 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -220,23 +220,24 @@ static const struct {
>      { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
>      { "MSA",      "msa",      AV_CPU_FLAG_MSA },
>  #elif ARCH_X86
> -    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> -    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
> -    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
> -    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
> -    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
> -    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> -    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> -    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> -    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
> -    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
> -    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
> -    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
> -    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
> -    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
> -    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
> -    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
> -    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
> +    { "MMX",        "mmx",       AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> +    { "MMXEXT",     "mmxext",    AV_CPU_FLAG_MMXEXT },
> +    { "3DNOW",      "3dnow",     AV_CPU_FLAG_3DNOW },
> +    { "3DNOWEXT",   "3dnowext",  AV_CPU_FLAG_3DNOWEXT },
> +    { "SSE",        "sse",       AV_CPU_FLAG_SSE },
> +    { "SSE2",       "sse2",      AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> +    { "SSE3",       "sse3",      AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> +    { "SSSE3",      "ssse3",     AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> +    { "SSE4.1",     "sse4",      AV_CPU_FLAG_SSE4 },
> +    { "SSE4.2",     "sse42",     AV_CPU_FLAG_SSE42 },
> +    { "AES-NI",     "aesni",     AV_CPU_FLAG_AESNI },
> +    { "AVX",        "avx",       AV_CPU_FLAG_AVX },
> +    { "XOP",        "xop",       AV_CPU_FLAG_XOP },
> +    { "FMA3",       "fma3",      AV_CPU_FLAG_FMA3 },
> +    { "FMA4",       "fma4",      AV_CPU_FLAG_FMA4 },
> +    { "AVX2",       "avx2",      AV_CPU_FLAG_AVX2 },
> +    { "AVX-512",    "avx512",    AV_CPU_FLAG_AVX512 },
> +    { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
>  #elif ARCH_LOONGARCH
>      { "LSX",      "lsx",      AV_CPU_FLAG_LSX },
>      { "LASX",     "lasx",     AV_CPU_FLAG_LASX },
> --
> 2.17.1
Wu, Jianhua March 9, 2022, 7:38 a.m. UTC | #2
Ping.
> From: Wu, Jianhua
> Sent: Wednesday, March 2, 2022 1:34 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: RE: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> 
> Ping.
> > From: Wu, Jianhua <jianhua.wu@intel.com>
> > Sent: Wednesday, February 23, 2022 4:58 PM
> > To: ffmpeg-devel@ffmpeg.org
> > Cc: Wu, Jianhua <jianhua.wu@intel.com>
> > Subject: [PATCH 1/6] avutil/cpu: add AVX512 Icelake flag
> >
> > From: Wu Jianhua <jianhua.wu@intel.com>
> >
> > Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
> > ---
> >  configure                 | 13 +++++++---
> >  libavutil/cpu.c           |  1 +
> >  libavutil/cpu.h           |  1 +
> >  libavutil/x86/cpu.c       |  8 ++++--
> >  libavutil/x86/cpu.h       |  1 +
> >  libavutil/x86/x86inc.asm  | 53
> > ++++++++++++++++++++-------------------
> >  tests/checkasm/checkasm.c | 35 +++++++++++++-------------
> >  7 files changed, 63 insertions(+), 49 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 1535dc3c5b..d88c2ae979 100755
> > --- a/configure
> > +++ b/configure
> > @@ -444,6 +444,7 @@ Optimization options (experts only):
> >    --disable-fma4           disable FMA4 optimizations
> >    --disable-avx2           disable AVX2 optimizations
> >    --disable-avx512         disable AVX-512 optimizations
> > +  --disable-avx512icl      disable AVX-512ICL optimizations
> >    --disable-aesni          disable AESNI optimizations
> >    --disable-armv5te        disable armv5te optimizations
> >    --disable-armv6          disable armv6 optimizations
> > @@ -2098,6 +2099,7 @@ ARCH_EXT_LIST_X86_SIMD="
> >      avx
> >      avx2
> >      avx512
> > +    avx512icl
> >      fma3
> >      fma4
> >      mmx
> > @@ -2666,6 +2668,7 @@ fma3_deps="avx"
> >  fma4_deps="avx"
> >  avx2_deps="avx"
> >  avx512_deps="avx2"
> > +avx512icl_deps="avx512"
> >
> >  mmx_external_deps="x86asm"
> >  mmx_inline_deps="inline_asm x86"
> > @@ -6128,10 +6131,11 @@ EOF
> >              elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
> >          esac
> >
> > -        enabled avx512 && check_x86asm avx512_external "vmovdqa32
> > [eax]{k1}{z}, zmm0"
> > -        enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0,
> > ymm0, 0"
> > -        enabled xop    && check_x86asm xop_external    "vpmacsdd xmm0,
> > xmm1, xmm2, xmm3"
> > -        enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,
> > ymm1, ymm2, ymm3"
> > +        enabled avx512    && check_x86asm avx512_external    "vmovdqa32
> > [eax]{k1}{z}, zmm0"
> > +        enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28"
> > +        enabled avx2      && check_x86asm avx2_external      "vextracti128
> > xmm0, ymm0, 0"
> > +        enabled xop       && check_x86asm xop_external       "vpmacsdd xmm0,
> > xmm1, xmm2, xmm3"
> > +        enabled fma4      && check_x86asm fma4_external      "vfmaddps
> ymm0,
> > ymm1, ymm2, ymm3"
> >          check_x86asm cpunop          "CPU amdnop"
> >      fi
> >
> > @@ -7471,6 +7475,7 @@ if enabled x86; then
> >      echo "AVX enabled               ${avx-no}"
> >      echo "AVX2 enabled              ${avx2-no}"
> >      echo "AVX-512 enabled           ${avx512-no}"
> > +    echo "AVX-512ICL enabled        ${avx512icl-no}"
> >      echo "XOP enabled               ${xop-no}"
> >      echo "FMA3 enabled              ${fma3-no}"
> >      echo "FMA4 enabled              ${fma4-no}"
> > diff --git a/libavutil/cpu.c b/libavutil/cpu.c index
> > 1368502245..833c220192
> > 100644
> > --- a/libavutil/cpu.c
> > +++ b/libavutil/cpu.c
> > @@ -137,6 +137,7 @@ int av_parse_cpu_caps(unsigned *flags, const char
> *s)
> >          { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_CMOV     },    .unit = "flags" },
> >          { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AESNI    },    .unit = "flags" },
> >          { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512   },    .unit = "flags" },
> > +        { "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
> >          { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 =
> > AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
> >
> >  #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
> > diff --git a/libavutil/cpu.h b/libavutil/cpu.h
> > index ce9bf14bf7..9711e574c5 100644
> > --- a/libavutil/cpu.h
> > +++ b/libavutil/cpu.h
> > @@ -54,6 +54,7 @@
> >  #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation
> Instruction
> > Set 1
> >  #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation
> Instruction
> > Set 2
> >  #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions:
> > requires OS support even if YMM/ZMM registers aren't used
> > +#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
> >  #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow
> gathers.
> >
> >  #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
> > diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index
> > 7b13fcae91..d6cd4fab9c 100644
> > --- a/libavutil/x86/cpu.c
> > +++ b/libavutil/x86/cpu.c
> > @@ -150,9 +150,13 @@ int ff_get_cpu_flags_x86(void)
> >              rval |= AV_CPU_FLAG_AVX2;
> >  #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
> >          if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
> > -            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) ==
> > 0xd0030000)
> > +            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) ==
> > + 0xd0030000) {
> >                  rval |= AV_CPU_FLAG_AVX512;
> > -
> > +#if HAVE_AVX512ICL
> > +                if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) ==
> 0x5f42)
> > +                    rval |= AV_CPU_FLAG_AVX512ICL;
> > +#endif /* HAVE_AVX512ICL */
> > +            }
> >          }
> >  #endif /* HAVE_AVX512 */
> >  #endif /* HAVE_AVX2 */
> > diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index
> > 937c697fa0..40a1eef0ab 100644
> > --- a/libavutil/x86/cpu.h
> > +++ b/libavutil/x86/cpu.h
> > @@ -80,6 +80,7 @@
> >  #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags,
> > _EXTERNAL, AVX2, AVX)
> >  #define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AESNI)
> >  #define EXTERNAL_AVX512(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AVX512)
> > +#define EXTERNAL_AVX512ICL(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL,
> > AVX512ICL)
> >
> >  #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE,
> > AMD3DNOW)
> >  #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE,
> > AMD3DNOWEXT)
> > diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index
> > 01c35e3a4b..251ee797de 100644
> > --- a/libavutil/x86/x86inc.asm
> > +++ b/libavutil/x86/x86inc.asm
> > @@ -817,32 +817,33 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl,
> > jnle, jg, jge, jng, jnge, ja, jae,
> >
> >  ; cpuflags
> >
> > -%assign cpuflags_mmx      (1<<0)
> > -%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
> > -%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
> > -%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
> > -%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
> > -%assign cpuflags_sse2     (1<<5) | cpuflags_sse
> > -%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
> > -%assign cpuflags_lzcnt    (1<<7) | cpuflags_sse2
> > -%assign cpuflags_sse3     (1<<8) | cpuflags_sse2
> > -%assign cpuflags_ssse3    (1<<9) | cpuflags_sse3
> > -%assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
> > -%assign cpuflags_sse42    (1<<11)| cpuflags_sse4
> > -%assign cpuflags_aesni    (1<<12)| cpuflags_sse42
> > -%assign cpuflags_avx      (1<<13)| cpuflags_sse42
> > -%assign cpuflags_xop      (1<<14)| cpuflags_avx
> > -%assign cpuflags_fma4     (1<<15)| cpuflags_avx
> > -%assign cpuflags_fma3     (1<<16)| cpuflags_avx
> > -%assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
> > -%assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
> > -%assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
> > -%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> > -
> > -%assign cpuflags_cache32  (1<<21)
> > -%assign cpuflags_cache64  (1<<22)
> > -%assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function
> variant
> > -%assign cpuflags_atom     (1<<24)
> > +%assign cpuflags_mmx       (1<<0)
> > +%assign cpuflags_mmx2      (1<<1) | cpuflags_mmx
> > +%assign cpuflags_3dnow     (1<<2) | cpuflags_mmx
> > +%assign cpuflags_3dnowext  (1<<3) | cpuflags_3dnow
> > +%assign cpuflags_sse       (1<<4) | cpuflags_mmx2
> > +%assign cpuflags_sse2      (1<<5) | cpuflags_sse
> > +%assign cpuflags_sse2slow  (1<<6) | cpuflags_sse2
> > +%assign cpuflags_lzcnt     (1<<7) | cpuflags_sse2
> > +%assign cpuflags_sse3      (1<<8) | cpuflags_sse2
> > +%assign cpuflags_ssse3     (1<<9) | cpuflags_sse3
> > +%assign cpuflags_sse4      (1<<10)| cpuflags_ssse3
> > +%assign cpuflags_sse42     (1<<11)| cpuflags_sse4
> > +%assign cpuflags_aesni     (1<<12)| cpuflags_sse42
> > +%assign cpuflags_avx       (1<<13)| cpuflags_sse42
> > +%assign cpuflags_xop       (1<<14)| cpuflags_avx
> > +%assign cpuflags_fma4      (1<<15)| cpuflags_avx
> > +%assign cpuflags_fma3      (1<<16)| cpuflags_avx
> > +%assign cpuflags_bmi1      (1<<17)| cpuflags_avx|cpuflags_lzcnt
> > +%assign cpuflags_bmi2      (1<<18)| cpuflags_bmi1
> > +%assign cpuflags_avx2      (1<<19)| cpuflags_fma3|cpuflags_bmi2
> > +%assign cpuflags_avx512    (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
> > +%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512
> > +
> > +%assign cpuflags_cache32   (1<<21)
> > +%assign cpuflags_cache64   (1<<22)
> > +%assign cpuflags_aligned   (1<<23) ; not a cpu feature, but a function
> variant
> > +%assign cpuflags_atom      (1<<24)
> >
> >  ; Returns a boolean value expressing whether or not the specified
> > cpuflag is enabled.
> >  %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) -
> > 1) >> 31) & 1)
> > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> > index f74125e810..e77b4ec20f 100644
> > --- a/tests/checkasm/checkasm.c
> > +++ b/tests/checkasm/checkasm.c
> > @@ -220,23 +220,24 @@ static const struct {
> >      { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
> >      { "MSA",      "msa",      AV_CPU_FLAG_MSA },
> >  #elif ARCH_X86
> > -    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> > -    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
> > -    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
> > -    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
> > -    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
> > -    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> > -    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> > -    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> > -    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
> > -    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
> > -    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
> > -    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
> > -    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
> > -    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
> > -    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
> > -    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
> > -    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
> > +    { "MMX",        "mmx",       AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
> > +    { "MMXEXT",     "mmxext",    AV_CPU_FLAG_MMXEXT },
> > +    { "3DNOW",      "3dnow",     AV_CPU_FLAG_3DNOW },
> > +    { "3DNOWEXT",   "3dnowext",  AV_CPU_FLAG_3DNOWEXT },
> > +    { "SSE",        "sse",       AV_CPU_FLAG_SSE },
> > +    { "SSE2",       "sse2",
> AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
> > +    { "SSE3",       "sse3",
> AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
> > +    { "SSSE3",      "ssse3",     AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
> > +    { "SSE4.1",     "sse4",      AV_CPU_FLAG_SSE4 },
> > +    { "SSE4.2",     "sse42",     AV_CPU_FLAG_SSE42 },
> > +    { "AES-NI",     "aesni",     AV_CPU_FLAG_AESNI },
> > +    { "AVX",        "avx",       AV_CPU_FLAG_AVX },
> > +    { "XOP",        "xop",       AV_CPU_FLAG_XOP },
> > +    { "FMA3",       "fma3",      AV_CPU_FLAG_FMA3 },
> > +    { "FMA4",       "fma4",      AV_CPU_FLAG_FMA4 },
> > +    { "AVX2",       "avx2",      AV_CPU_FLAG_AVX2 },
> > +    { "AVX-512",    "avx512",    AV_CPU_FLAG_AVX512 },
> > +    { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
> >  #elif ARCH_LOONGARCH
> >      { "LSX",      "lsx",      AV_CPU_FLAG_LSX },
> >      { "LASX",     "lasx",     AV_CPU_FLAG_LASX },
> > --
> > 2.17.1

Hi there,

These patches were sent two weeks ago but have received no response so far. Could the
maintainers of the CPU flags and the native HEVC decoder help review this patchset?

Thanks,
Jianhua
Henrik Gramner March 10, 2022, 2:34 p.m. UTC | #3
On Wed, Feb 23, 2022 at 9:57 AM <jianhua.wu-at-intel.com@ffmpeg.org> wrote:
>
> From: Wu Jianhua <jianhua.wu@intel.com>
>
> Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
> ---
>  configure                 | 13 +++++++---
>  libavutil/cpu.c           |  1 +
>  libavutil/cpu.h           |  1 +
>  libavutil/x86/cpu.c       |  8 ++++--
>  libavutil/x86/cpu.h       |  1 +
>  libavutil/x86/x86inc.asm  | 53 ++++++++++++++++++++-------------------
>  tests/checkasm/checkasm.c | 35 +++++++++++++-------------
>  7 files changed, 63 insertions(+), 49 deletions(-)

This patch LGTM (didn't look at the actual asm code yet though).
diff mbox series

Patch

diff --git a/configure b/configure
index 1535dc3c5b..d88c2ae979 100755
--- a/configure
+++ b/configure
@@ -444,6 +444,7 @@  Optimization options (experts only):
   --disable-fma4           disable FMA4 optimizations
   --disable-avx2           disable AVX2 optimizations
   --disable-avx512         disable AVX-512 optimizations
+  --disable-avx512icl      disable AVX-512ICL optimizations
   --disable-aesni          disable AESNI optimizations
   --disable-armv5te        disable armv5te optimizations
   --disable-armv6          disable armv6 optimizations
@@ -2098,6 +2099,7 @@  ARCH_EXT_LIST_X86_SIMD="
     avx
     avx2
     avx512
+    avx512icl
     fma3
     fma4
     mmx
@@ -2666,6 +2668,7 @@  fma3_deps="avx"
 fma4_deps="avx"
 avx2_deps="avx"
 avx512_deps="avx2"
+avx512icl_deps="avx512"
 
 mmx_external_deps="x86asm"
 mmx_inline_deps="inline_asm x86"
@@ -6128,10 +6131,11 @@  EOF
             elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
         esac
 
-        enabled avx512 && check_x86asm avx512_external "vmovdqa32 [eax]{k1}{z}, zmm0"
-        enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0, ymm0, 0"
-        enabled xop    && check_x86asm xop_external    "vpmacsdd xmm0, xmm1, xmm2, xmm3"
-        enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0, ymm1, ymm2, ymm3"
+        enabled avx512    && check_x86asm avx512_external    "vmovdqa32 [eax]{k1}{z}, zmm0"
+        enabled avx512icl && check_x86asm avx512icl_external "vpdpwssds zmm31{k1}{z}, zmm29, zmm28"
+        enabled avx2      && check_x86asm avx2_external      "vextracti128 xmm0, ymm0, 0"
+        enabled xop       && check_x86asm xop_external       "vpmacsdd xmm0, xmm1, xmm2, xmm3"
+        enabled fma4      && check_x86asm fma4_external      "vfmaddps ymm0, ymm1, ymm2, ymm3"
         check_x86asm cpunop          "CPU amdnop"
     fi
 
@@ -7471,6 +7475,7 @@  if enabled x86; then
     echo "AVX enabled               ${avx-no}"
     echo "AVX2 enabled              ${avx2-no}"
     echo "AVX-512 enabled           ${avx512-no}"
+    echo "AVX-512ICL enabled        ${avx512icl-no}"
     echo "XOP enabled               ${xop-no}"
     echo "FMA3 enabled              ${fma3-no}"
     echo "FMA4 enabled              ${fma4-no}"
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 1368502245..833c220192 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -137,6 +137,7 @@  int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
         { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI    },    .unit = "flags" },
         { "avx512"  , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512   },    .unit = "flags" },
+        { "avx512icl",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX512ICL   }, .unit = "flags" },
         { "slowgather", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SLOW_GATHER }, .unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index ce9bf14bf7..9711e574c5 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -54,6 +54,7 @@ 
 #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
 #define AV_CPU_FLAG_BMI2        0x40000 ///< Bit Manipulation Instruction Set 2
 #define AV_CPU_FLAG_AVX512     0x100000 ///< AVX-512 functions: requires OS support even if YMM/ZMM registers aren't used
+#define AV_CPU_FLAG_AVX512ICL  0x200000 ///< F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
 #define AV_CPU_FLAG_SLOW_GATHER  0x2000000 ///< CPU has slow gathers.
 
 #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 7b13fcae91..d6cd4fab9c 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -150,9 +150,13 @@  int ff_get_cpu_flags_x86(void)
             rval |= AV_CPU_FLAG_AVX2;
 #if HAVE_AVX512 /* F, CD, BW, DQ, VL */
         if ((xcr0_lo & 0xe0) == 0xe0) { /* OPMASK/ZMM state */
-            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000)
+            if ((rval & AV_CPU_FLAG_AVX2) && (ebx & 0xd0030000) == 0xd0030000) {
                 rval |= AV_CPU_FLAG_AVX512;
-
+#if HAVE_AVX512ICL
+                if ((ebx & 0xd0200000) == 0xd0200000 && (ecx & 0x5f42) == 0x5f42)
+                    rval |= AV_CPU_FLAG_AVX512ICL;
+#endif /* HAVE_AVX512ICL */
+            }
         }
 #endif /* HAVE_AVX512 */
 #endif /* HAVE_AVX2 */
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 937c697fa0..40a1eef0ab 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -80,6 +80,7 @@ 
 #define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
 #define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
 #define EXTERNAL_AVX512(flags)      CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512)
+#define EXTERNAL_AVX512ICL(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, AVX512ICL)
 
 #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
 #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 01c35e3a4b..251ee797de 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -817,32 +817,33 @@  BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 
 ; cpuflags
 
-%assign cpuflags_mmx      (1<<0)
-%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
-%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
-%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
-%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
-%assign cpuflags_sse2     (1<<5) | cpuflags_sse
-%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
-%assign cpuflags_lzcnt    (1<<7) | cpuflags_sse2
-%assign cpuflags_sse3     (1<<8) | cpuflags_sse2
-%assign cpuflags_ssse3    (1<<9) | cpuflags_sse3
-%assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
-%assign cpuflags_sse42    (1<<11)| cpuflags_sse4
-%assign cpuflags_aesni    (1<<12)| cpuflags_sse42
-%assign cpuflags_avx      (1<<13)| cpuflags_sse42
-%assign cpuflags_xop      (1<<14)| cpuflags_avx
-%assign cpuflags_fma4     (1<<15)| cpuflags_avx
-%assign cpuflags_fma3     (1<<16)| cpuflags_avx
-%assign cpuflags_bmi1     (1<<17)| cpuflags_avx|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<18)| cpuflags_bmi1
-%assign cpuflags_avx2     (1<<19)| cpuflags_fma3|cpuflags_bmi2
-%assign cpuflags_avx512   (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
-
-%assign cpuflags_cache32  (1<<21)
-%assign cpuflags_cache64  (1<<22)
-%assign cpuflags_aligned  (1<<23) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<24)
+%assign cpuflags_mmx       (1<<0)
+%assign cpuflags_mmx2      (1<<1) | cpuflags_mmx
+%assign cpuflags_3dnow     (1<<2) | cpuflags_mmx
+%assign cpuflags_3dnowext  (1<<3) | cpuflags_3dnow
+%assign cpuflags_sse       (1<<4) | cpuflags_mmx2
+%assign cpuflags_sse2      (1<<5) | cpuflags_sse
+%assign cpuflags_sse2slow  (1<<6) | cpuflags_sse2
+%assign cpuflags_lzcnt     (1<<7) | cpuflags_sse2
+%assign cpuflags_sse3      (1<<8) | cpuflags_sse2
+%assign cpuflags_ssse3     (1<<9) | cpuflags_sse3
+%assign cpuflags_sse4      (1<<10)| cpuflags_ssse3
+%assign cpuflags_sse42     (1<<11)| cpuflags_sse4
+%assign cpuflags_aesni     (1<<12)| cpuflags_sse42
+%assign cpuflags_avx       (1<<13)| cpuflags_sse42
+%assign cpuflags_xop       (1<<14)| cpuflags_avx
+%assign cpuflags_fma4      (1<<15)| cpuflags_avx
+%assign cpuflags_fma3      (1<<16)| cpuflags_avx
+%assign cpuflags_bmi1      (1<<17)| cpuflags_avx|cpuflags_lzcnt
+%assign cpuflags_bmi2      (1<<18)| cpuflags_bmi1
+%assign cpuflags_avx2      (1<<19)| cpuflags_fma3|cpuflags_bmi2
+%assign cpuflags_avx512    (1<<20)| cpuflags_avx2 ; F, CD, BW, DQ, VL
+%assign cpuflags_avx512icl (1<<25)| cpuflags_avx512 ; Ice Lake additions; bit 25: bits 21-24 are used by the flags below
+
+%assign cpuflags_cache32   (1<<21)
+%assign cpuflags_cache64   (1<<22)
+%assign cpuflags_aligned   (1<<23) ; not a cpu feature, but a function variant
+%assign cpuflags_atom      (1<<24)
 
 ; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
 %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index f74125e810..e77b4ec20f 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -220,23 +220,24 @@  static const struct {
     { "MMI",      "mmi",      AV_CPU_FLAG_MMI },
     { "MSA",      "msa",      AV_CPU_FLAG_MSA },
 #elif ARCH_X86
-    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
-    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
-    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
-    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
-    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
-    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
-    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
-    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
-    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
-    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
-    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
-    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
-    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
-    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
-    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
-    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
-    { "AVX-512",  "avx512",   AV_CPU_FLAG_AVX512 },
+    { "MMX",        "mmx",       AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
+    { "MMXEXT",     "mmxext",    AV_CPU_FLAG_MMXEXT },
+    { "3DNOW",      "3dnow",     AV_CPU_FLAG_3DNOW },
+    { "3DNOWEXT",   "3dnowext",  AV_CPU_FLAG_3DNOWEXT },
+    { "SSE",        "sse",       AV_CPU_FLAG_SSE },
+    { "SSE2",       "sse2",      AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
+    { "SSE3",       "sse3",      AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
+    { "SSSE3",      "ssse3",     AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
+    { "SSE4.1",     "sse4",      AV_CPU_FLAG_SSE4 },
+    { "SSE4.2",     "sse42",     AV_CPU_FLAG_SSE42 },
+    { "AES-NI",     "aesni",     AV_CPU_FLAG_AESNI },
+    { "AVX",        "avx",       AV_CPU_FLAG_AVX },
+    { "XOP",        "xop",       AV_CPU_FLAG_XOP },
+    { "FMA3",       "fma3",      AV_CPU_FLAG_FMA3 },
+    { "FMA4",       "fma4",      AV_CPU_FLAG_FMA4 },
+    { "AVX2",       "avx2",      AV_CPU_FLAG_AVX2 },
+    { "AVX-512",    "avx512",    AV_CPU_FLAG_AVX512 },
+    { "AVX-512ICL", "avx512icl", AV_CPU_FLAG_AVX512ICL },
 #elif ARCH_LOONGARCH
     { "LSX",      "lsx",      AV_CPU_FLAG_LSX },
     { "LASX",     "lasx",     AV_CPU_FLAG_LASX },