[FFmpeg-devel,5/5] x86: Add some additional cpuflag relations

Submitted by James Darnley on June 8, 2017, 11:05 p.m.

Details

Message ID 20170608230502.29258-6-jdarnley@obe.tv
State New
Headers show

Commit Message

James Darnley June 8, 2017, 11:05 p.m.
From: Henrik Gramner <henrik@gramner.com>

Simplifies writing assembly code that depends on available instructions.

LZCNT implies SSE2
BMI1 implies AVX+LZCNT
AVX2 implies BMI2
---
This is the patch I was talking about.  Where should I put the aesni define?
x264 doesn't have it but I will try to get it upstreamed.

 libavutil/x86/x86inc.asm | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

Comments

Henrik Gramner June 9, 2017, 8:08 a.m.
On Fri, Jun 9, 2017 at 1:05 AM, James Darnley <jdarnley@obe.tv> wrote:
> Where should I put the aesni define?

Between sse42 and avx.
James Darnley June 9, 2017, 9:22 a.m.
On 2017-06-09 10:08, Henrik Gramner wrote:
> On Fri, Jun 9, 2017 at 1:05 AM, James Darnley <jdarnley@obe.tv> wrote:
>> Where should I put the aesni define?
> 
> Between sse42 and avx.

Thank you.  I will change this and the first patch to bump the date.
I'll give other people about an hour to make other comments.

Patch hide | download patch | download mbox

diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 2a13ca957e..acda0e0b4e 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -788,25 +788,25 @@  BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 %assign cpuflags_sse      (1<<4) | cpuflags_mmx2
 %assign cpuflags_sse2     (1<<5) | cpuflags_sse
 %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
-%assign cpuflags_sse3     (1<<7) | cpuflags_sse2
-%assign cpuflags_ssse3    (1<<8) | cpuflags_sse3
-%assign cpuflags_sse4     (1<<9) | cpuflags_ssse3
-%assign cpuflags_sse42    (1<<10)| cpuflags_sse4
-%assign cpuflags_avx      (1<<11)| cpuflags_sse42
-%assign cpuflags_xop      (1<<12)| cpuflags_avx
-%assign cpuflags_fma4     (1<<13)| cpuflags_avx
-%assign cpuflags_fma3     (1<<14)| cpuflags_avx
-%assign cpuflags_avx2     (1<<15)| cpuflags_fma3
-
-%assign cpuflags_cache32  (1<<16)
-%assign cpuflags_cache64  (1<<17)
-%assign cpuflags_slowctz  (1<<18)
-%assign cpuflags_lzcnt    (1<<19)
-%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
-%assign cpuflags_atom     (1<<21)
-%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
-%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
-%assign cpuflags_aesni    (1<<24)|cpuflags_sse42
+%assign cpuflags_lzcnt    (1<<7) | cpuflags_sse2 ; LZCNT implies SSE2: the value carries every SSE2 bit
+%assign cpuflags_sse3     (1<<8) | cpuflags_sse2
+%assign cpuflags_ssse3    (1<<9) | cpuflags_sse3
+%assign cpuflags_sse4     (1<<10)| cpuflags_ssse3
+%assign cpuflags_sse42    (1<<11)| cpuflags_sse4
+%assign cpuflags_avx      (1<<12)| cpuflags_sse42
+%assign cpuflags_xop      (1<<13)| cpuflags_avx
+%assign cpuflags_fma4     (1<<14)| cpuflags_avx
+%assign cpuflags_fma3     (1<<15)| cpuflags_avx
+%assign cpuflags_bmi1     (1<<16)| cpuflags_avx|cpuflags_lzcnt ; BMI1 implies AVX and LZCNT
+%assign cpuflags_bmi2     (1<<17)| cpuflags_bmi1 ; BMI2 implies BMI1
+%assign cpuflags_avx2     (1<<18)| cpuflags_fma3|cpuflags_bmi2 ; AVX2 implies FMA3 and BMI2
+
+%assign cpuflags_cache32  (1<<19)
+%assign cpuflags_cache64  (1<<20)
+%assign cpuflags_slowctz  (1<<21)
+%assign cpuflags_aligned  (1<<22) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<23)
+%assign cpuflags_aesni    (1<<24)| cpuflags_sse42 ; AESNI implies SSE4.2
 
 ; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
 %define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)