@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
;------------------------------------------------------------------------------
-INIT_XMM sse3
+%macro VECTOR_FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3
add lend, mmsize*2
@@ -43,8 +43,8 @@ ALIGN 16
movaps m4, [cq + lenq+mmsize]
mulps m0, m1
mulps m3, m4
- shufps m1, m1, 0xb1
- shufps m4, m4, 0xb1
+ shufps m1, m1, m1, 0xb1
+ shufps m4, m4, m4, 0xb1
movshdup m2, [tq + lenq]
movshdup m5, [tq + lenq+mmsize]
mulps m2, m1
@@ -58,3 +58,12 @@ ALIGN 16
add lenq, mmsize*2
jl .loop
REP_RET
+%endmacro
+
+INIT_XMM sse3
+VECTOR_FCMUL_ADD
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+VECTOR_FCMUL_ADD
+%endif
@@ -25,6 +25,9 @@
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+ ptrdiff_t len);
+
av_cold void ff_afir_init_x86(AudioFIRContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -32,4 +35,7 @@ av_cold void ff_afir_init_x86(AudioFIRContext *s)
if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3;
}
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_avx;
+ }
}
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/x86/af_afir.asm    | 15 ++++++++++++---
 libavfilter/x86/af_afir_init.c |  6 ++++++
 2 files changed, 18 insertions(+), 3 deletions(-)