[FFmpeg-devel] avfilter/x86/af_afir: add avx version of fcmul_add

Submitted by Paul B Mahol on Dec. 30, 2018, 5:48 p.m.

Details

Message ID 20181230174817.19964-1-onemda@gmail.com
State New
Headers show

Commit Message

Paul B Mahol Dec. 30, 2018, 5:48 p.m.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavfilter/x86/af_afir.asm    | 15 ++++++++++++---
 libavfilter/x86/af_afir_init.c |  6 ++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 849d85e70f..e770420a21 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,7 +27,7 @@  SECTION .text
 ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
 ;------------------------------------------------------------------------------
 
-INIT_XMM sse3
+%macro VECTOR_FCMUL_ADD 0
 cglobal fcmul_add, 4,4,6, sum, t, c, len
     shl       lend, 3
     add       lend, mmsize*2
@@ -43,8 +43,8 @@  ALIGN 16
     movaps    m4, [cq + lenq+mmsize]
     mulps     m0, m1
     mulps     m3, m4
-    shufps    m1, m1, 0xb1
-    shufps    m4, m4, 0xb1
+    shufps    m1, m1, m1, 0xb1
+    shufps    m4, m4, m4, 0xb1
     movshdup  m2, [tq + lenq]
     movshdup  m5, [tq + lenq+mmsize]
     mulps     m2, m1
@@ -58,3 +58,12 @@  ALIGN 16
     add       lenq, mmsize*2
     jl .loop
     REP_RET
+%endmacro
+
+INIT_XMM sse3
+VECTOR_FCMUL_ADD            ; expand the macro body after INIT_XMM sse3
+
+%if HAVE_AVX_EXTERNAL       ; the second expansion is assembled only when this is non-zero
+INIT_YMM avx
+VECTOR_FCMUL_ADD            ; same macro body, expanded again after INIT_YMM avx
+%endif
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 6a652b9b83..214aaf9719 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -25,6 +25,9 @@ 
 void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
                        ptrdiff_t len);
 
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+                      ptrdiff_t len); /* same parameter list as ff_fcmul_add_sse3 */
+
 av_cold void ff_afir_init_x86(AudioFIRContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -32,4 +35,7 @@  av_cold void ff_afir_init_x86(AudioFIRContext *s)
     if (EXTERNAL_SSE3(cpu_flags)) {
         s->fcmul_add = ff_fcmul_add_sse3;
     }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->fcmul_add = ff_fcmul_add_avx; /* checked last: replaces the SSE3 pointer if it was set above */
+    }
 }