From 7735a84fd0fdae731955f50bddba8dfef395713b Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sun, 10 Sep 2023 19:25:20 +0200
Subject: [PATCH] avfilter/x86/af_afir: add FMA3 SIMD
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavfilter/x86/af_afir.asm | 27 +++++++++++++++++++++++++++
libavfilter/x86/af_afir_init.c | 5 +++++
2 files changed, 32 insertions(+)
@@ -67,3 +67,30 @@ INIT_XMM sse3
FCMUL_ADD
INIT_YMM avx
FCMUL_ADD
+
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3 ; FMA3 variant of fcmul_add: sum += t * c, treating each adjacent float pair as one complex (re, im) value (x86inc convention: mN = ymmN, mmsize = 32 bytes)
+cglobal fcmul_add, 4,4,4, sum, t, c, len ; 4 args, 4 GPRs, 4 vector regs (m0-m3); sum/t/c are the float buffers, len the count from the C prototype
+ shl lend, 3 ; len *= 8: count -> byte size, 8 bytes (two floats, one re/im pair) per unit; operates on the 32-bit view of len
+ add tq, lenq ; move t to the end of its region so it can be indexed with a negative offset
+ add cq, lenq ; same for c
+ add sumq, lenq ; same for sum
+ neg lenq ; len = -(byte size); it is the running offset and counts up toward zero
+.loop: ; one iteration processes mmsize bytes of each buffer; the body executes once before the first bounds check
+ movaps m0, [tq + lenq] ; m0 = t pairs (re, im); aligned load, so the address must be mmsize-aligned
+ movaps m1, [cq + lenq] ; m1 = c pairs (re, im); aligned load
+ vpermilps m3, m0, 177 ; m3 = t with re/im swapped inside each pair (imm 0xB1 picks elements 1,0,3,2 per 128-bit lane)
+ vpermilps m2, m1, 160 ; m2 = c.re copied into both slots of each pair (imm 0xA0 picks 0,0,2,2)
+ vpermilps m1, m1, 245 ; m1 = c.im copied into both slots of each pair (imm 0xF5 picks 1,1,3,3)
+ mulps m1, m1, m3 ; m1 = (c.im * t.im, c.im * t.re)
+ vfmaddsub132ps m0, m1, m2 ; m0 = m0 * m2 -/+ m1: even slots t.re*c.re - t.im*c.im, odd slots t.im*c.re + t.re*c.im (complex product t * c)
+ addps m0, m0, [sumq + lenq] ; add the product to the values already in sum
+ movaps [sumq + lenq], m0 ; aligned store of the accumulated result back to sum
+ add lenq, mmsize ; advance the offset by one vector
+ jl .loop ; keep going while the offset is still negative
+ movss xm0, [tq + lenq] ; tail: load a single float of t at the final offset
+ mulss xm0, [cq + lenq] ; plain (non-complex) multiply by the matching float of c
+ addss xm0, [sumq + lenq] ; add the matching float of sum
+ movss [sumq + lenq], xm0 ; store it; NOTE(review): presumably one real-only value that follows the complex pairs, assuming the byte size is a multiple of mmsize so the offset ends at 0 - confirm with the caller
+ RET
+%endif ; HAVE_FMA3_EXTERNAL
@@ -26,6 +26,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_fma3(float *sum, const float *t, const float *c,
+ ptrdiff_t len); /* FMA3 variant, same signature as the sse3/avx prototypes; installed as s->fcmul_add in ff_afir_init_x86() when EXTERNAL_FMA3_FAST(cpu_flags) holds */
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
@@ -37,4 +39,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
+ if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_fma3;
+ }
}
--
2.39.1