diff mbox series

[FFmpeg-devel,v3,2/3] avfilter/x86/vf_exposure: add ff_exposure_avx2

Message ID 20211122080848.39566-2-jianhua.wu@intel.com
State New
Headers show
Series [FFmpeg-devel,v3,1/3] avfilter/x86/vf_exposure: add x86 SIMD optimization | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Wu Jianhua Nov. 22, 2021, 8:08 a.m. UTC
Performance data (lower is better):
    exposure_sse:   500491
    exposure_avx2:  449122

Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
---
 libavfilter/x86/vf_exposure.asm    | 15 +++++++++++++++
 libavfilter/x86/vf_exposure_init.c |  4 ++++
 2 files changed, 19 insertions(+)
diff mbox series

Patch

diff --git a/libavfilter/x86/vf_exposure.asm b/libavfilter/x86/vf_exposure.asm
index 3351c6fb3b..4ee9fbcb15 100644
--- a/libavfilter/x86/vf_exposure.asm
+++ b/libavfilter/x86/vf_exposure.asm
@@ -36,11 +36,21 @@  cglobal exposure, 2, 2, 4, ptr, length, black, scale
     VBROADCASTSS m1, xmm1
 %endif
 
+%if cpuflag(fma3) ; FMA build: fold the subtraction into one fused multiply-subtract
+    mulps       m0, m0, m1 ; m0 = black * scale, computed once before the loop
+%endif
+
 .loop:
+%if cpuflag(fma3) ; FMA path: (x - black) * scale rewritten as scale * x - black * scale
+    mova        m2, m0 ; copy m0: the 231 form below overwrites its first operand
+    vfmsub231ps m2, m1, [ptrq] ; m2 = m1 * [ptrq] - m2
+    movu    [ptrq], m2 ; write the result back over the input
+%else ; NOTE(review): this path rounds twice, the FMA path once; low bits may differ
     movu        m2, [ptrq] ; unaligned load of mmsize bytes from ptr
     subps       m2, m2, m0 ; m2 = x - m0 (m0 presumably holds broadcast black)
     mulps       m2, m2, m1 ; m2 = (x - m0) * m1 (m1 presumably holds broadcast scale)
     movu    [ptrq], m2 ; write the result back over the input
+%endif
     add       ptrq, mmsize
     sub    lengthq, mmsize/4
 
@@ -52,4 +62,9 @@  cglobal exposure, 2, 2, 4, ptr, length, black, scale
 %if ARCH_X86_64
 INIT_XMM sse
 EXPOSURE
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+EXPOSURE
+%endif
 %endif
diff --git a/libavfilter/x86/vf_exposure_init.c b/libavfilter/x86/vf_exposure_init.c
index de1b360f6c..edc1452850 100644
--- a/libavfilter/x86/vf_exposure_init.c
+++ b/libavfilter/x86/vf_exposure_init.c
@@ -24,6 +24,7 @@ 
 #include "libavfilter/exposure.h"
 
 void ff_exposure_sse(float *ptr, int length, float black, float scale);
+void ff_exposure_avx2(float *ptr, int length, float black, float scale);
 
 av_cold void ff_exposure_init_x86(ExposureContext *s)
 {
@@ -32,5 +33,8 @@  av_cold void ff_exposure_init_x86(ExposureContext *s)
 #if ARCH_X86_64
     if (EXTERNAL_SSE(cpu_flags))
         s->exposure_func = ff_exposure_sse;
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags))
+        s->exposure_func = ff_exposure_avx2;
 #endif
 }