diff mbox

[FFmpeg-devel,4/4] x86/af_afir: add ff_fcmul_add_avx()

Message ID 20190103004357.5604-4-jamrial@gmail.com
State Accepted
Commit 5402c1886b97a0c46e843b5ae0c08de74d2e6091
Headers show

Commit Message

James Almer Jan. 3, 2019, 12:43 a.m. UTC
fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/x86/af_afir.asm    | 8 +++++++-
 libavfilter/x86/af_afir_init.c | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)

Comments

Paul B Mahol Jan. 3, 2019, 8:47 a.m. UTC | #1
On 1/3/19, James Almer <jamrial@gmail.com> wrote:
> fcmul_add_c: 1228.8
> fcmul_add_sse3: 334.3
> fcmul_add_avx: 186.3
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavfilter/x86/af_afir.asm    | 8 +++++++-
>  libavfilter/x86/af_afir_init.c | 5 +++++
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
> index fcc1f426db..8054ac5f10 100644
> --- a/libavfilter/x86/af_afir.asm
> +++ b/libavfilter/x86/af_afir.asm
> @@ -27,7 +27,7 @@ SECTION .text
>  ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
> ;------------------------------------------------------------------------------
>
> -INIT_XMM sse3
> +%macro FCMUL_ADD 0
>  cglobal fcmul_add, 4,4,6, sum, t, c, len
>      shl       lend, 3
>      add         tq, lenq
> @@ -61,3 +61,9 @@ ALIGN 16
>      addss xm0, [sumq + lenq]
>      movss [sumq + lenq], xm0
>      RET
> +%endmacro
> +
> +INIT_XMM sse3
> +FCMUL_ADD
> +INIT_YMM avx
> +FCMUL_ADD
> diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
> index 29e6f976b2..c37212c381 100644
> --- a/libavfilter/x86/af_afir_init.c
> +++ b/libavfilter/x86/af_afir_init.c
> @@ -24,6 +24,8 @@
>
>  void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
>                         ptrdiff_t len);
> +void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
> +                      ptrdiff_t len);
>
>  av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>  {
> @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>      if (EXTERNAL_SSE3(cpu_flags)) {
>          s->fcmul_add = ff_fcmul_add_sse3;
>      }
> +    if (EXTERNAL_AVX_FAST(cpu_flags)) {
> +        s->fcmul_add = ff_fcmul_add_avx;
> +    }
>  }
> --
> 2.20.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

OK

Also, please mention the CPU on which you tested it.
James Almer Jan. 3, 2019, 1:15 p.m. UTC | #2
On 1/3/2019 5:47 AM, Paul B Mahol wrote:
> On 1/3/19, James Almer <jamrial@gmail.com> wrote:
>> fcmul_add_c: 1228.8
>> fcmul_add_sse3: 334.3
>> fcmul_add_avx: 186.3
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavfilter/x86/af_afir.asm    | 8 +++++++-
>>  libavfilter/x86/af_afir_init.c | 5 +++++
>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
>> index fcc1f426db..8054ac5f10 100644
>> --- a/libavfilter/x86/af_afir.asm
>> +++ b/libavfilter/x86/af_afir.asm
>> @@ -27,7 +27,7 @@ SECTION .text
>>  ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
>> ;------------------------------------------------------------------------------
>>
>> -INIT_XMM sse3
>> +%macro FCMUL_ADD 0
>>  cglobal fcmul_add, 4,4,6, sum, t, c, len
>>      shl       lend, 3
>>      add         tq, lenq
>> @@ -61,3 +61,9 @@ ALIGN 16
>>      addss xm0, [sumq + lenq]
>>      movss [sumq + lenq], xm0
>>      RET
>> +%endmacro
>> +
>> +INIT_XMM sse3
>> +FCMUL_ADD
>> +INIT_YMM avx
>> +FCMUL_ADD
>> diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
>> index 29e6f976b2..c37212c381 100644
>> --- a/libavfilter/x86/af_afir_init.c
>> +++ b/libavfilter/x86/af_afir_init.c
>> @@ -24,6 +24,8 @@
>>
>>  void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
>>                         ptrdiff_t len);
>> +void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
>> +                      ptrdiff_t len);
>>
>>  av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>>  {
>> @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
>>      if (EXTERNAL_SSE3(cpu_flags)) {
>>          s->fcmul_add = ff_fcmul_add_sse3;
>>      }
>> +    if (EXTERNAL_AVX_FAST(cpu_flags)) {
>> +        s->fcmul_add = ff_fcmul_add_avx;
>> +    }
>>  }
>> --
>> 2.20.1
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
> 
> OK
> 
> Also, please mention the CPU on which you tested it.

Patch set pushed, with the tested CPU added to the commit message.

Thanks.
diff mbox

Patch

diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index fcc1f426db..8054ac5f10 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,7 +27,7 @@  SECTION .text
 ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
 ;------------------------------------------------------------------------------
 
-INIT_XMM sse3
+%macro FCMUL_ADD 0
 cglobal fcmul_add, 4,4,6, sum, t, c, len
     shl       lend, 3
     add         tq, lenq
@@ -61,3 +61,9 @@  ALIGN 16
     addss xm0, [sumq + lenq]
     movss [sumq + lenq], xm0
     RET
+%endmacro
+
+INIT_XMM sse3
+FCMUL_ADD
+INIT_YMM avx
+FCMUL_ADD
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 29e6f976b2..c37212c381 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -24,6 +24,8 @@ 
 
 void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
                        ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+                      ptrdiff_t len);
 
 av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
 {
@@ -32,4 +34,7 @@  av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
     if (EXTERNAL_SSE3(cpu_flags)) {
         s->fcmul_add = ff_fcmul_add_sse3;
     }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->fcmul_add = ff_fcmul_add_avx;
+    }
 }