Message ID | 20190103004357.5604-4-jamrial@gmail.com |
---|---|
State | Accepted |
Commit | 5402c1886b97a0c46e843b5ae0c08de74d2e6091 |
Headers | show |
On 1/3/19, James Almer <jamrial@gmail.com> wrote: > fcmul_add_c: 1228.8 > fcmul_add_sse3: 334.3 > fcmul_add_avx: 186.3 > > Signed-off-by: James Almer <jamrial@gmail.com> > --- > libavfilter/x86/af_afir.asm | 8 +++++++- > libavfilter/x86/af_afir_init.c | 5 +++++ > 2 files changed, 12 insertions(+), 1 deletion(-) > > diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm > index fcc1f426db..8054ac5f10 100644 > --- a/libavfilter/x86/af_afir.asm > +++ b/libavfilter/x86/af_afir.asm > @@ -27,7 +27,7 @@ SECTION .text > ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len) > ;------------------------------------------------------------------------------ > > -INIT_XMM sse3 > +%macro FCMUL_ADD 0 > cglobal fcmul_add, 4,4,6, sum, t, c, len > shl lend, 3 > add tq, lenq > @@ -61,3 +61,9 @@ ALIGN 16 > addss xm0, [sumq + lenq] > movss [sumq + lenq], xm0 > RET > +%endmacro > + > +INIT_XMM sse3 > +FCMUL_ADD > +INIT_YMM avx > +FCMUL_ADD > diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c > index 29e6f976b2..c37212c381 100644 > --- a/libavfilter/x86/af_afir_init.c > +++ b/libavfilter/x86/af_afir_init.c > @@ -24,6 +24,8 @@ > > void ff_fcmul_add_sse3(float *sum, const float *t, const float *c, > ptrdiff_t len); > +void ff_fcmul_add_avx(float *sum, const float *t, const float *c, > + ptrdiff_t len); > > av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) > { > @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) > if (EXTERNAL_SSE3(cpu_flags)) { > s->fcmul_add = ff_fcmul_add_sse3; > } > + if (EXTERNAL_AVX_FAST(cpu_flags)) { > + s->fcmul_add = ff_fcmul_add_avx; > + } > } > -- > 2.20.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel > OK Also write CPU on which you tested it.
On 1/3/2019 5:47 AM, Paul B Mahol wrote: > On 1/3/19, James Almer <jamrial@gmail.com> wrote: >> fcmul_add_c: 1228.8 >> fcmul_add_sse3: 334.3 >> fcmul_add_avx: 186.3 >> >> Signed-off-by: James Almer <jamrial@gmail.com> >> --- >> libavfilter/x86/af_afir.asm | 8 +++++++- >> libavfilter/x86/af_afir_init.c | 5 +++++ >> 2 files changed, 12 insertions(+), 1 deletion(-) >> >> diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm >> index fcc1f426db..8054ac5f10 100644 >> --- a/libavfilter/x86/af_afir.asm >> +++ b/libavfilter/x86/af_afir.asm >> @@ -27,7 +27,7 @@ SECTION .text >> ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len) >> ;------------------------------------------------------------------------------ >> >> -INIT_XMM sse3 >> +%macro FCMUL_ADD 0 >> cglobal fcmul_add, 4,4,6, sum, t, c, len >> shl lend, 3 >> add tq, lenq >> @@ -61,3 +61,9 @@ ALIGN 16 >> addss xm0, [sumq + lenq] >> movss [sumq + lenq], xm0 >> RET >> +%endmacro >> + >> +INIT_XMM sse3 >> +FCMUL_ADD >> +INIT_YMM avx >> +FCMUL_ADD >> diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c >> index 29e6f976b2..c37212c381 100644 >> --- a/libavfilter/x86/af_afir_init.c >> +++ b/libavfilter/x86/af_afir_init.c >> @@ -24,6 +24,8 @@ >> >> void ff_fcmul_add_sse3(float *sum, const float *t, const float *c, >> ptrdiff_t len); >> +void ff_fcmul_add_avx(float *sum, const float *t, const float *c, >> + ptrdiff_t len); >> >> av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) >> { >> @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) >> if (EXTERNAL_SSE3(cpu_flags)) { >> s->fcmul_add = ff_fcmul_add_sse3; >> } >> + if (EXTERNAL_AVX_FAST(cpu_flags)) { >> + s->fcmul_add = ff_fcmul_add_avx; >> + } >> } >> -- >> 2.20.1 >> >> _______________________________________________ >> ffmpeg-devel mailing list >> ffmpeg-devel@ffmpeg.org >> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >> > > OK > > Also write CPU on which you tested it. 
Patch set pushed, with the tested CPU noted as requested. Thanks.
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm index fcc1f426db..8054ac5f10 100644 --- a/libavfilter/x86/af_afir.asm +++ b/libavfilter/x86/af_afir.asm @@ -27,7 +27,7 @@ SECTION .text ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len) ;------------------------------------------------------------------------------ -INIT_XMM sse3 +%macro FCMUL_ADD 0 cglobal fcmul_add, 4,4,6, sum, t, c, len shl lend, 3 add tq, lenq @@ -61,3 +61,9 @@ ALIGN 16 addss xm0, [sumq + lenq] movss [sumq + lenq], xm0 RET +%endmacro + +INIT_XMM sse3 +FCMUL_ADD +INIT_YMM avx +FCMUL_ADD diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c index 29e6f976b2..c37212c381 100644 --- a/libavfilter/x86/af_afir_init.c +++ b/libavfilter/x86/af_afir_init.c @@ -24,6 +24,8 @@ void ff_fcmul_add_sse3(float *sum, const float *t, const float *c, ptrdiff_t len); +void ff_fcmul_add_avx(float *sum, const float *t, const float *c, + ptrdiff_t len); av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) { @@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s) if (EXTERNAL_SSE3(cpu_flags)) { s->fcmul_add = ff_fcmul_add_sse3; } + if (EXTERNAL_AVX_FAST(cpu_flags)) { + s->fcmul_add = ff_fcmul_add_avx; + } }
fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavfilter/x86/af_afir.asm | 8 +++++++-
 libavfilter/x86/af_afir_init.c | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)