Message ID | 20210214143205.9320-2-onemda@gmail.com |
---|---|
State | Accepted |
Headers | show |
Series | [FFmpeg-devel,1/2] avfilter/vf_gblur: factor out postscale function | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
On 2/14/2021 11:32 AM, Paul B Mahol wrote:
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavfilter/x86/vf_gblur.asm    | 49 +++++++++++++++++++++++++++++++++
>  libavfilter/x86/vf_gblur_init.c | 17 ++++++++++--
>  2 files changed, 63 insertions(+), 3 deletions(-)
>
> diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
> index a25b1659f5..8ccfbdc56b 100644
> --- a/libavfilter/x86/vf_gblur.asm
> +++ b/libavfilter/x86/vf_gblur.asm
> @@ -183,3 +183,52 @@ HORIZ_SLICE
>  INIT_XMM avx2
>  HORIZ_SLICE
>  %endif
> +
> +%macro POSTSCALE_SLICE 0
> +%if UNIX64
> +cglobal postscale_slice, 2, 3, 4, ptr, length, x

                         2, 2, 4, ptr, length

> +%else
> +cglobal postscale_slice, 5, 6, 4, ptr, length, postscale, min, max, x

                         5, 5, 4, ptr, length, postscale, min, max

> +%endif
> +    shl lengthd, 2

    shl lengthd, 2
    add ptrq, lengthq
    neg lengthq

> +%if WIN64
> +    SWAP 0, 2
> +    SWAP 1, 3
> +    SWAP 2, 4
> +%endif
> +%if cpuflag(avx2)
> +    vbroadcastss m0, xm0
> +    vbroadcastss m1, xm1
> +    vbroadcastss m2, xm2
> +%else
> +    shufps xm0, xm0, 0
> +    shufps xm1, xm1, 0
> +    shufps xm2, xm2, 0
> +%endif
> +    xor xq, xq

remove this instruction.

> +
> +    .loop:
> +%if cpuflag(avx2)
> +    mulps m3, m0, [ptrq + xq]

Replace xq with lengthq here and everywhere else.

> +%else
> +    movu m3, [ptrq + xq]
> +    mulps m3, m0
> +%endif
> +    maxps m3, m1
> +    minps m3, m2
> +    movu [ptrq+xq], m3
> +
> +    add xq, mmsize
> +    cmp xd, lengthd

remove this cmp.

> +    jl .loop
> +
> +    RET
> +%endmacro
> +
> +INIT_XMM sse
> +POSTSCALE_SLICE
> +
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +POSTSCALE_SLICE
> +%endif
> diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
> index e63e59fe23..9223cb797d 100644
> --- a/libavfilter/x86/vf_gblur_init.c
> +++ b/libavfilter/x86/vf_gblur_init.c
> @@ -27,14 +27,25 @@
>  void ff_horiz_slice_sse4(float *ptr, int width, int height, int steps, float nu, float bscale);
>  void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, float bscale);
>
> +void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
> +void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
> +
>  av_cold void ff_gblur_init_x86(GBlurContext *s)
>  {
> -#if ARCH_X86_64
>      int cpu_flags = av_get_cpu_flags();
>
> -    if (EXTERNAL_SSE4(cpu_flags))
> +    if (EXTERNAL_SSE(cpu_flags)) {
> +        s->postscale_slice = ff_postscale_slice_sse;
> +    }
> +    if (EXTERNAL_AVX2(cpu_flags)) {

EXTERNAL_AVX2_FAST

> +        s->postscale_slice = ff_postscale_slice_avx2;
> +    }
> +#if ARCH_X86_64
> +    if (EXTERNAL_SSE4(cpu_flags)) {
>          s->horiz_slice = ff_horiz_slice_sse4;
> -    if (EXTERNAL_AVX2(cpu_flags))
> +    }
> +    if (EXTERNAL_AVX2(cpu_flags)) {
>          s->horiz_slice = ff_horiz_slice_avx2;
> +    }
>  #endif
>  }

LGTM with the above.
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm index a25b1659f5..8ccfbdc56b 100644 --- a/libavfilter/x86/vf_gblur.asm +++ b/libavfilter/x86/vf_gblur.asm @@ -183,3 +183,52 @@ HORIZ_SLICE INIT_XMM avx2 HORIZ_SLICE %endif + +%macro POSTSCALE_SLICE 0 +%if UNIX64 +cglobal postscale_slice, 2, 3, 4, ptr, length, x +%else +cglobal postscale_slice, 5, 6, 4, ptr, length, postscale, min, max, x +%endif + shl lengthd, 2 +%if WIN64 + SWAP 0, 2 + SWAP 1, 3 + SWAP 2, 4 +%endif +%if cpuflag(avx2) + vbroadcastss m0, xm0 + vbroadcastss m1, xm1 + vbroadcastss m2, xm2 +%else + shufps xm0, xm0, 0 + shufps xm1, xm1, 0 + shufps xm2, xm2, 0 +%endif + xor xq, xq + + .loop: +%if cpuflag(avx2) + mulps m3, m0, [ptrq + xq] +%else + movu m3, [ptrq + xq] + mulps m3, m0 +%endif + maxps m3, m1 + minps m3, m2 + movu [ptrq+xq], m3 + + add xq, mmsize + cmp xd, lengthd + jl .loop + + RET +%endmacro + +INIT_XMM sse +POSTSCALE_SLICE + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +POSTSCALE_SLICE +%endif diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c index e63e59fe23..9223cb797d 100644 --- a/libavfilter/x86/vf_gblur_init.c +++ b/libavfilter/x86/vf_gblur_init.c @@ -27,14 +27,25 @@ void ff_horiz_slice_sse4(float *ptr, int width, int height, int steps, float nu, float bscale); void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, float bscale); +void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max); +void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); + av_cold void ff_gblur_init_x86(GBlurContext *s) { -#if ARCH_X86_64 int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_SSE4(cpu_flags)) + if (EXTERNAL_SSE(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_sse; + } + if (EXTERNAL_AVX2(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_avx2; + } +#if ARCH_X86_64 + if (EXTERNAL_SSE4(cpu_flags)) { s->horiz_slice = ff_horiz_slice_sse4; - if 
(EXTERNAL_AVX2(cpu_flags)) + } + if (EXTERNAL_AVX2(cpu_flags)) { s->horiz_slice = ff_horiz_slice_avx2; + } #endif }
Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavfilter/x86/vf_gblur.asm | 49 +++++++++++++++++++++++++++++++++ libavfilter/x86/vf_gblur_init.c | 17 ++++++++++-- 2 files changed, 63 insertions(+), 3 deletions(-)