Message ID | 20210802053439.42828-1-jianhua.wu@intel.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512() | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
Do you have CPU info, I can't find more info from the patch set On Mon, Aug 2, 2021 at 1:35 PM Wu Jianhua <jianhua.wu@intel.com> wrote: > > Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com> > Co-authored-by: Jin Jun <jun.i.jin@intel.com> > Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> > --- > libavfilter/x86/vf_gblur.asm | 21 ++++++++++++--------- > libavfilter/x86/vf_gblur_init.c | 4 ++++ > 2 files changed, 16 insertions(+), 9 deletions(-) > > diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm > index 4d84e6d011..276fe347f5 100644 > --- a/libavfilter/x86/vf_gblur.asm > +++ b/libavfilter/x86/vf_gblur.asm > @@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max > VBROADCASTSS m1, minm > VBROADCASTSS m2, maxm > %elif WIN64 > - SWAP 0, 2 > - SWAP 1, 3 > - VBROADCASTSS m0, xm0 > - VBROADCASTSS m1, xm1 > + VBROADCASTSS m0, xmm2 > + VBROADCASTSS m1, xmm3 > VBROADCASTSS m2, maxm > -%else ; UNIX64 > - VBROADCASTSS m0, xm0 > - VBROADCASTSS m1, xm1 > - VBROADCASTSS m2, xm2 > +%else ; UNIX > + VBROADCASTSS m0, xmm0 > + VBROADCASTSS m1, xmm1 > + VBROADCASTSS m2, xmm2 > %endif > > .loop: > -%if cpuflag(avx2) > +%if cpuflag(avx2) || cpuflag(avx512) > mulps m3, m0, [ptrq + lengthq] > %else > movu m3, [ptrq + lengthq] > @@ -229,3 +227,8 @@ POSTSCALE_SLICE > INIT_YMM avx2 > POSTSCALE_SLICE > %endif > + > +%if HAVE_AVX512_EXTERNAL > +INIT_ZMM avx512 > +POSTSCALE_SLICE > +%endif > diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c > index d80fb46fe4..34aba4ca6e 100644 > --- a/libavfilter/x86/vf_gblur_init.c > +++ b/libavfilter/x86/vf_gblur_init.c > @@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, > > void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max); > void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); > +void ff_postscale_slice_avx512(float *ptr, int length, 
float postscale, float min, float max); > > av_cold void ff_gblur_init_x86(GBlurContext *s) > { > @@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s) > if (EXTERNAL_AVX2(cpu_flags)) { > s->horiz_slice = ff_horiz_slice_avx2; > } > + if (EXTERNAL_AVX512(cpu_flags)) { > + s->postscale_slice = ff_postscale_slice_avx512; > + } > #endif > } > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". -- ======================================= Jun zhao/赵军 +++++++++++++++++++++++++++++++++++++++
> -----Original Message----- > From: mypopy@gmail.com <mypopy@gmail.com> > Sent: Monday, August 2, 2021 5:31 PM > To: FFmpeg development discussions and patches <ffmpeg- > devel@ffmpeg.org> > Cc: Wu, Jianhua <jianhua.wu@intel.com>; Cheng, Yanfei > <yanfei.cheng@intel.com> > Subject: Re: [FFmpeg-devel] [PATCH 1/5] libavfilter/x86/vf_gblur: add > ff_postscale_slice_avx512() > > Do you have CPU info, I can't find more info from the patch set > I am sorry for the missing info. Do you need the specific CPU model? Below is some additional information that may help: 1. We have tested the code on different Intel x86_64 architectures, and the code is platform-agnostic: it is enabled and runs when the CPU supports the AVX2 or AVX512 instruction set, and it is disabled otherwise. 2. We have tested the code on both Linux and Windows operating systems. 3. On CPUs that only support the AVX2 instruction set, we use common instructions to emulate some features introduced by AVX512 to keep compatibility. Best regards, Jianhua
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm index 4d84e6d011..276fe347f5 100644 --- a/libavfilter/x86/vf_gblur.asm +++ b/libavfilter/x86/vf_gblur.asm @@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max VBROADCASTSS m1, minm VBROADCASTSS m2, maxm %elif WIN64 - SWAP 0, 2 - SWAP 1, 3 - VBROADCASTSS m0, xm0 - VBROADCASTSS m1, xm1 + VBROADCASTSS m0, xmm2 + VBROADCASTSS m1, xmm3 VBROADCASTSS m2, maxm -%else ; UNIX64 - VBROADCASTSS m0, xm0 - VBROADCASTSS m1, xm1 - VBROADCASTSS m2, xm2 +%else ; UNIX + VBROADCASTSS m0, xmm0 + VBROADCASTSS m1, xmm1 + VBROADCASTSS m2, xmm2 %endif .loop: -%if cpuflag(avx2) +%if cpuflag(avx2) || cpuflag(avx512) mulps m3, m0, [ptrq + lengthq] %else movu m3, [ptrq + lengthq] @@ -229,3 +227,8 @@ POSTSCALE_SLICE INIT_YMM avx2 POSTSCALE_SLICE %endif + +%if HAVE_AVX512_EXTERNAL +INIT_ZMM avx512 +POSTSCALE_SLICE +%endif diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c index d80fb46fe4..34aba4ca6e 100644 --- a/libavfilter/x86/vf_gblur_init.c +++ b/libavfilter/x86/vf_gblur_init.c @@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu, void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max); void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max); +void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max); av_cold void ff_gblur_init_x86(GBlurContext *s) { @@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s) if (EXTERNAL_AVX2(cpu_flags)) { s->horiz_slice = ff_horiz_slice_avx2; } + if (EXTERNAL_AVX512(cpu_flags)) { + s->postscale_slice = ff_postscale_slice_avx512; + } #endif }
Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com> Co-authored-by: Jin Jun <jun.i.jin@intel.com> Signed-off-by: Wu Jianhua <jianhua.wu@intel.com> --- libavfilter/x86/vf_gblur.asm | 21 ++++++++++++--------- libavfilter/x86/vf_gblur_init.c | 4 ++++ 2 files changed, 16 insertions(+), 9 deletions(-)