diff mbox series

[FFmpeg-devel,1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()

Message ID 20210802053439.42828-1-jianhua.wu@intel.com
State New
Headers show
Series [FFmpeg-devel,1/5] libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Wu Jianhua Aug. 2, 2021, 5:34 a.m. UTC
Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
---
 libavfilter/x86/vf_gblur.asm    | 21 ++++++++++++---------
 libavfilter/x86/vf_gblur_init.c |  4 ++++
 2 files changed, 16 insertions(+), 9 deletions(-)

Comments

mypopy@gmail.com Aug. 2, 2021, 9:30 a.m. UTC | #1
Do you have CPU info? I can't find any more information in the patch set.

On Mon, Aug 2, 2021 at 1:35 PM Wu Jianhua <jianhua.wu@intel.com> wrote:
>
> Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
> Co-authored-by: Jin Jun <jun.i.jin@intel.com>
> Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
> ---
>  libavfilter/x86/vf_gblur.asm    | 21 ++++++++++++---------
>  libavfilter/x86/vf_gblur_init.c |  4 ++++
>  2 files changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
> index 4d84e6d011..276fe347f5 100644
> --- a/libavfilter/x86/vf_gblur.asm
> +++ b/libavfilter/x86/vf_gblur.asm
> @@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
>      VBROADCASTSS m1, minm
>      VBROADCASTSS m2, maxm
>  %elif WIN64
> -    SWAP 0, 2
> -    SWAP 1, 3
> -    VBROADCASTSS m0, xm0
> -    VBROADCASTSS m1, xm1
> +    VBROADCASTSS m0, xmm2
> +    VBROADCASTSS m1, xmm3
>      VBROADCASTSS m2, maxm
> -%else ; UNIX64
> -    VBROADCASTSS m0, xm0
> -    VBROADCASTSS m1, xm1
> -    VBROADCASTSS m2, xm2
> +%else ; UNIX
> +    VBROADCASTSS m0, xmm0
> +    VBROADCASTSS m1, xmm1
> +    VBROADCASTSS m2, xmm2
>  %endif
>
>      .loop:
> -%if cpuflag(avx2)
> +%if cpuflag(avx2) || cpuflag(avx512)
>      mulps         m3, m0, [ptrq + lengthq]
>  %else
>      movu          m3, [ptrq + lengthq]
> @@ -229,3 +227,8 @@ POSTSCALE_SLICE
>  INIT_YMM avx2
>  POSTSCALE_SLICE
>  %endif
> +
> +%if HAVE_AVX512_EXTERNAL
> +INIT_ZMM avx512
> +POSTSCALE_SLICE
> +%endif
> diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
> index d80fb46fe4..34aba4ca6e 100644
> --- a/libavfilter/x86/vf_gblur_init.c
> +++ b/libavfilter/x86/vf_gblur_init.c
> @@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
>
>  void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
>  void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
> +void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
>
>  av_cold void ff_gblur_init_x86(GBlurContext *s)
>  {
> @@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
>      if (EXTERNAL_AVX2(cpu_flags)) {
>          s->horiz_slice = ff_horiz_slice_avx2;
>      }
> +    if (EXTERNAL_AVX512(cpu_flags)) {
> +        s->postscale_slice = ff_postscale_slice_avx512;
> +    }
>  #endif
>  }
> --
> 2.17.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".



--
=======================================
Jun zhao/赵军
+++++++++++++++++++++++++++++++++++++++
Wu Jianhua Aug. 3, 2021, 2:40 a.m. UTC | #2
> -----Original Message-----
> From: mypopy@gmail.com <mypopy@gmail.com>
> Sent: Monday, August 2, 2021 5:31 PM
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Cc: Wu, Jianhua <jianhua.wu@intel.com>; Cheng, Yanfei
> <yanfei.cheng@intel.com>
> Subject: Re: [FFmpeg-devel] [PATCH 1/5] libavfilter/x86/vf_gblur: add
> ff_postscale_slice_avx512()
> 
> Do you have CPU info, I can't find more info from the patch set
> 

I am sorry for the missing info. Do you need the specific CPU model?
Below is some helpful additional information:
1. We have tested the code on different Intel x86_64 architectures,
and the code is platform-agnostic, which means it is enabled and
runs when the CPU supports the AVX2 or AVX512 instruction set, and
is disabled otherwise.
2. We have tested the code on both Linux and Windows operating systems.
3. On CPUs that only support the AVX2 instruction set, we use
common instructions to emulate some features introduced by AVX512
to keep compatibility.

Best regards.
Jianhua
diff mbox series

Patch

diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index 4d84e6d011..276fe347f5 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -194,19 +194,17 @@  cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
     VBROADCASTSS m1, minm
     VBROADCASTSS m2, maxm
 %elif WIN64
-    SWAP 0, 2
-    SWAP 1, 3
-    VBROADCASTSS m0, xm0
-    VBROADCASTSS m1, xm1
+    VBROADCASTSS m0, xmm2
+    VBROADCASTSS m1, xmm3
     VBROADCASTSS m2, maxm
-%else ; UNIX64
-    VBROADCASTSS m0, xm0
-    VBROADCASTSS m1, xm1
-    VBROADCASTSS m2, xm2
+%else ; UNIX
+    VBROADCASTSS m0, xmm0
+    VBROADCASTSS m1, xmm1
+    VBROADCASTSS m2, xmm2
 %endif
 
     .loop:
-%if cpuflag(avx2)
+%if cpuflag(avx2) || cpuflag(avx512)
     mulps         m3, m0, [ptrq + lengthq]
 %else
     movu          m3, [ptrq + lengthq]
@@ -229,3 +227,8 @@  POSTSCALE_SLICE
 INIT_YMM avx2
 POSTSCALE_SLICE
 %endif
+
+%if HAVE_AVX512_EXTERNAL
+INIT_ZMM avx512
+POSTSCALE_SLICE
+%endif
diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
index d80fb46fe4..34aba4ca6e 100644
--- a/libavfilter/x86/vf_gblur_init.c
+++ b/libavfilter/x86/vf_gblur_init.c
@@ -29,6 +29,7 @@  void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
 
 void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
 void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
+void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
 
 av_cold void ff_gblur_init_x86(GBlurContext *s)
 {
@@ -47,5 +48,8 @@  av_cold void ff_gblur_init_x86(GBlurContext *s)
     if (EXTERNAL_AVX2(cpu_flags)) {
         s->horiz_slice = ff_horiz_slice_avx2;
     }
+    if (EXTERNAL_AVX512(cpu_flags)) {
+        s->postscale_slice = ff_postscale_slice_avx512;
+    }
 #endif
 }