diff mbox series

[FFmpeg-devel,2/3] swscale/x86/yuv2yuvX: Add yuv2yuvX avx512

Message ID 20230714100836.474580-1-alankelly@google.com
State New
Headers show
Series [FFmpeg-devel,1/3] swscale/x86/swscale: Process yuv2yuvX tails using next largest register size | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Alan Kelly July 14, 2023, 10:08 a.m. UTC
---
 libswscale/x86/swscale.c    |  7 +++++++
 libswscale/x86/yuv2yuvX.asm | 19 ++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

Comments

Kieran Kunhya July 14, 2023, 12:59 p.m. UTC | #1
> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> +        if (EXTERNAL_AVX512(cpu_flags))
> +            c->yuv2planeX = yuv2yuvX_avx512;
>  #endif
>

 You want EXTERNAL_AVX512ICL here.

Kieran
James Almer July 14, 2023, 1:03 p.m. UTC | #2
On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>> +        if (EXTERNAL_AVX512(cpu_flags))
>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>   #endif
>>
> 
>   You want EXTERNAL_AVX512ICL here.

vpermt2q with zmm registers is avx512f and not any of the extensions, so 
that check is fine.
Kieran Kunhya July 14, 2023, 2:57 p.m. UTC | #3
On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >> +        if (EXTERNAL_AVX512(cpu_flags))
> >> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>   #endif
> >>
> >
> >   You want EXTERNAL_AVX512ICL here.
>
> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> that check is fine.
>

We still support Skylake and we don't want downclocking on that platform.
At least that was my understanding of the intention of AVX512 vs AVX512ICL.
It appears I'm the only one following this convention though.

Kieran
James Almer July 14, 2023, 2:59 p.m. UTC | #4
On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> 
>> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>>>> +        if (EXTERNAL_AVX512(cpu_flags))
>>>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>>>    #endif
>>>>
>>>
>>>    You want EXTERNAL_AVX512ICL here.
>>
>> vpermt2q with zmm registers is avx512f and not any of the extensions, so
>> that check is fine.
>>
> 
> We still support Skylake and we don't want downclocking on that platform.
> At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> It appears I'm the only one following this convention though.

Ah, no opinion in that regard. I was following the use of the checks in 
the strict technical sense of instruction availability.
Alan Kelly July 17, 2023, 9:23 a.m. UTC | #5
Happy to add the check.

Thanks,
Alan

On Fri, Jul 14, 2023 at 4:59 PM James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> > On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> >
> >> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >>>> +        if (EXTERNAL_AVX512(cpu_flags))
> >>>> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>>>    #endif
> >>>>
> >>>
> >>>    You want EXTERNAL_AVX512ICL here.
> >>
> >> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> >> that check is fine.
> >>
> >
> > We still support Skylake and we don't want downclocking on that platform.
> > At least that was my understanding of the intention of AVX512 vs
> > AVX512ICL.
> > It appears I'm the only one following this convention though.
>
> Ah, no opinion in that regard. I was following the use of the checks in
> the strict technical sense of instruction availability.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..52423a1199 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@  YUV2YUVX_FUNC(sse3, 32, mmxext)
 #if HAVE_AVX2_EXTERNAL
 YUV2YUVX_FUNC(avx2, 64, sse3)
 #endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2) /* same pattern as (sse3, 32, mmxext) and (avx2, 64, sse3); NOTE(review): 2nd arg doubles per step and 3rd arg looks like the next-smaller variant, presumably for tails - confirm against the macro definition */
+#endif /* ARCH_X86_64 && HAVE_AVX512_EXTERNAL */
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@  av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512(cpu_flags))
+            c->yuv2planeX = yuv2yuvX_avx512;
 #endif
     }
 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@ 
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA 64 ; NOTE(review): 64 is presumably the section alignment, sized for the 64-byte table below - confirm in x86inc
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7 ; eight qword indices (even positions first, then odd); loaded into m15 and used as the vpermt2q index operand in the avx512 path
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@  cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %else
     movq                 xm3, [ditherq]
 %endif ; avx2
+
+%if cpuflag(avx512)
+    mova                 m15, [permutation]
+%endif
     cmp                  offsetd, 0
     jz                   .offset
 
@@ -109,7 +117,10 @@  cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     packuswb             m6, m6, m1
 %endif
     mov                  srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+    vpermt2q             m3, m15, m3
+    vpermt2q             m6, m15, m6
+%elif cpuflag(avx2)
     vpermq               m3, m3, 216
     vpermq               m6, m6, 216
 %endif
@@ -131,4 +142,10 @@  YUV2YUVX_FUNC
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL ; only emit the avx512 variant when the build enables it
+%if ARCH_X86_64 ; NOTE(review): presumably 64-bit only because the avx512 body uses m15 - confirm
+INIT_ZMM avx512
+YUV2YUVX_FUNC ; expand the shared function body once more, now with the avx512 cpuflag set
+%endif ; ARCH_X86_64
+%endif ; HAVE_AVX512_EXTERNAL
 %endif