Message ID | 20230714100836.474580-1-alankelly@google.com |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/3] swscale/x86/swscale: Process yuv2yuvX tails using next largest register size | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> +        if (EXTERNAL_AVX512(cpu_flags))
> +            c->yuv2planeX = yuv2yuvX_avx512;
>  #endif
>

You want EXTERNAL_AVX512ICL here.

Kieran
On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>> +        if (EXTERNAL_AVX512(cpu_flags))
>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>  #endif
>>
>
> You want EXTERNAL_AVX512ICL here.

vpermt2q with zmm registers is avx512f and not any of the extensions, so
that check is fine.
On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >> +        if (EXTERNAL_AVX512(cpu_flags))
> >> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>  #endif
> >>
> >
> > You want EXTERNAL_AVX512ICL here.
>
> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> that check is fine.
>

We still support Skylake and we don't want downclocking on that platform.
At least that was my understanding of the intention of AVX512 vs AVX512ICL.
It appears I'm the only one following this convention though.

Kieran
On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
>
>> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
>>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
>>>> +        if (EXTERNAL_AVX512(cpu_flags))
>>>> +            c->yuv2planeX = yuv2yuvX_avx512;
>>>>  #endif
>>>>
>>>
>>> You want EXTERNAL_AVX512ICL here.
>>
>> vpermt2q with zmm registers is avx512f and not any of the extensions, so
>> that check is fine.
>>
>
> We still support Skylake and we don't want downclocking on that platform.
> At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> It appears I'm the only one following this convention though.

Ah, no opinion in that regard. I was following the use of the checks in
the strict technical sense of instruction availability.
Happy to add the check.

Thanks,

Alan

On Fri, Jul 14, 2023 at 4:59 PM James Almer <jamrial@gmail.com> wrote:

> On 7/14/2023 11:57 AM, Kieran Kunhya wrote:
> > On Fri, 14 Jul 2023 at 14:03, James Almer <jamrial@gmail.com> wrote:
> >
> >> On 7/14/2023 9:59 AM, Kieran Kunhya wrote:
> >>>> +#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
> >>>> +        if (EXTERNAL_AVX512(cpu_flags))
> >>>> +            c->yuv2planeX = yuv2yuvX_avx512;
> >>>>  #endif
> >>>>
> >>>
> >>> You want EXTERNAL_AVX512ICL here.
> >>
> >> vpermt2q with zmm registers is avx512f and not any of the extensions, so
> >> that check is fine.
> >>
> >
> > We still support Skylake and we don't want downclocking on that platform.
> > At least that was my understanding of the intention of AVX512 vs AVX512ICL.
> > It appears I'm the only one following this convention though.
>
> Ah, no opinion in that regard. I was following the use of the checks in
> the strict technical sense of instruction availability.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 8c67bf4fab..52423a1199 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -225,6 +225,9 @@ YUV2YUVX_FUNC(sse3, 32, mmxext)
 #if HAVE_AVX2_EXTERNAL
 YUV2YUVX_FUNC(avx2, 64, sse3)
 #endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+YUV2YUVX_FUNC(avx512, 128, avx2)
+#endif
 
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
@@ -467,6 +470,10 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             c->yuv2planeX = yuv2yuvX_avx2;
+#endif
+#if ARCH_X86_64 && HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512(cpu_flags))
+            c->yuv2planeX = yuv2yuvX_avx512;
 #endif
     }
 #if ARCH_X86_32 && !HAVE_ALIGNED_STACK
diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
index 369c850674..57bfa09d66 100644
--- a/libswscale/x86/yuv2yuvX.asm
+++ b/libswscale/x86/yuv2yuvX.asm
@@ -22,6 +22,10 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA 64
+
+permutation: dq 0, 2, 4, 6, 1, 3, 5, 7
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -50,6 +54,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
 %else
     movq xm3, [ditherq]
 %endif ; avx2
+
+%if cpuflag(avx512)
+    mova m15, [permutation]
+%endif
     cmp offsetd, 0
     jz .offset
 
@@ -109,7 +117,10 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
     packuswb m6, m6, m1
 %endif
     mov srcq, [filterq]
-%if cpuflag(avx2)
+%if cpuflag(avx512)
+    vpermt2q m3, m15, m3
+    vpermt2q m6, m15, m6
+%elif cpuflag(avx2)
     vpermq m3, m3, 216
     vpermq m6, m6, 216
 %endif
@@ -131,4 +142,10 @@ YUV2YUVX_FUNC
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
 YUV2YUVX_FUNC
+%if HAVE_AVX512_EXTERNAL
+%if ARCH_X86_64
+INIT_ZMM avx512
+YUV2YUVX_FUNC
+%endif
+%endif
 %endif