diff mbox series

[FFmpeg-devel,24/41] avcodec/x86/idctdsp_init: Disable overridden functions on x64

Message ID DB6PR0101MB2214103419EC77E9792CB1D78FA79@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com
State Superseded
Headers show
Series Stop including superseded functions for x64 | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt June 9, 2022, 11:55 p.m. UTC
x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). This commit therefore disables
the MMX functions as well as the non-64-bit functions (which are
overridden by the 64-bit-specific implementation) at compile time for x64.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/tests/x86/dct.c     | 2 +-
 libavcodec/x86/idctdsp.asm     | 6 ++++++
 libavcodec/x86/idctdsp_init.c  | 4 ++++
 libavcodec/x86/simple_idct.asm | 2 ++
 4 files changed, 13 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c
index 1eb9400567..144d055cff 100644
--- a/libavcodec/tests/x86/dct.c
+++ b/libavcodec/tests/x86/dct.c
@@ -73,7 +73,7 @@  static const struct algo fdct_tab_arch[] = {
 };
 
 static const struct algo idct_tab_arch[] = {
-#if HAVE_MMX_EXTERNAL
+#if ARCH_X86_32 && HAVE_MMX_EXTERNAL
     { "SIMPLE-MMX",  ff_simple_idct_mmx,  FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX },
 #endif
 #if CONFIG_MPEG4_DECODER && HAVE_X86ASM
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 089425a9ab..701a8c5a43 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -74,8 +74,10 @@  cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 PUT_SIGNED_PIXELS_CLAMPED 0
+%endif
 INIT_XMM sse2
 PUT_SIGNED_PIXELS_CLAMPED 3
 
@@ -117,8 +119,10 @@  cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 PUT_PIXELS_CLAMPED
+%endif
 INIT_XMM sse2
 PUT_PIXELS_CLAMPED
 
@@ -177,7 +181,9 @@  cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 ADD_PIXELS_CLAMPED
+%endif
 INIT_XMM sse2
 ADD_PIXELS_CLAMPED
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 9103b92ce7..41ba9d68cb 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,6 +63,7 @@  av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
 {
     int cpu_flags = av_get_cpu_flags();
 
+#if ARCH_X86_32
     if (EXTERNAL_MMX(cpu_flags)) {
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
         c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
@@ -79,12 +80,14 @@  av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                 c->perm_type = FF_IDCT_PERM_SIMPLE;
         }
     }
+#endif
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
         c->put_pixels_clamped        = ff_put_pixels_clamped_sse2;
         c->add_pixels_clamped        = ff_add_pixels_clamped_sse2;
 
+#if ARCH_X86_32
         if (!high_bit_depth &&
             avctx->lowres == 0 &&
             (avctx->idct_algo == FF_IDCT_AUTO ||
@@ -94,6 +97,7 @@  av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                 c->idct_add  = ff_simple_idct_add_sse2;
                 c->perm_type = FF_IDCT_PERM_SIMPLE;
         }
+#endif
 
         if (ARCH_X86_64 &&
             !high_bit_depth &&
diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm
index 6fedbb5784..002fdede90 100644
--- a/libavcodec/x86/simple_idct.asm
+++ b/libavcodec/x86/simple_idct.asm
@@ -25,6 +25,7 @@ 
 
 %include "libavutil/x86/x86util.asm"
 
+%if ARCH_X86_32
 SECTION_RODATA
 
 cextern pb_80
@@ -887,3 +888,4 @@  cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0
     lea        pixelsq, [pixelsq+lsizeq*2]
     ADD_PIXELS_CLAMPED 96
 RET
+%endif