diff mbox series

[FFmpeg-devel,18/41] avcodec/x86/h264_intrapred_init: Disable overridden functions on x64

Message ID DB6PR0101MB2214D1853A4D25D8B1CC170C8FA79@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com
State Superseded
Headers show
Series Stop including superseded functions for x64 | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt June 9, 2022, 11:55 p.m. UTC
x64 always has MMX, MMXEXT, SSE and SSE2, which means that
some functions for MMX, MMXEXT, SSE and 3DNow! are always
overridden by other functions (unless one explicitly
disables e.g. SSE2). This commit therefore disables such
H.264-intrapred-dsp functions at compile time on x64.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/x86/h264_intrapred.asm       | 26 +++++++++++++++++++++++++
 libavcodec/x86/h264_intrapred_10bit.asm | 16 +++++++++++++++
 libavcodec/x86/h264_intrapred_init.c    | 20 +++++++++++++++----
 3 files changed, 58 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index b36c198fbb..9426598a63 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -48,6 +48,7 @@  cextern pw_8
 ; void ff_pred16x16_vertical_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 INIT_MMX mmx
 cglobal pred16x16_vertical_8, 2,3
     sub   r0, r1
@@ -63,6 +64,7 @@  cglobal pred16x16_vertical_8, 2,3
     dec   r2
     jg .loop
     REP_RET
+%endif
 
 INIT_XMM sse
 cglobal pred16x16_vertical_8, 2,3
@@ -114,8 +116,10 @@  cglobal pred16x16_horizontal_8, 2,3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 PRED16x16_H
+%endif
 INIT_MMX mmxext
 PRED16x16_H
 INIT_XMM ssse3
@@ -176,8 +180,10 @@  cglobal pred16x16_dc_8, 2,7
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_DC
+%endif
 INIT_XMM sse2
 PRED16x16_DC
 INIT_XMM ssse3
@@ -187,6 +193,7 @@  PRED16x16_DC
 ; void ff_pred16x16_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 %macro PRED16x16_TM 0
 cglobal pred16x16_tm_vp8_8, 2,5
     sub        r0, r1
@@ -227,6 +234,7 @@  INIT_MMX mmx
 PRED16x16_TM
 INIT_MMX mmxext
 PRED16x16_TM
+%endif
 
 INIT_XMM sse2
 cglobal pred16x16_tm_vp8_8, 2,6,6
@@ -565,6 +573,7 @@  cglobal pred16x16_plane_%1_8, 2,9,7
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
@@ -573,6 +582,7 @@  INIT_MMX mmxext
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
 H264_PRED16x16_PLANE svq3
+%endif
 INIT_XMM sse2
 H264_PRED16x16_PLANE h264
 H264_PRED16x16_PLANE rv40
@@ -747,10 +757,12 @@  ALIGN 16
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 H264_PRED8x8_PLANE
 INIT_MMX mmxext
 H264_PRED8x8_PLANE
+%endif
 INIT_XMM sse2
 H264_PRED8x8_PLANE
 INIT_XMM ssse3
@@ -794,8 +806,10 @@  cglobal pred8x8_horizontal_8, 2,3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 PRED8x8_H
+%endif
 INIT_MMX mmxext
 PRED8x8_H
 INIT_MMX ssse3
@@ -937,6 +951,7 @@  cglobal pred8x8_dc_rv40_8, 2,7
 ; void ff_pred8x8_tm_vp8_8(uint8_t *src, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 %macro PRED8x8_TM 0
 cglobal pred8x8_tm_vp8_8, 2,6
     sub        r0, r1
@@ -976,6 +991,7 @@  INIT_MMX mmx
 PRED8x8_TM
 INIT_MMX mmxext
 PRED8x8_TM
+%endif
 
 INIT_XMM sse2
 cglobal pred8x8_tm_vp8_8, 2,6,4
@@ -1333,6 +1349,7 @@  PRED8x8L_VERTICAL
 ;                              int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 cglobal pred8x8l_down_left_8, 4,5
     sub          r0, r3
@@ -1440,6 +1457,7 @@  cglobal pred8x8l_down_left_8, 4,5
     por         mm1, mm0
     movq  [r0+r3*1], mm1
     RET
+%endif
 
 %macro PRED8x8L_DOWN_LEFT 0
 cglobal pred8x8l_down_left_8, 4,4
@@ -1534,6 +1552,7 @@  PRED8x8L_DOWN_LEFT
 ;                                      int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 cglobal pred8x8l_down_right_8, 4,5
     sub          r0, r3
@@ -1665,6 +1684,7 @@  cglobal pred8x8l_down_right_8, 4,5
     por        mm0, mm1
     movq [r0+r3*1], mm0
     RET
+%endif
 
 %macro PRED8x8L_DOWN_RIGHT 0
 cglobal pred8x8l_down_right_8, 4,5
@@ -1786,6 +1806,7 @@  PRED8x8L_DOWN_RIGHT
 ;                                   int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 cglobal pred8x8l_vertical_right_8, 4,5
     sub          r0, r3
@@ -1892,6 +1913,7 @@  cglobal pred8x8l_vertical_right_8, 4,5
     PALIGNR    mm5, mm0, 7, mm1
     movq [r4+r3*2], mm5
     RET
+%endif
 
 %macro PRED8x8L_VERTICAL_RIGHT 0
 cglobal pred8x8l_vertical_right_8, 4,5,7
@@ -2192,6 +2214,7 @@  PRED8x8L_HORIZONTAL_UP
 ;                                    int has_topright, ptrdiff_t stride)
 ;-----------------------------------------------------------------------------
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 cglobal pred8x8l_horizontal_down_8, 4,5
     sub          r0, r3
@@ -2306,6 +2329,7 @@  cglobal pred8x8l_horizontal_down_8, 4,5
     PALIGNR    mm3, mm4, 6, mm4
     movq [r0+r3*1], mm3
     RET
+%endif
 
 %macro PRED8x8L_HORIZONTAL_DOWN 0
 cglobal pred8x8l_horizontal_down_8, 4,5
@@ -2508,8 +2532,10 @@  cglobal pred4x4_tm_vp8_8, 3,6
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmx
 PRED4x4_TM
+%endif
 INIT_MMX mmxext
 PRED4x4_TM
 
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 629e0a72e3..e978d91ff1 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -411,8 +411,10 @@  cglobal pred8x8_dc_10, 2, 6
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED8x8_DC pshufw
+%endif
 INIT_XMM sse2
 PRED8x8_DC pshuflw
 
@@ -526,8 +528,10 @@  cglobal pred8x8l_128_dc_10, 4, 4
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED8x8L_128_DC
+%endif
 INIT_XMM sse2
 PRED8x8L_128_DC
 
@@ -1033,8 +1037,10 @@  cglobal pred16x16_vertical_10, 2, 3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_VERTICAL
+%endif
 INIT_XMM sse2
 PRED16x16_VERTICAL
 
@@ -1057,8 +1063,10 @@  cglobal pred16x16_horizontal_10, 2, 3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_HORIZONTAL
+%endif
 INIT_XMM sse2
 PRED16x16_HORIZONTAL
 
@@ -1103,8 +1111,10 @@  cglobal pred16x16_dc_10, 2, 6
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_DC
+%endif
 INIT_XMM sse2
 PRED16x16_DC
 
@@ -1135,8 +1145,10 @@  cglobal pred16x16_top_dc_10, 2, 3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_TOP_DC
+%endif
 INIT_XMM sse2
 PRED16x16_TOP_DC
 
@@ -1172,8 +1184,10 @@  cglobal pred16x16_left_dc_10, 2, 6
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_LEFT_DC
+%endif
 INIT_XMM sse2
 PRED16x16_LEFT_DC
 
@@ -1193,7 +1207,9 @@  cglobal pred16x16_128_dc_10, 2,3
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 PRED16x16_128_DC
+%endif
 INIT_XMM sse2
 PRED16x16_128_DC
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index a95cfbca55..b4b04beff5 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -193,10 +193,13 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
 
     if (bit_depth == 8) {
         if (EXTERNAL_MMX(cpu_flags)) {
+#if ARCH_X86_32
             h->pred16x16[VERT_PRED8x8         ] = ff_pred16x16_vertical_8_mmx;
             h->pred16x16[HOR_PRED8x8          ] = ff_pred16x16_horizontal_8_mmx;
+#endif
             if (chroma_format_idc <= 1) {
                 h->pred8x8  [VERT_PRED8x8     ] = ff_pred8x8_vertical_8_mmx;
+#if ARCH_X86_32
                 h->pred8x8  [HOR_PRED8x8      ] = ff_pred8x8_horizontal_8_mmx;
             }
             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
@@ -214,23 +217,28 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                 } else {
                     h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
                 }
+#endif
             }
         }
 
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             h->pred16x16[HOR_PRED8x8            ] = ff_pred16x16_horizontal_8_mmxext;
+#if ARCH_X86_32
             h->pred16x16[DC_PRED8x8             ] = ff_pred16x16_dc_8_mmxext;
+#endif
             if (chroma_format_idc <= 1)
                 h->pred8x8[HOR_PRED8x8          ] = ff_pred8x8_horizontal_8_mmxext;
             h->pred8x8l [TOP_DC_PRED            ] = ff_pred8x8l_top_dc_8_mmxext;
             h->pred8x8l [DC_PRED                ] = ff_pred8x8l_dc_8_mmxext;
             h->pred8x8l [HOR_PRED               ] = ff_pred8x8l_horizontal_8_mmxext;
             h->pred8x8l [VERT_PRED              ] = ff_pred8x8l_vertical_8_mmxext;
-            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
-            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
             h->pred8x8l [HOR_UP_PRED            ] = ff_pred8x8l_horizontal_up_8_mmxext;
+#if ARCH_X86_32
             h->pred8x8l [DIAG_DOWN_LEFT_PRED    ] = ff_pred8x8l_down_left_8_mmxext;
+            h->pred8x8l [DIAG_DOWN_RIGHT_PRED   ] = ff_pred8x8l_down_right_8_mmxext;
+            h->pred8x8l [VERT_RIGHT_PRED        ] = ff_pred8x8l_vertical_right_8_mmxext;
             h->pred8x8l [HOR_DOWN_PRED          ] = ff_pred8x8l_horizontal_down_8_mmxext;
+#endif
             h->pred4x4  [DIAG_DOWN_RIGHT_PRED   ] = ff_pred4x4_down_right_8_mmxext;
             h->pred4x4  [VERT_RIGHT_PRED        ] = ff_pred4x4_vertical_right_8_mmxext;
             h->pred4x4  [HOR_DOWN_PRED          ] = ff_pred4x4_horizontal_down_8_mmxext;
@@ -252,11 +260,12 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                 }
             }
             if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
-                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmxext;
                 h->pred8x8  [DC_PRED8x8         ] = ff_pred8x8_dc_rv40_8_mmxext;
-                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmxext;
                 h->pred4x4  [TM_VP8_PRED        ] = ff_pred4x4_tm_vp8_8_mmxext;
                 h->pred4x4  [VERT_PRED          ] = ff_pred4x4_vertical_vp8_8_mmxext;
+#if ARCH_X86_32
+                h->pred16x16[PLANE_PRED8x8      ] = ff_pred16x16_tm_vp8_8_mmxext;
+                h->pred8x8  [PLANE_PRED8x8      ] = ff_pred8x8_tm_vp8_8_mmxext;
             } else {
                 if (chroma_format_idc <= 1)
                     h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
@@ -267,6 +276,7 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                 } else {
                     h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_h264_8_mmxext;
                 }
+#endif
             }
         }
 
@@ -338,6 +348,7 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
             h->pred4x4[DC_PRED             ] = ff_pred4x4_dc_10_mmxext;
             h->pred4x4[HOR_UP_PRED         ] = ff_pred4x4_horizontal_up_10_mmxext;
 
+#if ARCH_X86_32
             if (chroma_format_idc <= 1)
                 h->pred8x8[DC_PRED8x8      ] = ff_pred8x8_dc_10_mmxext;
 
@@ -349,6 +360,7 @@  av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
             h->pred16x16[LEFT_DC_PRED8x8   ] = ff_pred16x16_left_dc_10_mmxext;
             h->pred16x16[VERT_PRED8x8      ] = ff_pred16x16_vertical_10_mmxext;
             h->pred16x16[HOR_PRED8x8       ] = ff_pred16x16_horizontal_10_mmxext;
+#endif
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;