diff mbox series

[FFmpeg-devel,33/41] avcodec/x86/h264_qpel: Disable overridden functions on x64

Message ID DB6PR0101MB2214EB7F33C2C0EC0F495DE58FA79@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com
State Superseded
Headers show
Series Stop including superseded functions for x64 | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt June 9, 2022, 11:55 p.m. UTC
x64 always has MMX, MMXEXT, SSE and SSE2, which means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2). This commit therefore disables, at compile-time
on x64, several MMXEXT functions that are overridden by SSE2
functions.

Notice that some 10-bit SSE2 functions are overridden by sse2_cache64
functions in the same code block. This is suboptimal: either the
overridden functions should be removed, or the sse2_cache64
functions should be put behind suitable checks. This commit does neither.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
I would love to get input on what to do with these sse2_cache64
functions. If no one says anything, I will send a patch that
retains the current behaviour and removes the functions
overridden by the sse2_cache64 functions.

 libavcodec/x86/h264_qpel.c        | 44 +++++++++++++++++++++----------
 libavcodec/x86/h264_qpel_8bit.asm |  4 +++
 2 files changed, 34 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index fd1070247b..cb5f8a126c 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -236,7 +236,11 @@  static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin
 #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext
 #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext
 
-#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
+#define H264_MC_C_H(OPNAME, SIZE, MMX, ALIGN) \
+H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
+
+#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \
 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
 H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
 H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
@@ -372,13 +376,9 @@  static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uin
     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
 }\
 
-#define H264_MC_4816(MMX)\
-H264_MC(put_, 4, MMX, 8)\
-H264_MC(put_, 8, MMX, 8)\
-H264_MC(put_, 16,MMX, 8)\
-H264_MC(avg_, 4, MMX, 8)\
-H264_MC(avg_, 8, MMX, 8)\
-H264_MC(avg_, 16,MMX, 8)\
+#define H264_MC(QPEL, SIZE, MMX, ALIGN)\
+QPEL(put_, SIZE, MMX, ALIGN) \
+QPEL(avg_, SIZE, MMX, ALIGN) \
 
 #define H264_MC_816(QPEL, XMM)\
 QPEL(put_, 8, XMM, 16)\
@@ -397,7 +397,14 @@  QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 QPEL_H264_HV_XMM(put_,       PUT_OP, ssse3)
 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
 
-H264_MC_4816(mmxext)
+H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8)
+#if ARCH_X86_32
+H264_MC(H264_MC_C_V_H_HV, 8, mmxext, 8)
+H264_MC(H264_MC_C_V_H_HV, 16, mmxext, 8)
+#else
+H264_MC(H264_MC_C_H, 8, mmxext, 8)
+H264_MC(H264_MC_C_H, 16, mmxext, 8)
+#endif
 H264_MC_816(H264_MC_V, sse2)
 H264_MC_816(H264_MC_HV, sse2)
 H264_MC_816(H264_MC_H, ssse3)
@@ -499,12 +506,16 @@  QPEL16(mmxext)
 
 #endif /* HAVE_X86ASM */
 
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
+#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX)                      \
     do {                                                                     \
     c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
     c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
     c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
     c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
+    } while (0)
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
+    do {                                                                     \
+    SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX);                         \
     c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
     c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
     c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
@@ -543,11 +554,16 @@  av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         if (!high_bit_depth) {
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
-            SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
+#if ARCH_X86_32
+#define SET_MMXEXT_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
+#else
+#define SET_MMXEXT_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX)
+#endif
+            SET_MMXEXT_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, );
+            SET_MMXEXT_QPEL_FUNCS(put_h264_qpel, 1,  8, mmxext, );
             SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmxext, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
+            SET_MMXEXT_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, );
+            SET_MMXEXT_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmxext, );
             SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmxext, );
         } else if (bit_depth == 10) {
 #if ARCH_X86_32
diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm
index 03c7d88f8c..72e98248d8 100644
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -461,9 +461,11 @@  cglobal %1_h264_qpel8or16_v_lowpass_op, 5,5,8 ; dst, src, dstStride, srcStride,
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 QPEL8OR16_V_LOWPASS_OP put
 QPEL8OR16_V_LOWPASS_OP avg
+%endif
 
 INIT_XMM sse2
 QPEL8OR16_V_LOWPASS_OP put
@@ -581,8 +583,10 @@  cglobal %1_h264_qpel8or16_hv1_lowpass_op, 4,4,8 ; src, tmp, srcStride, size
     REP_RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_MMX mmxext
 QPEL8OR16_HV1_LOWPASS_OP put
+%endif
 
 INIT_XMM sse2
 QPEL8OR16_HV1_LOWPASS_OP put