diff mbox series

[FFmpeg-devel,15/41] avcodec/x86/hevcdsp_init: Disable overridden functions on x64

Message ID DB6PR0101MB22141FBFA6D3AC054FCF6F588FA79@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com
State Superseded
Headers show
Series Stop including superseded functions for x64 | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Andreas Rheinhardt June 9, 2022, 11:54 p.m. UTC
x64 always has MMX, MMXEXT, SSE and SSE2, which means that
some functions for MMX, MMXEXT, SSE and 3DNow! are always
overridden by other functions (unless one, e.g., explicitly
disables SSE2). This commit therefore disables such hevcdsp
functions at compile time.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
FYI: There is a pre-existing stride/alignment bug in this code:
If one configures with --disable-sse3, one gets a STRIDE_ALIGN of 16.
The test fate-hevc-conformance-DBLK_A_MAIN10_VIXS_3 then fails
when using SSE2. More precisely, if one comments out both
SAO_BAND_INIT(10, sse2); and SAO_EDGE_INIT(10, sse2); in
x86/hevcdsp_init.c, the test passes. It also passes if one hardcodes
STRIDE_ALIGN to 32 in avcodec_align_dimensions2().

 libavcodec/x86/hevc_idct.asm  | 2 ++
 libavcodec/x86/hevcdsp_init.c | 6 ++++++
 2 files changed, 8 insertions(+)
diff mbox series

Patch

diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index 1eb1973f27..eb44e06123 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -811,7 +811,9 @@  cglobal hevc_idct_32x32_%1, 1, 6, 16, 256, coeffs
 %macro INIT_IDCT_DC 1
 INIT_MMX mmxext
 IDCT_DC_NL  4,      %1
+%if ARCH_X86_32
 IDCT_DC     8,  2,  %1
+%endif
 
 INIT_XMM sse2
 IDCT_DC_NL  8,      %1
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 48f48a925f..b48661fe35 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -712,7 +712,9 @@  void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
     if (bit_depth == 8) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
+#if ARCH_X86_32
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
+#endif
 
             c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
         }
@@ -889,7 +891,9 @@  void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
             c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
+#if ARCH_X86_32
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
+#endif
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
@@ -1105,7 +1109,9 @@  void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
     } else if (bit_depth == 12) {
         if (EXTERNAL_MMXEXT(cpu_flags)) {
             c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
+#if ARCH_X86_32
             c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext;
+#endif
         }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;