Message ID | 06138cd36b600db9865a573cd95cd9e33129a103.1667325395.git.amy@amyspark.me |
---|---|
State | New |
Headers | show |
Series | Fix FFmpeg compilation without DCE | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_x86 | warning | New warnings during build |
L. E. Segovia: > Continuation of 40e6575aa3eed64cd32bf28c00ae57edc5acb25a > > Co-authored-by: Nirbheek Chauhan <nirbheek@centricular.com> > > Signed-off-by: L. E. Segovia <amy@amyspark.me> > --- > libavcodec/x86/dcadsp_init.c | 2 + > libavcodec/x86/fdctdsp_init.c | 2 + > libavcodec/x86/flacdsp_init.c | 8 +- > libavcodec/x86/hevcdsp_init.c | 463 ++++++++++++++------------- > libavcodec/x86/idctdsp_init.c | 9 +- > libavcodec/x86/mlpdsp_init.c | 6 +- > libavcodec/x86/vc1dsp_init.c | 6 +- > libavfilter/x86/colorspacedsp_init.c | 4 +- > libavfilter/x86/vf_atadenoise_init.c | 6 +- > libavfilter/x86/vf_ssim_init.c | 6 +- > libavfilter/x86/vf_w3fdif_init.c | 4 +- > 11 files changed, 272 insertions(+), 244 deletions(-) > > diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c > index 0c78dd1c9e..3c125f33fd 100644 > --- a/libavcodec/x86/dcadsp_init.c > +++ b/libavcodec/x86/dcadsp_init.c > @@ -36,6 +36,7 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) > { > int cpu_flags = av_get_cpu_flags(); > > +#if ARCH_X86_32 This check is obviously wrong. You are disabling these functions for x64. > if (EXTERNAL_SSE2(cpu_flags)) > s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2; > if (EXTERNAL_SSE3(cpu_flags)) > @@ -46,4 +47,5 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) > } > if (EXTERNAL_FMA3(cpu_flags)) > s->lfe_fir_float[0] = ff_lfe_fir0_float_fma3; > +#endif > } > diff --git a/libavcodec/x86/fdctdsp_init.c b/libavcodec/x86/fdctdsp_init.c > index 92a842433d..4a874a640d 100644 > --- a/libavcodec/x86/fdctdsp_init.c > +++ b/libavcodec/x86/fdctdsp_init.c > @@ -31,8 +31,10 @@ av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, > > if (!high_bit_depth) { > if ((dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) { > +#if HAVE_INLINE_SSE2 > if (INLINE_SSE2(cpu_flags)) > c->fdct = ff_fdct_sse2; > +#endif > } > } > } > diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c > index 87daed7005..49e67ee2b0 100644 > --- a/libavcodec/x86/flacdsp_init.c > +++ b/libavcodec/x86/flacdsp_init.c > @@ -97,15 +97,19 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int > } > if (EXTERNAL_AVX(cpu_flags)) { > if (fmt == AV_SAMPLE_FMT_S16) { > - if (ARCH_X86_64 && channels == 8) > +#if ARCH_X86_64 > + if (channels == 8) > c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx; > +#endif > } else if (fmt == AV_SAMPLE_FMT_S32) { > if (channels == 4) > c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx; > else if (channels == 6) > c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx; > - else if (ARCH_X86_64 && channels == 8) > +#if ARCH_X86_64 > + else if (channels == 8) > c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx; > +#endif Why does the last function need ARCH_X86_64? And why only the last? > } > } > if (EXTERNAL_XOP(cpu_flags)) { > diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c > index 6f45e5e0db..c7060085a2 100644 > --- a/libavcodec/x86/hevcdsp_init.c > +++ b/libavcodec/x86/hevcdsp_init.c > @@ -710,13 +710,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > if (EXTERNAL_SSE2(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; > > - c->idct[2] = ff_hevc_idct_16x16_8_sse2; > - c->idct[3] = ff_hevc_idct_32x32_8_sse2; > - } > + c->idct[2] = ff_hevc_idct_16x16_8_sse2; > + c->idct[3] = ff_hevc_idct_32x32_8_sse2; > +#endif > SAO_BAND_INIT(8, sse2); > > c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; > @@ -731,14 +731,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; > } > if (EXTERNAL_SSSE3(cpu_flags)) { > - if(ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; > - } > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; > +#endif > SAO_EDGE_INIT(8, ssse3); > } > - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { > - > +#if ARCH_X86_64 > + if (EXTERNAL_SSE4(cpu_flags)) { > EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); > EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); > EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); > @@ -749,16 +749,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); > QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); > } > +#endif > if (EXTERNAL_AVX(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; > > - c->idct[2] = ff_hevc_idct_16x16_8_avx; > - c->idct[3] = ff_hevc_idct_32x32_8_avx; > - } > + c->idct[2] = ff_hevc_idct_16x16_8_avx; > + c->idct[3] = ff_hevc_idct_32x32_8_avx; > +#endif > SAO_BAND_INIT(8, avx); > > c->idct[0] = ff_hevc_idct_4x4_8_avx; > @@ -775,91 +776,91 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > if (EXTERNAL_AVX2_FAST(cpu_flags)) { > c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; > c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; > - if (ARCH_X86_64) { > - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; > - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; > - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; > +#if ARCH_X86_64 > + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; > + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; > + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; > > - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; > - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; > - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; > + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; > + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; > + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; > > - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > > - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > > - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; > - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; > - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; > + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; > + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; > + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; > > - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; > - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; > - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; > + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; > + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; > + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; > > - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; > - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; > - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; > + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; > + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; > + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; > > - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; > - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; > - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; > + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; > + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; > + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; > > - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; > - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; > - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; > + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; > + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; > + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; > > - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; > - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; > - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; > + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; > + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; > + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; > > - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; > - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; > - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; > + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; > + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; > + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; > > - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; > - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; > - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; > + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; > + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; > + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; > > - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; > - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; > - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; > + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; > + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; > + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; > > - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; > - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; > - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; > + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; > + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; > + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; > > - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; > - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; > - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; > + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; > + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; > + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; > > - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; > - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; > - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; > + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; > + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; > + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; > > - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; > - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; > - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; > + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; > + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; > + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; > > - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; > - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; > - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; > + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; > + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; > + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; > > - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; > - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; > - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; > + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; > + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; > + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; > > - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; > - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; > - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; > + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; > + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; > + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; > > - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; > - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; > - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; > - } > + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; > + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; > + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; > +#endif The reindentation should be performed in a separate commit in cases like these. > SAO_BAND_INIT(8, avx2); > > c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; > @@ -884,13 +885,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > if (EXTERNAL_SSE2(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; > > - c->idct[2] = ff_hevc_idct_16x16_10_sse2; > - c->idct[3] = ff_hevc_idct_32x32_10_sse2; > - } > + c->idct[2] = ff_hevc_idct_16x16_10_sse2; > + c->idct[3] = ff_hevc_idct_32x32_10_sse2; > +#endif > SAO_BAND_INIT(10, sse2); > SAO_EDGE_INIT(10, sse2); > > @@ -905,11 +906,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; > c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; > } > - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { > +#if ARCH_X86_64 > + if (EXTERNAL_SSSE3(cpu_flags)) { > c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; > c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; > } > - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { > + if (EXTERNAL_SSE4(cpu_flags)) { > EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); > EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); > EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); > @@ -920,16 +922,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); > QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); > } > +#endif > if (EXTERNAL_AVX(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; > > - c->idct[2] = ff_hevc_idct_16x16_10_avx; > - c->idct[3] = ff_hevc_idct_32x32_10_avx; > - } > + c->idct[2] = ff_hevc_idct_16x16_10_avx; > + c->idct[3] = ff_hevc_idct_32x32_10_avx; > +#endif > > c->idct[0] = ff_hevc_idct_4x4_10_avx; > c->idct[1] = ff_hevc_idct_8x8_10_avx; > @@ -942,150 +945,150 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > if (EXTERNAL_AVX2_FAST(cpu_flags)) { > c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; > c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; > - if (ARCH_X86_64) { > - c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; > - c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; > - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; > - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; > - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; > +#if ARCH_X86_64 > + c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; > + c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; > + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; > + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; > + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; > > - c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; > - c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; > - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; > - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; > - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; > + c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; > + c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; > + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; > + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; > + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; > > - c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > - c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; > - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; > + c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > + c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; > + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; > > - c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > - c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; > - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; > + c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; > + c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; > + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; > + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; > + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; > > - c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; > - c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; > - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; > - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; > - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; > - c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; > - c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; > - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; > - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; > - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; > + c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; > + c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; > + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; > + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; > + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; > + c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; > + c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; > + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; > + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; > + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; > > - c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; > - c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; > - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; > - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; > - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; > + c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; > + c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; > + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; > + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; > + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; > > - c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; > - c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; > - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; > - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; > - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; > + c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; > + c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; > + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; > + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; > + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; > > - c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; > - c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; > - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; > - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; > - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; > + c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; > + c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; > + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; > + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; > + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; > > - c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; > - c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; > - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; > - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; > - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; > + c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; > + c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; > + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; > + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; > + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; > > - c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; > - c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; > - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; > - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; > - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; > + c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; > + c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; > + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; > + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; > + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; > > - c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; > - c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; > - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; > - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; > - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; > + c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; > + c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; > + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; > + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; > + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; > > - c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; > - c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; > - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; > - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; > - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; > + c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; > + c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; > + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; > + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; > + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; > > - c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; > - c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; > - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; > - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; > - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; > + c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; > + c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; > + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; > + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; > + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; > > - c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; > - c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; > - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; > - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; > - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; > + c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; > + c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; > + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; > + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; > + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; > > - c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; > - c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; > - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; > - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; > - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; > + c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; > + c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; > + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; > + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; > + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; > > - c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; > - c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; > - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; > - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; > - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; > + c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; > + c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; > + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; > + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; > + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; > > - c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; > - c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; > - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; > - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; > - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; > + c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; > + c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; > + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; > + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; > + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; > > - c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; > - c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; > - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; > - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; > - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; > + c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; > + c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; > + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; > + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; > + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; > > - c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; > - c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; > - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; > - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; > - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; > + c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; > + c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; > + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; > + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; > + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; > > - c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; > - c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; > - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; > - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; > - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; > + c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; > + c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; > + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; > + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; > + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; > > - c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; > - c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; > - c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; > - c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; > - c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; > + c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; > + c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; > + c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; > + c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; > + c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; > > - c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; > - c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; > - c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; > - c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; > - c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; > + c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; > + c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; > + c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; > + c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; > + c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; > > - c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; > - c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; > - c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; > - c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; > - c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; > - } > + c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; > + c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; > + c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; > + c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; > + c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; > +#endif > SAO_BAND_INIT(10, avx2); > SAO_EDGE_INIT(10, avx2); > > @@ -1099,10 +1102,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > if (EXTERNAL_SSE2(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; > - } > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; > +#endif > SAO_BAND_INIT(12, sse2); > SAO_EDGE_INIT(12, sse2); > > @@ -1110,11 +1113,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; > c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; > } > - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { > +#if ARCH_X86_64 > + if (EXTERNAL_SSSE3(cpu_flags)) { > c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; > c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; > } > - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { > + if (EXTERNAL_SSE4(cpu_flags)) { > EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); > EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); > EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); > @@ -1125,13 +1129,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) > QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); > QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); > } > +#endif > if (EXTERNAL_AVX(cpu_flags)) { > c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; > c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; > - if (ARCH_X86_64) { > - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; > - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; > - } > +#if ARCH_X86_64 > + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; > + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; > +#endif > SAO_BAND_INIT(12, avx); > } > if (EXTERNAL_AVX2(cpu_flags)) { > diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c > index f28a1ad744..4ade52a880 100644 > --- a/libavcodec/x86/idctdsp_init.c > +++ b/libavcodec/x86/idctdsp_init.c > @@ -92,8 +92,8 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, > } > #endif > > - if (ARCH_X86_64 && > - !high_bit_depth && > +#if ARCH_X86_64 > + if (!high_bit_depth && > avctx->lowres == 0 && > (avctx->idct_algo == FF_IDCT_AUTO || > avctx->idct_algo == FF_IDCT_SIMPLEAUTO || > @@ -104,9 +104,11 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, > c->idct_add = ff_simple_idct8_add_sse2; > c->perm_type = FF_IDCT_PERM_TRANSPOSE; > } > +#endif > } > > - if (ARCH_X86_64 && avctx->lowres == 0) { > +#if ARCH_X86_64 > + if (avctx->lowres == 0) { > if (EXTERNAL_AVX(cpu_flags) && > !high_bit_depth && > (avctx->idct_algo == FF_IDCT_AUTO || > @@ -156,4 +158,5 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, > } > } > } > +#endif > } > diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c > index 950f996832..333a685f47 100644 > --- a/libavcodec/x86/mlpdsp_init.c > +++ b/libavcodec/x86/mlpdsp_init.c > @@ -200,8 +200,10 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c) > if (INLINE_MMX(cpu_flags)) > c->mlp_filter_channel = mlp_filter_channel_x86; > #endif > - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) > +#if ARCH_X86_64 > + if (EXTERNAL_SSE4(cpu_flags)) > c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4; > - if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) > + if (EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) > c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2; > +#endif > } > diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c > index 90b2f3624e..bc63933e83 100644 > --- a/libavcodec/x86/vc1dsp_init.c > +++ b/libavcodec/x86/vc1dsp_init.c > @@ -102,13 +102,15 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) > { > int cpu_flags = av_get_cpu_flags(); > > - if (HAVE_6REGS && INLINE_MMX(cpu_flags)) > +#if HAVE_6REGS > + if (INLINE_MMX(cpu_flags)) > if (EXTERNAL_MMX(cpu_flags)) > ff_vc1dsp_init_mmx(dsp); > > - if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags)) > + if (INLINE_MMXEXT(cpu_flags)) > if (EXTERNAL_MMXEXT(cpu_flags)) > ff_vc1dsp_init_mmxext(dsp); > +#endif > > #define ASSIGN_LF4(EXT) \ > dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ > diff --git a/libavfilter/x86/colorspacedsp_init.c b/libavfilter/x86/colorspacedsp_init.c > index b5006ac295..f01db4baf4 100644 > --- a/libavfilter/x86/colorspacedsp_init.c > +++ b/libavfilter/x86/colorspacedsp_init.c > @@ -80,7 +80,8 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) > { > int cpu_flags = av_get_cpu_flags(); > > - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { > +#if ARCH_X86_64 > + if (EXTERNAL_SSE2(cpu_flags)) { > #define assign_yuv2yuv_fns(ss) \ > dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ > dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ > @@ -116,4 +117,5 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) > > dsp->multiply3x3 = ff_multiply3x3_sse2; > } > +#endif > } > diff --git a/libavfilter/x86/vf_atadenoise_init.c b/libavfilter/x86/vf_atadenoise_init.c > index e7a653f191..eb621e172c 100644 > --- a/libavfilter/x86/vf_atadenoise_init.c > +++ b/libavfilter/x86/vf_atadenoise_init.c > @@ -39,12 +39,14 @@ av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth, int al > int cpu_flags = av_get_cpu_flags(); > > for (int p = 0; p < 4; p++) { > - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { > +#if ARCH_X86_64 > + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { > dsp->filter_row[p] = ff_atadenoise_filter_row8_sse4; > } > > - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { > + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { > dsp->filter_row[p] = ff_atadenoise_filter_row8_serial_sse4; > } > +#endif > } > } > diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c > index cbaa20ef16..ddc91f791a 100644 > --- a/libavfilter/x86/vf_ssim_init.c > +++ b/libavfilter/x86/vf_ssim_init.c > @@ -34,8 +34,10 @@ void ff_ssim_init_x86(SSIMDSPContext *dsp) > { > int cpu_flags = av_get_cpu_flags(); > > - if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) > - dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; > +#if ARCH_X86_64 > + if (EXTERNAL_SSSE3(cpu_flags)) > + dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; We indent by four spaces. And there is actually no reason to touch this line at all. > +#endif > if (EXTERNAL_SSE4(cpu_flags)) > dsp->ssim_end_line = ff_ssim_end_line_sse4; > if (EXTERNAL_XOP(cpu_flags)) > diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c > index 16202fba76..6d677d651d 100644 > --- a/libavfilter/x86/vf_w3fdif_init.c > +++ b/libavfilter/x86/vf_w3fdif_init.c > @@ -56,7 +56,9 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp, int depth) > dsp->filter_scale = ff_w3fdif_scale_sse2; > } > > - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && depth <= 8) { > +#if ARCH_X86_64 > + if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) { > dsp->filter_complex_high = ff_w3fdif_complex_high_sse2; > } > +#endif > }
Hi! Thanks for the review. The comments will be fixed in the next version of the patchset. Just a couple remarks: re dcadsp_init -- this one comes from 4.4.x having a clause there to support SSE, which also checks for the x86-32 architecture [1]. I've removed it for the current version. re flacdsp_init -- ff_flac_decorrelate_indep8_16_avx and ff_flac_decorrelate_indep8_32_avx are from their inception only generated for x86-64 [2][3]. This check was missing from the C side, implying it relied on DCE to pass linking. [1]: https://github.com/FFmpeg/FFmpeg/blob/release/4.4/libavcodec/x86/dcadsp_init.c#L40-L41 [2]: https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/x86/flacdsp.asm#L315-L318 [3]: https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/x86/flacdsp.asm#L323-L326 On 01/11/2022 19:28, andreas.rheinhardt at outlook.com (Andreas Rheinhardt) wrote: > L. E. Segovia: >> Continuation of 40e6575aa3eed64cd32bf28c00ae57edc5acb25a >> >> Co-authored-by: Nirbheek Chauhan <nirbheek at centricular.com> >> >> Signed-off-by: L. E. Segovia <amy at amyspark.me> >> --- >> libavcodec/x86/dcadsp_init.c | 2 + >> libavcodec/x86/fdctdsp_init.c | 2 + >> libavcodec/x86/flacdsp_init.c | 8 +- >> libavcodec/x86/hevcdsp_init.c | 463 ++++++++++++++------------- >> libavcodec/x86/idctdsp_init.c | 9 +- >> libavcodec/x86/mlpdsp_init.c | 6 +- >> libavcodec/x86/vc1dsp_init.c | 6 +- >> libavfilter/x86/colorspacedsp_init.c | 4 +- >> libavfilter/x86/vf_atadenoise_init.c | 6 +- >> libavfilter/x86/vf_ssim_init.c | 6 +- >> libavfilter/x86/vf_w3fdif_init.c | 4 +- >> 11 files changed, 272 insertions(+), 244 deletions(-) >> >> diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c >> index 0c78dd1c9e..3c125f33fd 100644 >> --- a/libavcodec/x86/dcadsp_init.c >> +++ b/libavcodec/x86/dcadsp_init.c >> @@ -36,6 +36,7 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) >> { >> int cpu_flags = av_get_cpu_flags(); >> >> +#if ARCH_X86_32 > > This check is obviously wrong. You are disabling these functions for x64. > >> if (EXTERNAL_SSE2(cpu_flags)) >> s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2; >> if (EXTERNAL_SSE3(cpu_flags)) >> @@ -46,4 +47,5 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) >> } >> if (EXTERNAL_FMA3(cpu_flags)) >> s->lfe_fir_float[0] = ff_lfe_fir0_float_fma3; >> +#endif >> } >> diff --git a/libavcodec/x86/fdctdsp_init.c b/libavcodec/x86/fdctdsp_init.c >> index 92a842433d..4a874a640d 100644 >> --- a/libavcodec/x86/fdctdsp_init.c >> +++ b/libavcodec/x86/fdctdsp_init.c >> @@ -31,8 +31,10 @@ av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, >> >> if (!high_bit_depth) { >> if ((dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) { >> +#if HAVE_INLINE_SSE2 >> if (INLINE_SSE2(cpu_flags)) >> c->fdct = ff_fdct_sse2; >> +#endif >> } >> } >> } >> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c >> index 87daed7005..49e67ee2b0 100644 >> --- a/libavcodec/x86/flacdsp_init.c >> +++ b/libavcodec/x86/flacdsp_init.c >> @@ -97,15 +97,19 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int >> } >> if (EXTERNAL_AVX(cpu_flags)) { >> if (fmt == AV_SAMPLE_FMT_S16) { >> - if (ARCH_X86_64 && channels == 8) >> +#if ARCH_X86_64 >> + if (channels == 8) >> c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx; >> +#endif >> } else if (fmt == AV_SAMPLE_FMT_S32) { >> if (channels == 4) >> c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx; >> else if (channels == 6) >> c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx; >> - else if (ARCH_X86_64 && channels == 8) >> +#if ARCH_X86_64 >> + else if (channels == 8) >> c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx; >> +#endif > > Why does the last function need ARCH_X86_64? And why only the last? > >> } >> } >> if (EXTERNAL_XOP(cpu_flags)) { >> diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c >> index 6f45e5e0db..c7060085a2 100644 >> --- a/libavcodec/x86/hevcdsp_init.c >> +++ b/libavcodec/x86/hevcdsp_init.c >> @@ -710,13 +710,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> if (EXTERNAL_SSE2(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; >> >> - c->idct[2] = ff_hevc_idct_16x16_8_sse2; >> - c->idct[3] = ff_hevc_idct_32x32_8_sse2; >> - } >> + c->idct[2] = ff_hevc_idct_16x16_8_sse2; >> + c->idct[3] = ff_hevc_idct_32x32_8_sse2; >> +#endif >> SAO_BAND_INIT(8, sse2); >> >> c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; >> @@ -731,14 +731,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; >> } >> if (EXTERNAL_SSSE3(cpu_flags)) { >> - if(ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; >> - } >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; >> +#endif >> SAO_EDGE_INIT(8, ssse3); >> } >> - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { >> - >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSE4(cpu_flags)) { >> EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); >> EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); >> EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); >> @@ -749,16 +749,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); >> QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); >> } >> +#endif >> if (EXTERNAL_AVX(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; >> >> - c->idct[2] = ff_hevc_idct_16x16_8_avx; >> - c->idct[3] = ff_hevc_idct_32x32_8_avx; >> - } >> + c->idct[2] = ff_hevc_idct_16x16_8_avx; >> + c->idct[3] = ff_hevc_idct_32x32_8_avx; >> +#endif >> SAO_BAND_INIT(8, avx); >> >> c->idct[0] = ff_hevc_idct_4x4_8_avx; >> @@ -775,91 +776,91 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> if (EXTERNAL_AVX2_FAST(cpu_flags)) { >> c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; >> c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; >> - if (ARCH_X86_64) { >> - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; >> - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; >> - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; >> +#if ARCH_X86_64 >> + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; >> + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; >> + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; >> >> - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; >> - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; >> - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; >> + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; >> + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; >> + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; >> >> - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> >> - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> >> - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; >> - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; >> - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; >> + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; >> + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; >> + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; >> >> - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; >> - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; >> - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; >> + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; >> + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; >> + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; >> >> - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; >> - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; >> - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; >> + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; >> + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; >> + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; >> >> - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; >> - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; >> - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; >> + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; >> + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; >> + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; >> >> - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; >> - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; >> - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; >> + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; >> + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; >> + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; >> >> - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; >> - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; >> - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; >> + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; >> + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; >> + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; >> >> - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; >> - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; >> - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; >> + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; >> + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; >> + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; >> >> - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; >> - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; >> - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; >> + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; >> + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; >> + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; >> >> - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; >> - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; >> - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; >> + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; >> + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; >> + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; >> >> - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; >> - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; >> - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; >> + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; >> + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; >> + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; >> >> - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; >> - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; >> - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; >> + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; >> + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; >> + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; >> >> - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; >> - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; >> - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; >> + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; >> + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; >> + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; >> >> - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; >> - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; >> - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; >> + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; >> + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; >> + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; >> >> - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; >> - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; >> - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; >> + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; >> + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; >> + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; >> >> - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; >> - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; >> - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; >> + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; >> + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; >> + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; >> >> - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; >> - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; >> - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; >> + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; >> + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; >> + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; >> >> - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; >> - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; >> - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; >> - } >> + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; >> + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; >> + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; >> +#endif > > The reindentation should be performed in a separate commit in cases like > these. > >> SAO_BAND_INIT(8, avx2); >> >> c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; >> @@ -884,13 +885,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> if (EXTERNAL_SSE2(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; >> >> - c->idct[2] = ff_hevc_idct_16x16_10_sse2; >> - c->idct[3] = ff_hevc_idct_32x32_10_sse2; >> - } >> + c->idct[2] = ff_hevc_idct_16x16_10_sse2; >> + c->idct[3] = ff_hevc_idct_32x32_10_sse2; >> +#endif >> SAO_BAND_INIT(10, sse2); >> SAO_EDGE_INIT(10, sse2); >> >> @@ -905,11 +906,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; >> c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; >> } >> - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSSE3(cpu_flags)) { >> c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; >> c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; >> } >> - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { >> + if (EXTERNAL_SSE4(cpu_flags)) { >> EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); >> EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); >> EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); >> @@ -920,16 +922,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); >> QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); >> } >> +#endif >> if (EXTERNAL_AVX(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; >> >> - c->idct[2] = ff_hevc_idct_16x16_10_avx; >> - c->idct[3] = ff_hevc_idct_32x32_10_avx; >> - } >> + c->idct[2] = ff_hevc_idct_16x16_10_avx; >> + c->idct[3] = ff_hevc_idct_32x32_10_avx; >> +#endif >> >> c->idct[0] = ff_hevc_idct_4x4_10_avx; >> c->idct[1] = ff_hevc_idct_8x8_10_avx; >> @@ -942,150 +945,150 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> if (EXTERNAL_AVX2_FAST(cpu_flags)) { >> c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; >> c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; >> - if (ARCH_X86_64) { >> - c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; >> - c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; >> - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; >> - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; >> - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; >> +#if ARCH_X86_64 >> + c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; >> + c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; >> + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; >> + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; >> + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; >> >> - c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; >> - c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; >> - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; >> - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; >> - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; >> + c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; >> + c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; >> + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; >> + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; >> + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; >> >> - c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> - c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; >> - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; >> + c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> + c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; >> + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; >> >> - c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> - c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; >> - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; >> + c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; >> + c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; >> + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; >> + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; >> + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; >> >> - c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; >> - c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; >> - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; >> - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; >> - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; >> - c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; >> - c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; >> - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; >> - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; >> - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; >> + c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; >> + c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; >> + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; >> + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; >> + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; >> + c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; >> + c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; >> + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; >> + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; >> + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; >> >> - c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; >> - c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; >> - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; >> - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; >> - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; >> + c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; >> + c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; >> + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; >> + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; >> + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; >> >> - c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; >> - c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; >> - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; >> - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; >> - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; >> + c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; >> + c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; >> + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; >> + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; >> + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; >> >> - c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; >> - c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; >> - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; >> - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; >> - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; >> + c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; >> + c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; >> + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; >> + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; >> + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; >> >> - c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; >> - c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; >> - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; >> - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; >> - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; >> + c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; >> + c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; >> + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; >> + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; >> + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; >> >> - c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; >> - c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; >> - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; >> - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; >> - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; >> + c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; >> + c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; >> + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; >> + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; >> + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; >> >> - c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; >> - c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; >> - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; >> - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; >> - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; >> + c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; >> + c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; >> + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; >> + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; >> + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; >> >> - c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; >> - c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; >> - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; >> - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; >> - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; >> + c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; >> + c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; >> + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; >> + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; >> + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; >> >> - c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; >> - c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; >> - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; >> - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; >> - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; >> + c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; >> + c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; >> + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; >> + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; >> + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; >> >> - c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; >> - c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; >> - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; >> - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; >> - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; >> + c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; >> + c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; >> + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; >> + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; >> + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; >> >> - c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; >> - c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; >> - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; >> - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; >> - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; >> + c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; >> + c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; >> + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; >> + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; >> + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; >> >> - c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; >> - c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; >> - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; >> - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; >> - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; >> + c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; >> + c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; >> + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; >> + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; >> + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; >> >> - c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; >> - c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; >> - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; >> - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; >> - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; >> + c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; >> + c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; >> + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; >> + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; >> + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; >> >> - c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; >> - c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; >> - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; >> - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; >> - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; >> + c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; >> + c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; >> + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; >> + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; >> + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; >> >> - c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; >> - c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; >> - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; >> - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; >> - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; >> + c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; >> + c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; >> + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; >> + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; >> + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; >> >> - c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; >> - c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; >> - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; >> - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; >> - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; >> + c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; >> + c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; >> + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; >> + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; >> + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; >> >> - c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; >> - c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; >> - c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; >> - c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; >> - c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; >> + c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; >> + c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; >> + c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; >> + c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; >> + c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; >> >> - c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; >> - c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; >> - c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; >> - c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; >> - c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; >> + c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; >> + c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; >> + c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; >> + c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; >> + c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; >> >> - c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; >> - c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; >> - c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; >> - c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; >> - c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; >> - } >> + c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; >> + c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; >> + c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; >> + c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; >> + c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; >> +#endif >> SAO_BAND_INIT(10, avx2); >> SAO_EDGE_INIT(10, avx2); >> >> @@ -1099,10 +1102,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> if (EXTERNAL_SSE2(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; >> - } >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; >> +#endif >> SAO_BAND_INIT(12, sse2); >> SAO_EDGE_INIT(12, sse2); >> >> @@ -1110,11 +1113,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; >> c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; >> } >> - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSSE3(cpu_flags)) { >> c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; >> c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; >> } >> - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { >> + if (EXTERNAL_SSE4(cpu_flags)) { >> EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); >> EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); >> EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); >> @@ -1125,13 +1129,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) >> QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); >> QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); >> } >> +#endif >> if (EXTERNAL_AVX(cpu_flags)) { >> c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; >> c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; >> - if (ARCH_X86_64) { >> - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; >> - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; >> - } >> +#if ARCH_X86_64 >> + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; >> + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; >> +#endif >> SAO_BAND_INIT(12, avx); >> } >> if (EXTERNAL_AVX2(cpu_flags)) { >> diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c >> index f28a1ad744..4ade52a880 100644 >> --- a/libavcodec/x86/idctdsp_init.c >> +++ b/libavcodec/x86/idctdsp_init.c >> @@ -92,8 +92,8 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, >> } >> #endif >> >> - if (ARCH_X86_64 && >> - !high_bit_depth && >> +#if ARCH_X86_64 >> + if (!high_bit_depth && >> avctx->lowres == 0 && >> (avctx->idct_algo == FF_IDCT_AUTO || >> avctx->idct_algo == FF_IDCT_SIMPLEAUTO || >> @@ -104,9 +104,11 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, >> c->idct_add = ff_simple_idct8_add_sse2; >> c->perm_type = FF_IDCT_PERM_TRANSPOSE; >> } >> +#endif >> } >> >> - if (ARCH_X86_64 && avctx->lowres == 0) { >> +#if ARCH_X86_64 >> + if (avctx->lowres == 0) { >> if (EXTERNAL_AVX(cpu_flags) && >> !high_bit_depth && >> (avctx->idct_algo == FF_IDCT_AUTO || >> @@ -156,4 +158,5 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, >> } >> } >> } >> +#endif >> } >> diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c >> index 950f996832..333a685f47 100644 >> --- a/libavcodec/x86/mlpdsp_init.c >> +++ b/libavcodec/x86/mlpdsp_init.c >> @@ -200,8 +200,10 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c) >> if (INLINE_MMX(cpu_flags)) >> c->mlp_filter_channel = mlp_filter_channel_x86; >> #endif >> - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSE4(cpu_flags)) >> c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4; >> - if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) >> + if (EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) >> c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2; >> +#endif >> } >> diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c >> index 90b2f3624e..bc63933e83 100644 >> --- a/libavcodec/x86/vc1dsp_init.c >> +++ b/libavcodec/x86/vc1dsp_init.c >> @@ -102,13 +102,15 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) >> { >> int cpu_flags = av_get_cpu_flags(); >> >> - if (HAVE_6REGS && INLINE_MMX(cpu_flags)) >> +#if HAVE_6REGS >> + if (INLINE_MMX(cpu_flags)) >> if (EXTERNAL_MMX(cpu_flags)) >> ff_vc1dsp_init_mmx(dsp); >> >> - if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags)) >> + if (INLINE_MMXEXT(cpu_flags)) >> if (EXTERNAL_MMXEXT(cpu_flags)) >> ff_vc1dsp_init_mmxext(dsp); >> +#endif >> >> #define ASSIGN_LF4(EXT) \ >> dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ >> diff --git a/libavfilter/x86/colorspacedsp_init.c b/libavfilter/x86/colorspacedsp_init.c >> index b5006ac295..f01db4baf4 100644 >> --- a/libavfilter/x86/colorspacedsp_init.c >> +++ b/libavfilter/x86/colorspacedsp_init.c >> @@ -80,7 +80,8 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) >> { >> int cpu_flags = av_get_cpu_flags(); >> >> - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSE2(cpu_flags)) { >> #define assign_yuv2yuv_fns(ss) \ >> dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ >> dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ >> @@ -116,4 +117,5 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) >> >> dsp->multiply3x3 = ff_multiply3x3_sse2; >> } >> +#endif >> } >> diff --git a/libavfilter/x86/vf_atadenoise_init.c b/libavfilter/x86/vf_atadenoise_init.c >> index e7a653f191..eb621e172c 100644 >> --- a/libavfilter/x86/vf_atadenoise_init.c >> +++ b/libavfilter/x86/vf_atadenoise_init.c >> @@ -39,12 +39,14 @@ av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth, int al >> int cpu_flags = av_get_cpu_flags(); >> >> for (int p = 0; p < 4; p++) { >> - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { >> dsp->filter_row[p] = ff_atadenoise_filter_row8_sse4; >> } >> >> - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { >> + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { >> dsp->filter_row[p] = ff_atadenoise_filter_row8_serial_sse4; >> } >> +#endif >> } >> } >> diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c >> index cbaa20ef16..ddc91f791a 100644 >> --- a/libavfilter/x86/vf_ssim_init.c >> +++ b/libavfilter/x86/vf_ssim_init.c >> @@ -34,8 +34,10 @@ void ff_ssim_init_x86(SSIMDSPContext *dsp) >> { >> int cpu_flags = av_get_cpu_flags(); >> >> - if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) >> - dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSSE3(cpu_flags)) >> + dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; > > We indent by four spaces. And there is actually no reason to touch this > line at all. > >> +#endif >> if (EXTERNAL_SSE4(cpu_flags)) >> dsp->ssim_end_line = ff_ssim_end_line_sse4; >> if (EXTERNAL_XOP(cpu_flags)) >> diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c >> index 16202fba76..6d677d651d 100644 >> --- a/libavfilter/x86/vf_w3fdif_init.c >> +++ b/libavfilter/x86/vf_w3fdif_init.c >> @@ -56,7 +56,9 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp, int depth) >> dsp->filter_scale = ff_w3fdif_scale_sse2; >> } >> >> - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && depth <= 8) { >> +#if ARCH_X86_64 >> + if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) { >> dsp->filter_complex_high = ff_w3fdif_complex_high_sse2; >> } >> +#endif >> } >
L. E. Segovia: > Hi! > > Thanks for the review. The comments will be fixed in the next version of > the patchset. Just a couple remarks: > > re dcadsp_init -- this one comes from 4.4.x having a clause there to > support SSE, which also checks for the x86-32 architecture [1]. I've > removed it for the current version. > > re flacdsp_init -- ff_flac_decorrelate_indep8_16_avx and > ff_flac_decorrelate_indep8_32_avx are from their inception only > generated for x86-64 [2][3]. This check was missing from the C side, > implying it relied on DCE to pass linking. > Sorry, I somehow did not recognize that there were already ARCH_X86_64 checks for these flacdsp functions. > [1]: > https://github.com/FFmpeg/FFmpeg/blob/release/4.4/libavcodec/x86/dcadsp_init.c#L40-L41 > > [2]: > https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/x86/flacdsp.asm#L315-L318 > > [3]: > https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/x86/flacdsp.asm#L323-L326 > Another idea that I had is as follows: Instead of using if (EXTERNAL_MMX(cpu_flags)) { ... } we add a macro IF_EXTERNAL_MMX(cpu_flags, ...) that expands to "if (cpu_flags & AV_CPU_FLAG_MMX) { __VA_ARGS__ }" if HAVE_EXTERNAL_MMX is true and to nothing (or a null statement) of it is not. Advantages of this approach are that it does not involve sprinkling the codebase with many #ifs and that it automatically checks the right stuff -- namely the corresponding HAVE_* instead of just HAVE_X86ASM as you do in the last patch; notice that if an assembler is present, we expect it to be able to assemble MMX(EXT), SSE etc., but not AVX, and consequently mostly don't check for it in the .asm files, but as has been said AVX(2) and XOP are an exception to this, so that one would still get linker errors with your patchset if one used an assembler that does not support AVX(2) (or if one disabled it via configure). Disadvantages of it are that one can not add more #if checks to the "..." of these macros and that there would be some calls to av_get_cpu_flags() where the return value would be unused (we might have to mark the variable as av_unused or mark said function as av_pure to make the compiler optimize that away and not emit warnings for it). But it would still be usable for the common case where there are no further #if checks. - Andreas
Am Di., 1. Nov. 2022 um 23:00 Uhr schrieb L. E. Segovia <amy@amyspark.me>: > > Continuation of 40e6575aa3eed64cd32bf28c00ae57edc5acb25a > > Co-authored-by: Nirbheek Chauhan <nirbheek@centricular.com> > > Signed-off-by: L. E. Segovia <amy@amyspark.me> > --- > libavcodec/x86/dcadsp_init.c | 2 + > libavcodec/x86/fdctdsp_init.c | 2 + > libavcodec/x86/flacdsp_init.c | 8 +- > libavcodec/x86/hevcdsp_init.c | 463 ++++++++++++++------------- > libavcodec/x86/idctdsp_init.c | 9 +- > libavcodec/x86/mlpdsp_init.c | 6 +- > libavcodec/x86/vc1dsp_init.c | 6 +- > libavfilter/x86/colorspacedsp_init.c | 4 +- > libavfilter/x86/vf_atadenoise_init.c | 6 +- > libavfilter/x86/vf_ssim_init.c | 6 +- > libavfilter/x86/vf_w3fdif_init.c | 4 +- > 11 files changed, 272 insertions(+), 244 deletions(-) I regularly compile with MSVC without this patch and I believe the patch makes our code less readable and harder to maintain. Carl Eugen
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 0c78dd1c9e..3c125f33fd 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -36,6 +36,7 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); +#if ARCH_X86_32 if (EXTERNAL_SSE2(cpu_flags)) s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2; if (EXTERNAL_SSE3(cpu_flags)) @@ -46,4 +47,5 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) } if (EXTERNAL_FMA3(cpu_flags)) s->lfe_fir_float[0] = ff_lfe_fir0_float_fma3; +#endif } diff --git a/libavcodec/x86/fdctdsp_init.c b/libavcodec/x86/fdctdsp_init.c index 92a842433d..4a874a640d 100644 --- a/libavcodec/x86/fdctdsp_init.c +++ b/libavcodec/x86/fdctdsp_init.c @@ -31,8 +31,10 @@ av_cold void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx, if (!high_bit_depth) { if ((dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) { +#if HAVE_INLINE_SSE2 if (INLINE_SSE2(cpu_flags)) c->fdct = ff_fdct_sse2; +#endif } } } diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c index 87daed7005..49e67ee2b0 100644 --- a/libavcodec/x86/flacdsp_init.c +++ b/libavcodec/x86/flacdsp_init.c @@ -97,15 +97,19 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int } if (EXTERNAL_AVX(cpu_flags)) { if (fmt == AV_SAMPLE_FMT_S16) { - if (ARCH_X86_64 && channels == 8) +#if ARCH_X86_64 + if (channels == 8) c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx; +#endif } else if (fmt == AV_SAMPLE_FMT_S32) { if (channels == 4) c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx; else if (channels == 6) c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx; - else if (ARCH_X86_64 && channels == 8) +#if ARCH_X86_64 + else if (channels == 8) c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx; +#endif } } if (EXTERNAL_XOP(cpu_flags)) { diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 6f45e5e0db..c7060085a2 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -710,13 +710,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2; - c->idct[2] = ff_hevc_idct_16x16_8_sse2; - c->idct[3] = ff_hevc_idct_32x32_8_sse2; - } + c->idct[2] = ff_hevc_idct_16x16_8_sse2; + c->idct[3] = ff_hevc_idct_32x32_8_sse2; +#endif SAO_BAND_INIT(8, sse2); c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2; @@ -731,14 +731,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->add_residual[3] = ff_hevc_add_residual_32_8_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { - if(ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; - } +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3; +#endif SAO_EDGE_INIT(8, ssse3); } - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { - +#if ARCH_X86_64 + if (EXTERNAL_SSE4(cpu_flags)) { EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4); EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4); EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4); @@ -749,16 +749,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4); QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4); } +#endif if (EXTERNAL_AVX(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx; - c->idct[2] = ff_hevc_idct_16x16_8_avx; - c->idct[3] = ff_hevc_idct_32x32_8_avx; - } + c->idct[2] = ff_hevc_idct_16x16_8_avx; + c->idct[3] = ff_hevc_idct_32x32_8_avx; +#endif SAO_BAND_INIT(8, avx); c->idct[0] = ff_hevc_idct_4x4_8_avx; @@ -775,91 +776,91 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; - if (ARCH_X86_64) { - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; +#if ARCH_X86_64 + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; - } + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; +#endif SAO_BAND_INIT(8, avx2); c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; @@ -884,13 +885,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; - c->idct[2] = ff_hevc_idct_16x16_10_sse2; - c->idct[3] = ff_hevc_idct_32x32_10_sse2; - } + c->idct[2] = ff_hevc_idct_16x16_10_sse2; + c->idct[3] = ff_hevc_idct_32x32_10_sse2; +#endif SAO_BAND_INIT(10, sse2); SAO_EDGE_INIT(10, sse2); @@ -905,11 +906,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->add_residual[2] = ff_hevc_add_residual_16_10_sse2; c->add_residual[3] = ff_hevc_add_residual_32_10_sse2; } - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { +#if ARCH_X86_64 + if (EXTERNAL_SSSE3(cpu_flags)) { c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3; } - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { + if (EXTERNAL_SSE4(cpu_flags)) { EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4); EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4); EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4); @@ -920,16 +922,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4); QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4); } +#endif if (EXTERNAL_AVX(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx; - c->idct[2] = ff_hevc_idct_16x16_10_avx; - c->idct[3] = ff_hevc_idct_32x32_10_avx; - } + c->idct[2] = ff_hevc_idct_16x16_10_avx; + c->idct[3] = ff_hevc_idct_32x32_10_avx; +#endif c->idct[0] = ff_hevc_idct_4x4_10_avx; c->idct[1] = ff_hevc_idct_8x8_10_avx; @@ -942,150 +945,150 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; - if (ARCH_X86_64) { - c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; - c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; +#if ARCH_X86_64 + c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; + c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; + c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; - c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; - c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; + c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; + c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; + c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; - c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; + c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; - c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; + c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; - c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; - c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; - c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; - c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; + c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; + c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; + c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; + c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; - c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; - c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; + c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; + c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; + c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; - c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; - c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; + c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; + c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; - c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; - c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; + c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; + c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; - c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; - c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; + c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; + c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; + c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; - c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; - c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; + c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; + c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; - c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; - c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; + c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; + c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; - c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; - c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; + c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; + c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; + c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; - c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; - c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; + c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; + c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; - c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; - c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; + c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; + c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; - c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; - c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; + c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; + c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; - c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; - c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; + c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; + c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; - c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; - c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; + c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; + c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; - c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; - c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; + c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; + c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; + c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; - c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; - c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; + c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; + c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; - c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; - c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; + c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; + c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; - c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; - c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; - c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; - c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; - c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; + c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; + c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; + c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; + c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; + c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; - c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; - c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; - c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; - c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; - c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; + c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; + c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; + c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; + c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; + c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; - c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; - c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; - c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; - c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; - c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; - } + c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; + c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; + c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; + c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; + c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; +#endif SAO_BAND_INIT(10, avx2); SAO_EDGE_INIT(10, avx2); @@ -1099,10 +1102,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; - } +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; +#endif SAO_BAND_INIT(12, sse2); SAO_EDGE_INIT(12, sse2); @@ -1110,11 +1113,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; } - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { +#if ARCH_X86_64 + if (EXTERNAL_SSSE3(cpu_flags)) { c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; } - if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { + if (EXTERNAL_SSE4(cpu_flags)) { EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4); @@ -1125,13 +1129,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); } +#endif if (EXTERNAL_AVX(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; - } +#if ARCH_X86_64 + c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx; + c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx; +#endif SAO_BAND_INIT(12, avx); } if (EXTERNAL_AVX2(cpu_flags)) { diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c index f28a1ad744..4ade52a880 100644 --- a/libavcodec/x86/idctdsp_init.c +++ b/libavcodec/x86/idctdsp_init.c @@ -92,8 +92,8 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, } #endif - if (ARCH_X86_64 && - !high_bit_depth && +#if ARCH_X86_64 + if (!high_bit_depth && avctx->lowres == 0 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEAUTO || @@ -104,9 +104,11 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, c->idct_add = ff_simple_idct8_add_sse2; c->perm_type = FF_IDCT_PERM_TRANSPOSE; } +#endif } - if (ARCH_X86_64 && avctx->lowres == 0) { +#if ARCH_X86_64 + if (avctx->lowres == 0) { if (EXTERNAL_AVX(cpu_flags) && !high_bit_depth && (avctx->idct_algo == FF_IDCT_AUTO || @@ -156,4 +158,5 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, } } } +#endif } diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c index 950f996832..333a685f47 100644 --- a/libavcodec/x86/mlpdsp_init.c +++ b/libavcodec/x86/mlpdsp_init.c @@ -200,8 +200,10 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c) if (INLINE_MMX(cpu_flags)) c->mlp_filter_channel = mlp_filter_channel_x86; #endif - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) +#if ARCH_X86_64 + if (EXTERNAL_SSE4(cpu_flags)) c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4; - if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) + if (EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2; +#endif } diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c index 90b2f3624e..bc63933e83 100644 --- a/libavcodec/x86/vc1dsp_init.c +++ b/libavcodec/x86/vc1dsp_init.c @@ -102,13 +102,15 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); - if (HAVE_6REGS && INLINE_MMX(cpu_flags)) +#if HAVE_6REGS + if (INLINE_MMX(cpu_flags)) if (EXTERNAL_MMX(cpu_flags)) ff_vc1dsp_init_mmx(dsp); - if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags)) + if (INLINE_MMXEXT(cpu_flags)) if (EXTERNAL_MMXEXT(cpu_flags)) ff_vc1dsp_init_mmxext(dsp); +#endif #define ASSIGN_LF4(EXT) \ dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \ diff --git a/libavfilter/x86/colorspacedsp_init.c b/libavfilter/x86/colorspacedsp_init.c index b5006ac295..f01db4baf4 100644 --- a/libavfilter/x86/colorspacedsp_init.c +++ b/libavfilter/x86/colorspacedsp_init.c @@ -80,7 +80,8 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { +#if ARCH_X86_64 + if (EXTERNAL_SSE2(cpu_flags)) { #define assign_yuv2yuv_fns(ss) \ dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ @@ -116,4 +117,5 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) dsp->multiply3x3 = ff_multiply3x3_sse2; } +#endif } diff --git a/libavfilter/x86/vf_atadenoise_init.c b/libavfilter/x86/vf_atadenoise_init.c index e7a653f191..eb621e172c 100644 --- a/libavfilter/x86/vf_atadenoise_init.c +++ b/libavfilter/x86/vf_atadenoise_init.c @@ -39,12 +39,14 @@ av_cold void ff_atadenoise_init_x86(ATADenoiseDSPContext *dsp, int depth, int al int cpu_flags = av_get_cpu_flags(); for (int p = 0; p < 4; p++) { - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { +#if ARCH_X86_64 + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == PARALLEL && sigma[p] == INT16_MAX) { dsp->filter_row[p] = ff_atadenoise_filter_row8_sse4; } - if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { + if (EXTERNAL_SSE4(cpu_flags) && depth <= 8 && algorithm == SERIAL && sigma[p] == INT16_MAX) { dsp->filter_row[p] = ff_atadenoise_filter_row8_serial_sse4; } +#endif } } diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c index cbaa20ef16..ddc91f791a 100644 --- a/libavfilter/x86/vf_ssim_init.c +++ b/libavfilter/x86/vf_ssim_init.c @@ -34,8 +34,10 @@ void ff_ssim_init_x86(SSIMDSPContext *dsp) { int cpu_flags = av_get_cpu_flags(); - if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) - dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; +#if ARCH_X86_64 + if (EXTERNAL_SSSE3(cpu_flags)) + dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3; +#endif if (EXTERNAL_SSE4(cpu_flags)) dsp->ssim_end_line = ff_ssim_end_line_sse4; if (EXTERNAL_XOP(cpu_flags)) diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c index 16202fba76..6d677d651d 100644 --- a/libavfilter/x86/vf_w3fdif_init.c +++ b/libavfilter/x86/vf_w3fdif_init.c @@ -56,7 +56,9 @@ av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp, int depth) dsp->filter_scale = ff_w3fdif_scale_sse2; } - if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags) && depth <= 8) { +#if ARCH_X86_64 + if (EXTERNAL_SSE2(cpu_flags) && depth <= 8) { dsp->filter_complex_high = ff_w3fdif_complex_high_sse2; } +#endif }
Continuation of 40e6575aa3eed64cd32bf28c00ae57edc5acb25a Co-authored-by: Nirbheek Chauhan <nirbheek@centricular.com> Signed-off-by: L. E. Segovia <amy@amyspark.me> --- libavcodec/x86/dcadsp_init.c | 2 + libavcodec/x86/fdctdsp_init.c | 2 + libavcodec/x86/flacdsp_init.c | 8 +- libavcodec/x86/hevcdsp_init.c | 463 ++++++++++++++------------- libavcodec/x86/idctdsp_init.c | 9 +- libavcodec/x86/mlpdsp_init.c | 6 +- libavcodec/x86/vc1dsp_init.c | 6 +- libavfilter/x86/colorspacedsp_init.c | 4 +- libavfilter/x86/vf_atadenoise_init.c | 6 +- libavfilter/x86/vf_ssim_init.c | 6 +- libavfilter/x86/vf_w3fdif_init.c | 4 +- 11 files changed, 272 insertions(+), 244 deletions(-)