Message ID | 20211214071545.26283-2-chenhao@loongson.cn |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,1/7] avutil: [loongarch] Add support for loongarch SIMD. | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
> 2021年12月14日 下午3:15,Hao Chen <chenhao@loongson.cn> 写道: > > From: Shiyou Yin <yinshiyou-hf@loongson.cn> > > LSX and LASX is loongarch SIMD extention. > They are enabled by default if compiler support it, and can be disabled > with '--disable-lsx' '--disable-lasx'. > > Change-Id: Ie2608ea61dbd9b7fffadbf0ec2348bad6c124476 > --- > Makefile | 2 +- > configure | 20 +++++++++-- > ffbuild/arch.mak | 4 ++- > ffbuild/common.mak | 8 +++++ > libavutil/cpu.c | 7 ++++ > libavutil/cpu.h | 4 +++ > libavutil/cpu_internal.h | 2 ++ > libavutil/loongarch/Makefile | 1 + > libavutil/loongarch/cpu.c | 69 ++++++++++++++++++++++++++++++++++++ > libavutil/loongarch/cpu.h | 31 ++++++++++++++++ > libavutil/tests/cpu.c | 3 ++ > tests/checkasm/checkasm.c | 3 ++ > 12 files changed, 150 insertions(+), 4 deletions(-) > create mode 100644 libavutil/loongarch/Makefile > create mode 100644 libavutil/loongarch/cpu.c > create mode 100644 libavutil/loongarch/cpu.h > > diff --git a/Makefile b/Makefile > index 26c9107237..5b20658b52 100644 > --- a/Makefile > +++ b/Makefile > @@ -89,7 +89,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ > ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ > ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \ > MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ > - MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS > + MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS > > define RESET > $(1) := > diff --git a/configure b/configure > index a7593ec2db..c4afde4c5c 100755 > --- a/configure > +++ b/configure > @@ -452,7 +452,9 @@ Optimization options (experts only): > --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations > --disable-msa disable MSA optimizations > --disable-mipsfpu disable floating point MIPS optimizations > - --disable-mmi disable Loongson SIMD optimizations > + --disable-mmi disable Loongson MMI optimizations > + --disable-lsx disable Loongson LSX optimizations > + --disable-lasx disable Loongson LASX optimizations > --disable-fast-unaligned consider unaligned accesses slow > > Developer options (useful when working on FFmpeg itself): > @@ -2081,6 +2083,8 @@ ARCH_EXT_LIST_LOONGSON=" > loongson2 > loongson3 > mmi > + lsx > + lasx > " > > ARCH_EXT_LIST_X86_SIMD=" > @@ -2617,6 +2621,10 @@ power8_deps="vsx" > > loongson2_deps="mips" > loongson3_deps="mips" > +mmi_deps_any="loongson2 loongson3" > +lsx_deps="loongarch" > +lasx_deps="lsx" > + > mips32r2_deps="mips" > mips32r5_deps="mips" > mips32r6_deps="mips" > @@ -2625,7 +2633,6 @@ mips64r6_deps="mips" > mipsfpu_deps="mips" > mipsdsp_deps="mips" > mipsdspr2_deps="mips" > -mmi_deps_any="loongson2 loongson3" > msa_deps="mipsfpu" > > cpunop_deps="i686" > @@ -6134,6 +6141,9 @@ EOF > ;; > esac > > +elif enabled loongarch; then > + enabled lsx && check_inline_asm lsx '"vadd.b $vr0, $vr1, $vr2"' '-mlsx' && append LSXFLAGS '-mlsx' > + enabled lasx && check_inline_asm lasx '"xvadd.b $xr0, $xr1, $xr2"' '-mlasx' && append LASXFLAGS '-mlasx' > fi > > check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)" > @@ -7484,6 +7494,10 @@ if enabled ppc; then > echo "PPC 4xx optimizations ${ppc4xx-no}" > echo "dcbzl available ${dcbzl-no}" > fi > +if enabled loongarch; then > + echo "LSX enabled ${lsx-no}" > + echo "LASX enabled ${lasx-no}" > +fi > echo "debug symbols ${debug-no}" > echo "strip symbols ${stripping-no}" > echo "optimize for size ${small-no}" > @@ -7645,6 +7659,8 @@ ASMSTRIPFLAGS=$ASMSTRIPFLAGS > X86ASMFLAGS=$X86ASMFLAGS > MSAFLAGS=$MSAFLAGS > MMIFLAGS=$MMIFLAGS > +LSXFLAGS=$LSXFLAGS > +LASXFLAGS=$LASXFLAGS > BUILDSUF=$build_suffix > PROGSSUF=$progs_suffix > FULLNAME=$FULLNAME > diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak > index e09006efca..997e31e85e 100644 > --- a/ffbuild/arch.mak > +++ b/ffbuild/arch.mak > @@ -8,7 +8,9 @@ OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes) > OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes) > OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes) > OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes) > -OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) > +OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) > +OBJS-$(HAVE_LSX) += $(LSX-OBJS) $(LSX-OBJS-yes) > +OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes) > > OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) > OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes) > diff --git a/ffbuild/common.mak b/ffbuild/common.mak > index 268ae61154..0eb831d434 100644 > --- a/ffbuild/common.mak > +++ b/ffbuild/common.mak > @@ -59,6 +59,8 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) > COMPILE_NVCC = $(call COMPILE,NVCC) > COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS) > COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) > +COMPILE_LSX = $(call COMPILE,CC,LSXFLAGS) > +COMPILE_LASX = $(call COMPILE,CC,LASXFLAGS) > > %_mmi.o: %_mmi.c > $(COMPILE_MMI) > @@ -66,6 +68,12 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) > %_msa.o: %_msa.c > $(COMPILE_MSA) > > +%_lsx.o: %_lsx.c > + $(COMPILE_LSX) > + > +%_lasx.o: %_lasx.c > + $(COMPILE_LASX) > + > %.o: %.c > $(COMPILE_C) > > diff --git a/libavutil/cpu.c b/libavutil/cpu.c > index 4627af4f23..63efb97ffd 100644 > --- a/libavutil/cpu.c > +++ b/libavutil/cpu.c > @@ -62,6 +62,8 @@ static int get_cpu_flags(void) > return ff_get_cpu_flags_ppc(); > if (ARCH_X86) > return ff_get_cpu_flags_x86(); > + if (ARCH_LOONGARCH) > + return ff_get_cpu_flags_loongarch(); > return 0; > } > > @@ -168,6 +170,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) > #elif ARCH_MIPS > { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, > { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, > +#elif ARCH_LOONGARCH > + { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, > + { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, > #endif > { NULL }, > }; > @@ -253,6 +258,8 @@ size_t av_cpu_max_align(void) > return ff_get_cpu_max_align_ppc(); > if (ARCH_X86) > return ff_get_cpu_max_align_x86(); > + if (ARCH_LOONGARCH) > + return ff_get_cpu_max_align_loongarch(); > > return 8; > } > diff --git a/libavutil/cpu.h b/libavutil/cpu.h > index afea0640b4..ae443eccad 100644 > --- a/libavutil/cpu.h > +++ b/libavutil/cpu.h > @@ -72,6 +72,10 @@ > #define AV_CPU_FLAG_MMI (1 << 0) > #define AV_CPU_FLAG_MSA (1 << 1) > > +//Loongarch SIMD extension. > +#define AV_CPU_FLAG_LSX (1 << 0) > +#define AV_CPU_FLAG_LASX (1 << 1) > + > /** > * Return the flags which specify extensions supported by the CPU. > * The returned value is affected by av_force_cpu_flags() if that was used > diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h > index 889764320b..e207b2d480 100644 > --- a/libavutil/cpu_internal.h > +++ b/libavutil/cpu_internal.h > @@ -46,11 +46,13 @@ int ff_get_cpu_flags_aarch64(void); > int ff_get_cpu_flags_arm(void); > int ff_get_cpu_flags_ppc(void); > int ff_get_cpu_flags_x86(void); > +int ff_get_cpu_flags_loongarch(void); > > size_t ff_get_cpu_max_align_mips(void); > size_t ff_get_cpu_max_align_aarch64(void); > size_t ff_get_cpu_max_align_arm(void); > size_t ff_get_cpu_max_align_ppc(void); > size_t ff_get_cpu_max_align_x86(void); > +size_t ff_get_cpu_max_align_loongarch(void); > > #endif /* AVUTIL_CPU_INTERNAL_H */ > diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile > new file mode 100644 > index 0000000000..2addd9351c > --- /dev/null > +++ b/libavutil/loongarch/Makefile > @@ -0,0 +1 @@ > +OBJS += loongarch/cpu.o > diff --git a/libavutil/loongarch/cpu.c b/libavutil/loongarch/cpu.c > new file mode 100644 > index 0000000000..e4b240bc44 > --- /dev/null > +++ b/libavutil/loongarch/cpu.c > @@ -0,0 +1,69 @@ > +/* > + * Copyright (c) 2020 Loongson Technology Corporation Limited > + * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include <stdint.h> > +#include "cpu.h" > + > +#define LOONGARCH_CFG2 0x2 > +#define LOONGARCH_CFG2_LSX (1 << 6) > +#define LOONGARCH_CFG2_LASX (1 << 7) > + > +static int cpu_flags_cpucfg(void) > +{ > + int flags = 0; > + uint32_t cfg2 = 0; > + > + __asm__ volatile( > + "cpucfg %0, %1 \n\t" > + : "+&r"(cfg2) > + : "r"(LOONGARCH_CFG2) > + ); > + > + if (cfg2 & LOONGARCH_CFG2_LSX) > + flags |= AV_CPU_FLAG_LSX; > + > + if (cfg2 & LOONGARCH_CFG2_LASX) > + flags |= AV_CPU_FLAG_LASX; > + > + return flags; > +} > + > +int ff_get_cpu_flags_loongarch(void) > +{ > +#if defined __linux__ > + return cpu_flags_cpucfg(); > +#else > + /* Assume no SIMD ASE supported */ > + return 0; > +#endif > +} > + > +size_t ff_get_cpu_max_align_loongarch(void) > +{ > + int flags = av_get_cpu_flags(); > + > + if (flags & AV_CPU_FLAG_LASX) > + return 32; > + if (flags & AV_CPU_FLAG_LSX) > + return 16; > + > + return 8; > +} > diff --git a/libavutil/loongarch/cpu.h b/libavutil/loongarch/cpu.h > new file mode 100644 > index 0000000000..1a445c69bc > --- /dev/null > +++ b/libavutil/loongarch/cpu.h > @@ -0,0 +1,31 @@ > +/* > + * Copyright (c) 2020 Loongson Technology Corporation Limited > + * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVUTIL_LOONGARCH_CPU_H > +#define AVUTIL_LOONGARCH_CPU_H > + > +#include "libavutil/cpu.h" > +#include "libavutil/cpu_internal.h" > + > +#define have_lsx(flags) CPUEXT(flags, LSX) > +#define have_lasx(flags) CPUEXT(flags, LASX) > + > +#endif /* AVUTIL_LOONGARCH_CPU_H */ > diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c > index c853371fb3..0a6c0cd32e 100644 > --- a/libavutil/tests/cpu.c > +++ b/libavutil/tests/cpu.c > @@ -77,6 +77,9 @@ static const struct { > { AV_CPU_FLAG_BMI2, "bmi2" }, > { AV_CPU_FLAG_AESNI, "aesni" }, > { AV_CPU_FLAG_AVX512, "avx512" }, > +#elif ARCH_LOONGARCH > + { AV_CPU_FLAG_LSX, "lsx" }, > + { AV_CPU_FLAG_LASX, "lasx" }, > #endif > { 0 } > }; > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index b1353f7cbe..90d080de02 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -236,6 +236,9 @@ static const struct { > { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, > { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, > { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, > +#elif ARCH_LOONGARCH > + { "LSX", "lsx", AV_CPU_FLAG_LSX }, > + { "LASX", "lasx", AV_CPU_FLAG_LASX }, > #endif > { NULL } > }; > -- > 2.20.1 > LGTM
diff --git a/Makefile b/Makefile index 26c9107237..5b20658b52 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ - MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS + MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS define RESET $(1) := diff --git a/configure b/configure index a7593ec2db..c4afde4c5c 100755 --- a/configure +++ b/configure @@ -452,7 +452,9 @@ Optimization options (experts only): --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations --disable-msa disable MSA optimizations --disable-mipsfpu disable floating point MIPS optimizations - --disable-mmi disable Loongson SIMD optimizations + --disable-mmi disable Loongson MMI optimizations + --disable-lsx disable Loongson LSX optimizations + --disable-lasx disable Loongson LASX optimizations --disable-fast-unaligned consider unaligned accesses slow Developer options (useful when working on FFmpeg itself): @@ -2081,6 +2083,8 @@ ARCH_EXT_LIST_LOONGSON=" loongson2 loongson3 mmi + lsx + lasx " ARCH_EXT_LIST_X86_SIMD=" @@ -2617,6 +2621,10 @@ power8_deps="vsx" loongson2_deps="mips" loongson3_deps="mips" +mmi_deps_any="loongson2 loongson3" +lsx_deps="loongarch" +lasx_deps="lsx" + mips32r2_deps="mips" mips32r5_deps="mips" mips32r6_deps="mips" @@ -2625,7 +2633,6 @@ mips64r6_deps="mips" mipsfpu_deps="mips" mipsdsp_deps="mips" mipsdspr2_deps="mips" -mmi_deps_any="loongson2 loongson3" msa_deps="mipsfpu" cpunop_deps="i686" @@ -6134,6 +6141,9 @@ EOF ;; esac +elif enabled loongarch; then + enabled lsx && check_inline_asm lsx '"vadd.b $vr0, $vr1, $vr2"' '-mlsx' && append LSXFLAGS '-mlsx' + enabled lasx && check_inline_asm lasx '"xvadd.b $xr0, $xr1, $xr2"' '-mlasx' && append LASXFLAGS '-mlasx' fi check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)" @@ -7484,6 +7494,10 @@ if enabled ppc; then echo "PPC 4xx optimizations ${ppc4xx-no}" echo "dcbzl available ${dcbzl-no}" fi +if enabled loongarch; then + echo "LSX enabled ${lsx-no}" + echo "LASX enabled ${lasx-no}" +fi echo "debug symbols ${debug-no}" echo "strip symbols ${stripping-no}" echo "optimize for size ${small-no}" @@ -7645,6 +7659,8 @@ ASMSTRIPFLAGS=$ASMSTRIPFLAGS X86ASMFLAGS=$X86ASMFLAGS MSAFLAGS=$MSAFLAGS MMIFLAGS=$MMIFLAGS +LSXFLAGS=$LSXFLAGS +LASXFLAGS=$LASXFLAGS BUILDSUF=$build_suffix PROGSSUF=$progs_suffix FULLNAME=$FULLNAME diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak index e09006efca..997e31e85e 100644 --- a/ffbuild/arch.mak +++ b/ffbuild/arch.mak @@ -8,7 +8,9 @@ OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes) OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes) OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes) OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes) -OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) +OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) +OBJS-$(HAVE_LSX) += $(LSX-OBJS) $(LSX-OBJS-yes) +OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes) diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 268ae61154..0eb831d434 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -59,6 +59,8 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) COMPILE_NVCC = $(call COMPILE,NVCC) COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS) COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) +COMPILE_LSX = $(call COMPILE,CC,LSXFLAGS) +COMPILE_LASX = $(call COMPILE,CC,LASXFLAGS) %_mmi.o: %_mmi.c $(COMPILE_MMI) @@ -66,6 +68,12 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) %_msa.o: %_msa.c $(COMPILE_MSA) +%_lsx.o: %_lsx.c + $(COMPILE_LSX) + +%_lasx.o: %_lasx.c + $(COMPILE_LASX) + %.o: %.c $(COMPILE_C) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 4627af4f23..63efb97ffd 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -62,6 +62,8 @@ static int get_cpu_flags(void) return ff_get_cpu_flags_ppc(); if (ARCH_X86) return ff_get_cpu_flags_x86(); + if (ARCH_LOONGARCH) + return ff_get_cpu_flags_loongarch(); return 0; } @@ -168,6 +170,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) #elif ARCH_MIPS { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, +#elif ARCH_LOONGARCH + { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, + { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, #endif { NULL }, }; @@ -253,6 +258,8 @@ size_t av_cpu_max_align(void) return ff_get_cpu_max_align_ppc(); if (ARCH_X86) return ff_get_cpu_max_align_x86(); + if (ARCH_LOONGARCH) + return ff_get_cpu_max_align_loongarch(); return 8; } diff --git a/libavutil/cpu.h b/libavutil/cpu.h index afea0640b4..ae443eccad 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -72,6 +72,10 @@ #define AV_CPU_FLAG_MMI (1 << 0) #define AV_CPU_FLAG_MSA (1 << 1) +//Loongarch SIMD extension. +#define AV_CPU_FLAG_LSX (1 << 0) +#define AV_CPU_FLAG_LASX (1 << 1) + /** * Return the flags which specify extensions supported by the CPU. * The returned value is affected by av_force_cpu_flags() if that was used diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 889764320b..e207b2d480 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -46,11 +46,13 @@ int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); +int ff_get_cpu_flags_loongarch(void); size_t ff_get_cpu_max_align_mips(void); size_t ff_get_cpu_max_align_aarch64(void); size_t ff_get_cpu_max_align_arm(void); size_t ff_get_cpu_max_align_ppc(void); size_t ff_get_cpu_max_align_x86(void); +size_t ff_get_cpu_max_align_loongarch(void); #endif /* AVUTIL_CPU_INTERNAL_H */ diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile new file mode 100644 index 0000000000..2addd9351c --- /dev/null +++ b/libavutil/loongarch/Makefile @@ -0,0 +1 @@ +OBJS += loongarch/cpu.o diff --git a/libavutil/loongarch/cpu.c b/libavutil/loongarch/cpu.c new file mode 100644 index 0000000000..e4b240bc44 --- /dev/null +++ b/libavutil/loongarch/cpu.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Loongson Technology Corporation Limited + * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include "cpu.h" + +#define LOONGARCH_CFG2 0x2 +#define LOONGARCH_CFG2_LSX (1 << 6) +#define LOONGARCH_CFG2_LASX (1 << 7) + +static int cpu_flags_cpucfg(void) +{ + int flags = 0; + uint32_t cfg2 = 0; + + __asm__ volatile( + "cpucfg %0, %1 \n\t" + : "+&r"(cfg2) + : "r"(LOONGARCH_CFG2) + ); + + if (cfg2 & LOONGARCH_CFG2_LSX) + flags |= AV_CPU_FLAG_LSX; + + if (cfg2 & LOONGARCH_CFG2_LASX) + flags |= AV_CPU_FLAG_LASX; + + return flags; +} + +int ff_get_cpu_flags_loongarch(void) +{ +#if defined __linux__ + return cpu_flags_cpucfg(); +#else + /* Assume no SIMD ASE supported */ + return 0; +#endif +} + +size_t ff_get_cpu_max_align_loongarch(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_LASX) + return 32; + if (flags & AV_CPU_FLAG_LSX) + return 16; + + return 8; +} diff --git a/libavutil/loongarch/cpu.h b/libavutil/loongarch/cpu.h new file mode 100644 index 0000000000..1a445c69bc --- /dev/null +++ b/libavutil/loongarch/cpu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2020 Loongson Technology Corporation Limited + * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_LOONGARCH_CPU_H +#define AVUTIL_LOONGARCH_CPU_H + +#include "libavutil/cpu.h" +#include "libavutil/cpu_internal.h" + +#define have_lsx(flags) CPUEXT(flags, LSX) +#define have_lasx(flags) CPUEXT(flags, LASX) + +#endif /* AVUTIL_LOONGARCH_CPU_H */ diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index c853371fb3..0a6c0cd32e 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -77,6 +77,9 @@ static const struct { { AV_CPU_FLAG_BMI2, "bmi2" }, { AV_CPU_FLAG_AESNI, "aesni" }, { AV_CPU_FLAG_AVX512, "avx512" }, +#elif ARCH_LOONGARCH + { AV_CPU_FLAG_LSX, "lsx" }, + { AV_CPU_FLAG_LASX, "lasx" }, #endif { 0 } }; diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index b1353f7cbe..90d080de02 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -236,6 +236,9 @@ static const struct { { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, +#elif ARCH_LOONGARCH + { "LSX", "lsx", AV_CPU_FLAG_LSX }, + { "LASX", "lasx", AV_CPU_FLAG_LASX }, #endif { NULL } };
From: Shiyou Yin <yinshiyou-hf@loongson.cn> LSX and LASX is loongarch SIMD extention. They are enabled by default if compiler support it, and can be disabled with '--disable-lsx' '--disable-lasx'. Change-Id: Ie2608ea61dbd9b7fffadbf0ec2348bad6c124476 --- Makefile | 2 +- configure | 20 +++++++++-- ffbuild/arch.mak | 4 ++- ffbuild/common.mak | 8 +++++ libavutil/cpu.c | 7 ++++ libavutil/cpu.h | 4 +++ libavutil/cpu_internal.h | 2 ++ libavutil/loongarch/Makefile | 1 + libavutil/loongarch/cpu.c | 69 ++++++++++++++++++++++++++++++++++++ libavutil/loongarch/cpu.h | 31 ++++++++++++++++ libavutil/tests/cpu.c | 3 ++ tests/checkasm/checkasm.c | 3 ++ 12 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 libavutil/loongarch/Makefile create mode 100644 libavutil/loongarch/cpu.c create mode 100644 libavutil/loongarch/cpu.h