Message ID | 20170511205921.2172-2-timo@rothenpieler.org |
---|---|
State | Superseded |
Headers | show |
On Thu, 11 May 2017 22:59:20 +0200 Timo Rothenpieler <timo@rothenpieler.org> wrote: > Original work by Yogender Gupta <ygupta@nvidia.com> > --- > .gitignore | 2 ++ > Makefile | 2 ++ > compat/cuda/ptx2c.sh | 35 +++++++++++++++++++++++++++++++++++ > configure | 24 +++++++++++++++++++++++- > ffbuild/common.mak | 15 ++++++++++++--- > 5 files changed, 74 insertions(+), 4 deletions(-) > create mode 100644 compat/cuda/ptx2c.sh > > diff --git a/.gitignore b/.gitignore > index 96172fea74..dabb51762d 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -19,6 +19,8 @@ > *.swp > *.ver > *.version > +*.ptx > +*.ptx.c > *_g > \#* > .\#* > diff --git a/Makefile b/Makefile > index d414cf841e..4d1c3d768f 100644 > --- a/Makefile > +++ b/Makefile > @@ -11,6 +11,8 @@ vpath %.asm $(SRC_PATH) > vpath %.rc $(SRC_PATH) > vpath %.v $(SRC_PATH) > vpath %.texi $(SRC_PATH) > +vpath %.cu $(SRC_PATH) > +vpath %.ptx $(SRC_PATH) > vpath %/fate_config.sh.template $(SRC_PATH) > > AVPROGS-$(CONFIG_FFMPEG) += ffmpeg > diff --git a/compat/cuda/ptx2c.sh b/compat/cuda/ptx2c.sh > new file mode 100644 > index 0000000000..51f0f57ba7 > --- /dev/null > +++ b/compat/cuda/ptx2c.sh > @@ -0,0 +1,35 @@ > +# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. > +# > +# Permission is hereby granted, free of charge, to any person > obtaining a +# copy of this software and associated documentation > files (the "Software"), +# to deal in the Software without > restriction, including without limitation +# the rights to use, copy, > modify, merge, publish, distribute, sublicense, +# and/or sell copies > of the Software, and to permit persons to whom the +# Software is > furnished to do so, subject to the following conditions: +# > +# The above copyright notice and this permission notice shall be > included in +# all copies or substantial portions of the Software. 
> +# > +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND > NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT > HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, > WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# > DEALINGS IN THE SOFTWARE. + > +set -e > + > +OUT="$1" > +IN="$2" > +NAME="$(basename "$IN")" > +NAME="${NAME/.ptx/}" > + > +echo -n "const char ${NAME}_ptx[] = \\" > "$OUT" > +while read LINE > +do > +echo -ne "\n\t\"$LINE\\\n\"" >> "$OUT" > +done < "$IN" > +echo ";" >> "$OUT" > + > +exit 0 > \ No newline at end of file > diff --git a/configure b/configure > index ef2134828b..929256bed7 100755 > --- a/configure > +++ b/configure > @@ -338,6 +338,7 @@ Toolchain options: > --cxx=CXX use C compiler CXX [$cxx_default] > --objcc=OCC use ObjC compiler OCC [$cc_default] > --dep-cc=DEPCC use dependency generator DEPCC > [$cc_default] > + --nvcc=NVCC use Nvidia CUDA compiler NVCC > [$nvcc_default] --ld=LD use linker LD [$ld_default] > --pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG > [$pkg_config_default] --pkg-config-flags=FLAGS pass additional flags > to pkgconf [] @@ -359,6 +360,7 @@ Toolchain options: > --extra-libs=ELIBS add ELIBS [$ELIBS] > --extra-version=STRING version string suffix [] > --optflags=OPTFLAGS override optimization-related compiler > flags > + --nvccflags=NVCCFLAGS override nvcc flags [$nvccflags_default] > --build-suffix=SUFFIX library name suffix [] > --enable-pic build position-independent code > --enable-thumb compile for Thumb instruction set > @@ -2221,6 +2223,7 @@ CMDLINE_SET=" > malloc_prefix > nm > optflags > + nvccflags > pkg_config > pkg_config_flags > progs_suffix > @@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi" > > hwupload_cuda_filter_deps="cuda" > 
scale_npp_filter_deps="cuda_sdk libnpp" > +scale_cuda_filter_deps="cuda_sdk" > nvpp_filter_deps="cuda" > > nvenc_deps="cuda" > @@ -3262,6 +3266,8 @@ strip_default="strip" > version_script='--version-script' > yasmexe_default="yasm" > windres_default="windres" > +nvcc_default="nvcc" > +nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2" > > # OS > target_os_default=$(tolower $(uname -s)) > @@ -3335,6 +3341,8 @@ HOSTCC_C='-c' > HOSTCC_E='-E -o $@' > HOSTCC_O='-o $@' > HOSTLD_O='-o $@' > +NVCC_C='-c' > +NVCC_O='-o $@' > > host_extralibs='-lm' > host_cflags_filter=echo > @@ -3722,7 +3730,7 @@ > windres_default="${cross_prefix}${windres_default}" > sysinclude_default="${sysroot}/usr/include" > set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \ > - target_exec target_os yasmexe > + target_exec target_os yasmexe nvcc > enabled cross_compile || host_cc_default=$cc > set_default host_cc > > @@ -6242,6 +6250,16 @@ if [ -z "$optflags" ]; then > fi > fi > > +if [ -z "$nvccflags" ]; then > + nvccflags=$nvccflags_default > +fi > + > +if enabled x86_64 || enabled ppc64 || enabled aarch64; then > + nvccflags="$nvccflags -m64" > +else > + nvccflags="$nvccflags -m32" > +fi > + > check_optflags(){ > check_cflags "$@" > enabled lto && check_ldflags "$@" > @@ -6705,6 +6723,7 @@ ARFLAGS=$arflags > AR_O=$ar_o > RANLIB=$ranlib > STRIP=$strip > +NVCC=$nvcc > CP=cp -p > LN_S=$ln_s > CPPFLAGS=$CPPFLAGS > @@ -6712,6 +6731,7 @@ CFLAGS=$CFLAGS > CXXFLAGS=$CXXFLAGS > OBJCFLAGS=$OBJCFLAGS > ASFLAGS=$ASFLAGS > +NVCCFLAGS=$nvccflags > AS_C=$AS_C > AS_O=$AS_O > OBJCC_C=$OBJCC_C > @@ -6722,6 +6742,8 @@ CC_E=$CC_E > CC_O=$CC_O > CXX_C=$CXX_C > CXX_O=$CXX_O > +NVCC_C=$NVCC_C > +NVCC_O=$NVCC_O > LD_O=$LD_O > LD_LIB=$LD_LIB > LD_PATH=$LD_PATH > diff --git a/ffbuild/common.mak b/ffbuild/common.mak > index e61f853ebb..b54bc1db2d 100644 > --- a/ffbuild/common.mak > +++ b/ffbuild/common.mak > @@ -15,7 +15,7 @@ ifndef SUBDIR > ifndef V > Q = @ > ECHO = printf "$(1)\t%s\n" $(2) > 
-BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES > +BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES > NVCC SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM > > MSG = $@ > @@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS) > ASFLAGS := $(CPPFLAGS) $(ASFLAGS) > CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) > YASMFLAGS += $(IFLAGS:%=%/) -Pconfig.asm > +NVCCFLAGS += -ptx > > HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS) > LDFLAGS := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS) > @@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX) > COMPILE_S = $(call COMPILE,AS) > COMPILE_M = $(call COMPILE,OBJCC) > COMPILE_HOSTC = $(call COMPILE,HOSTCC) > +COMPILE_NVCC = $(call COMPILE,NVCC) > > %.o: %.c > $(COMPILE_C) > @@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) > %.h.c: > $(Q)echo '#include "$*.h"' >$@ > > +%.ptx: %.cu > + $(COMPILE_NVCC) > + > +%.ptx.c: %.ptx > + $(Q)sh $(SRC_PATH)/compat/cuda/ptx2c.sh $@ $(patsubst > $(SRC_PATH)/%,$(SRC_LINK)/%,$<) + > %.c %.h %.pc %.ver %.version: TAG = GEN > > # Dummy rule to stop make trying to rebuild removed or renamed > headers @@ -133,9 +141,10 @@ ALLHEADERS := $(subst > $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR) > SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-) > SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%) HOBJS = > $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o)) > +PTXOBJS = $(filter %.ptx.o,$(OBJS)) $(HOBJS): CCFLAGS += > $(CFLAGS_HEADERS) checkheaders: $(HOBJS) > -.SECONDARY: $(HOBJS:.o=.c) > +.SECONDARY: $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=) > > alltools: $(TOOLS) > > @@ -154,7 +163,7 @@ $(TOOLOBJS): | tools > > OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) > $(TESTOBJS)) > -CLEANSUFFIXES = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver > *.version *.ho *$(DEFAULT_YASMD).asm +CLEANSUFFIXES = *.d *.o *~ > *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm > *.ptx *.ptx.c 
> DISTCLEANSUFFIXES = *.pc
> LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a

Looks good.

--phil
Build-system support for compiling CUDA kernels (.cu) to PTX with nvcc and
embedding the resulting PTX text in the libraries as C string constants.

Notes on this copy of the patch:
  * compat/cuda/ptx2c.sh uses only POSIX sh features (printf, sed,
    basename).  The build runs it as "sh ptx2c.sh", and the earlier
    "${NAME/.ptx/}" and "echo -ne" constructs only work when sh is bash.
    Double quotes and backslashes in the PTX input are now escaped, input
    lines are no longer altered by "read" or "echo -e", and the script
    ends with a newline.
  * The "index" header for ptx2c.sh is left out because the blob id of the
    rewritten script is not known here; it is optional for applying.
  * Column alignment, indentation, recipe tabs and blank context lines were
    reconstructed from a whitespace-collapsed copy.  Check the context
    against the target tree (or apply with whitespace tolerance).

diff --git a/.gitignore b/.gitignore
index 96172fea74..dabb51762d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,8 @@
 *.swp
 *.ver
 *.version
+*.ptx
+*.ptx.c
 *_g
 \#*
 .\#*
diff --git a/Makefile b/Makefile
index d414cf841e..4d1c3d768f 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,8 @@ vpath %.asm $(SRC_PATH)
 vpath %.rc   $(SRC_PATH)
 vpath %.v    $(SRC_PATH)
 vpath %.texi $(SRC_PATH)
+vpath %.cu   $(SRC_PATH)
+vpath %.ptx  $(SRC_PATH)
 vpath %/fate_config.sh.template $(SRC_PATH)
 
 AVPROGS-$(CONFIG_FFMPEG)   += ffmpeg
diff --git a/compat/cuda/ptx2c.sh b/compat/cuda/ptx2c.sh
new file mode 100644
--- /dev/null
+++ b/compat/cuda/ptx2c.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Convert a PTX file into a C source file defining a string constant.
+#
+# Usage: ptx2c.sh OUT IN
+#   OUT  path of the C file to write
+#   IN   path of the PTX file to embed
+#
+# The generated array is named after IN's basename with the ".ptx" suffix
+# removed and "_ptx" appended.  Only POSIX sh features are used because the
+# build runs this script with "sh", which is not necessarily bash.
+
+set -e
+
+OUT="$1"
+IN="$2"
+NAME="$(basename "$IN" .ptx)"
+TAB="$(printf '\t')"
+
+{
+    printf 'const char %s_ptx[] = \\\n' "$NAME"
+    # Escape backslashes and double quotes, then wrap every input line as
+    # a tab-indented C string literal that ends in an escaped newline.
+    sed -e 's/["\\]/\\&/g' -e "s/^/${TAB}\"/" -e 's/$/\\n"/' "$IN"
+    echo ";"
+} > "$OUT"
+
+exit 0
diff --git a/configure b/configure
index ef2134828b..929256bed7 100755
--- a/configure
+++ b/configure
@@ -338,6 +338,7 @@ Toolchain options:
   --cxx=CXX                use C compiler CXX [$cxx_default]
   --objcc=OCC              use ObjC compiler OCC [$cc_default]
   --dep-cc=DEPCC           use dependency generator DEPCC [$cc_default]
+  --nvcc=NVCC              use Nvidia CUDA compiler NVCC [$nvcc_default]
   --ld=LD                  use linker LD [$ld_default]
   --pkg-config=PKGCONFIG   use pkg-config tool PKGCONFIG [$pkg_config_default]
   --pkg-config-flags=FLAGS pass additional flags to pkgconf []
@@ -359,6 +360,7 @@ Toolchain options:
   --extra-libs=ELIBS       add ELIBS [$ELIBS]
   --extra-version=STRING   version string suffix []
   --optflags=OPTFLAGS      override optimization-related compiler flags
+  --nvccflags=NVCCFLAGS    override nvcc flags [$nvccflags_default]
   --build-suffix=SUFFIX    library name suffix []
   --enable-pic             build position-independent code
   --enable-thumb           compile for Thumb instruction set
@@ -2221,6 +2223,7 @@ CMDLINE_SET="
     malloc_prefix
     nm
     optflags
+    nvccflags
     pkg_config
     pkg_config_flags
     progs_suffix
@@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi"
 
 hwupload_cuda_filter_deps="cuda"
 scale_npp_filter_deps="cuda_sdk libnpp"
+scale_cuda_filter_deps="cuda_sdk"
 nvpp_filter_deps="cuda"
 
 nvenc_deps="cuda"
@@ -3262,6 +3266,8 @@ strip_default="strip"
 version_script='--version-script'
 yasmexe_default="yasm"
 windres_default="windres"
+nvcc_default="nvcc"
+nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2"
 
 # OS
 target_os_default=$(tolower $(uname -s))
@@ -3335,6 +3341,8 @@ HOSTCC_C='-c'
 HOSTCC_E='-E -o $@'
 HOSTCC_O='-o $@'
 HOSTLD_O='-o $@'
+NVCC_C='-c'
+NVCC_O='-o $@'
 
 host_extralibs='-lm'
 host_cflags_filter=echo
@@ -3722,7 +3730,7 @@ windres_default="${cross_prefix}${windres_default}"
 sysinclude_default="${sysroot}/usr/include"
 
 set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
-    target_exec target_os yasmexe
+    target_exec target_os yasmexe nvcc
 enabled cross_compile || host_cc_default=$cc
 set_default host_cc
 
@@ -6242,6 +6250,16 @@ if [ -z "$optflags" ]; then
     fi
 fi
 
+if [ -z "$nvccflags" ]; then
+    nvccflags=$nvccflags_default
+fi
+
+if enabled x86_64 || enabled ppc64 || enabled aarch64; then
+    nvccflags="$nvccflags -m64"
+else
+    nvccflags="$nvccflags -m32"
+fi
+
 check_optflags(){
     check_cflags "$@"
     enabled lto && check_ldflags "$@"
@@ -6705,6 +6723,7 @@ ARFLAGS=$arflags
 AR_O=$ar_o
 RANLIB=$ranlib
 STRIP=$strip
+NVCC=$nvcc
 CP=cp -p
 LN_S=$ln_s
 CPPFLAGS=$CPPFLAGS
@@ -6712,6 +6731,7 @@ CFLAGS=$CFLAGS
 CXXFLAGS=$CXXFLAGS
 OBJCFLAGS=$OBJCFLAGS
 ASFLAGS=$ASFLAGS
+NVCCFLAGS=$nvccflags
 AS_C=$AS_C
 AS_O=$AS_O
 OBJCC_C=$OBJCC_C
@@ -6722,6 +6742,8 @@ CC_E=$CC_E
 CC_O=$CC_O
 CXX_C=$CXX_C
 CXX_O=$CXX_O
+NVCC_C=$NVCC_C
+NVCC_O=$NVCC_O
 LD_O=$LD_O
 LD_LIB=$LD_LIB
 LD_PATH=$LD_PATH
diff --git a/ffbuild/common.mak b/ffbuild/common.mak
index e61f853ebb..b54bc1db2d 100644
--- a/ffbuild/common.mak
+++ b/ffbuild/common.mak
@@ -15,7 +15,7 @@ ifndef SUBDIR
 ifndef V
 Q      = @
 ECHO   = printf "$(1)\t%s\n" $(2)
-BRIEF  = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES
+BRIEF  = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES NVCC
 SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM
 
 MSG    = $@
@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS)
 ASFLAGS    := $(CPPFLAGS) $(ASFLAGS)
 CXXFLAGS   := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS)
 YASMFLAGS  += $(IFLAGS:%=%/) -Pconfig.asm
+NVCCFLAGS  += -ptx
 
 HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS)
 LDFLAGS    := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS)
@@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX)
 COMPILE_S = $(call COMPILE,AS)
 COMPILE_M = $(call COMPILE,OBJCC)
 COMPILE_HOSTC = $(call COMPILE,HOSTCC)
+COMPILE_NVCC = $(call COMPILE,NVCC)
 
 %.o: %.c
 	$(COMPILE_C)
@@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
 %.h.c:
 	$(Q)echo '#include "$*.h"' >$@
 
+%.ptx: %.cu
+	$(COMPILE_NVCC)
+
+%.ptx.c: %.ptx
+	$(Q)sh $(SRC_PATH)/compat/cuda/ptx2c.sh $@ $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<)
+
 %.c %.h %.pc %.ver %.version: TAG = GEN
 
 # Dummy rule to stop make trying to rebuild removed or renamed headers
@@ -133,9 +141,10 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)
 SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-)
 SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%)
 HOBJS        = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o))
+PTXOBJS      = $(filter %.ptx.o,$(OBJS))
 $(HOBJS): CCFLAGS += $(CFLAGS_HEADERS)
 checkheaders: $(HOBJS)
-.SECONDARY:   $(HOBJS:.o=.c)
+.SECONDARY:   $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=)
 
 alltools: $(TOOLS)
 
@@ -154,7 +163,7 @@ $(TOOLOBJS): | tools
 
 OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
 
-CLEANSUFFIXES     = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm
+CLEANSUFFIXES     = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm *.ptx *.ptx.c
 DISTCLEANSUFFIXES = *.pc
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
 