# Common command-line argument interpreter for compilation with lammpscuda
# (USER-CUDA) installed.
#
# make options:
#   emu=1         switch to CUDA emulation mode (otherwise: use GPU)
#   dbg=1         print a lot of debugging output during runtime
#                 (adds -D_DEBUG -g and nvcc -g -G; otherwise -O3 is used)
#   verbose=1     output nvcc command line during compilation
#   keep=1        do not delete temporary compilation files (.ii, .cubin, ...)
#   cufft=1       use CUDA's fast Fourier transform library "cufft" where
#                 possible; in fact every value except 0 selects cufft.
#                 cufft=0 uses the CPU FFT (see FALLBACK_FFT below)
#   binning=1     create virtual particle grid (neighbor lists otherwise);
#                 currently this is not supported
#   precision=1   single precision (global setting)
#   precision=2   double precision (global setting); also chosen for any
#                 value that is not 1, 3 or 4
#   precision=3   -DCUDA_PRECISION=1 -DX_PRECISION=2
#   precision=4   -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
#                 NOTE(review): 3 and 4 look like mixed-precision modes
#                 (X/V presumably positions/velocities) -- confirm.
#   arch=NN       GPU architecture: 13, 20, 21, 30 or 35 (-arch sm_NN);
#                 any other value falls back to -arch sm_13
#   prec_timer=0  define NO_PREC_TIMING

SHELL = /bin/sh

# System-specific settings

CUDA_INSTALL_PATH = /usr/local/cuda
#CUDA_INSTALL_PATH = /home/crtrott/lib/cuda
# e.g. in Gentoo
# CUDA_INSTALL_PATH = /opt/cuda

#//////////////////////////////////////////////////////////////////////////////////////////////
# no need to change anything below this line
#//////////////////////////////////////////////////////////////////////////////////////////////

# Use the CPU FFT if cufft=0 is requested. When this is not 1, cufft=0
# disables FFT support altogether (-DFFT_NONE).
FALLBACK_FFT = 1

# Default settings for compiler switches. The path to Makefile.defaults
# depends on whether COMPILELIB is set.
ifdef COMPILELIB
  include Makefile.defaults
else
  include ../../lib/cuda/Makefile.defaults
endif

#shell echo "Compiling with precision = " ${precision} ", arch = " ${arch} ", cufft = " ${cufft} ", dbg = " ${dbg} ", prec_timer = " ${prec_timer}

CUDA_FLAGS := -I${CUDA_INSTALL_PATH}/include -DUNIX
CUDA_USRLIB_CONDITIONAL := -L${CUDA_INSTALL_PATH}/lib -L${CUDA_INSTALL_PATH}/lib64

# debug setting
ifeq ($(strip $(dbg)), 1)
  CUDA_FLAGS += -D_DEBUG -g
  NVCC_FLAGS += -g -G
else
  NVCC_FLAGS += --compiler-options -fno-strict-aliasing -O3
endif

# skip timing on Mac and Windows manually (pass prec_timer=0)
ifeq ($(strip $(prec_timer)), 0)
  CUDA_FLAGS += -DNO_PREC_TIMING
endif

# set fft routine
ifeq ($(strip $(cufft)), 0)
  # $(strip ...) keeps the comparison immune to stray whitespace in the
  # FALLBACK_FFT value, matching every other conditional in this file.
  ifneq ($(strip $(FALLBACK_FFT)), 1)
    FFT_INC = -DFFT_NONE
    FFT_PATH =
    FFT_LIB =
    CUDA_FLAGS += -DFFT_NONE
  endif
else
  CUDA_FLAGS += -DFFT_CUFFT
  CUDA_USRLIB_CONDITIONAL += -lcufft
endif

# make global precision setting
ifeq ($(strip $(precision)), 1)
  CUDA_FLAGS += -DCUDA_PRECISION=1
else
  ifeq ($(strip $(precision)), 3)
    CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2
  else
    ifeq ($(strip $(precision)), 4)
      CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
    else
      CUDA_FLAGS += -DCUDA_PRECISION=2
    endif
  endif
endif

# make architecture settings
#
# arch=21, 30 and 35 all define CUDA_ARCH=20; only the -arch sm_NN value
# differs. NOTE(review): presumably the sources only distinguish
# "13" from "20 or newer" -- confirm before changing.
#
# For arch >= 20 the fast-math style nvcc switches are used. The precise
# alternative, kept here for reference, is:
#NVCC_FLAGS += -ftz=false -prec-div=true -prec-sqrt=true
ifeq ($(strip $(arch)), 13)
  CUDA_FLAGS += -DCUDA_ARCH=13
  SMVERSIONFLAGS := -arch sm_13
else
  ifeq ($(strip $(arch)), 20)
    CUDA_FLAGS += -DCUDA_ARCH=20
    NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
    SMVERSIONFLAGS := -arch sm_20
  else
    ifeq ($(strip $(arch)), 21)
      CUDA_FLAGS += -DCUDA_ARCH=20
      NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
      SMVERSIONFLAGS := -arch sm_21
    else
      ifeq ($(strip $(arch)), 30)
        CUDA_FLAGS += -DCUDA_ARCH=20
        NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
        SMVERSIONFLAGS := -arch sm_30
      else
        ifeq ($(strip $(arch)), 35)
          CUDA_FLAGS += -DCUDA_ARCH=20
          NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
          SMVERSIONFLAGS := -arch sm_35
        else
          # Unknown or unset arch: CUDA_ARCH=99, compiled for sm_13.
          CUDA_FLAGS += -DCUDA_ARCH=99
          SMVERSIONFLAGS := -arch sm_13
        endif
      endif
    endif
  endif
endif

# Append the CUDA flags to the compiler flags. The extra -I repeats the
# include path already in CUDA_FLAGS; it is redundant but harmless.
CCFLAGS := $(CCFLAGS) $(CUDA_FLAGS) \
           -I$(CUDA_INSTALL_PATH)/include