diff --git a/lib/gpu/Makefile.xk7 b/lib/gpu/Makefile.xk7 new file mode 100644 index 0000000000..f4bd399e08 --- /dev/null +++ b/lib/gpu/Makefile.xk7 @@ -0,0 +1,29 @@ +# /* ---------------------------------------------------------------------- +# Makefile for Cray XK7 Architecture supporting Hyper-Q with Proxy +# - Uses PrgEnv-gnu +# ------------------------------------------------------------------------- */ + + +CUDA_HOME = $(CRAY_CUDATOOLKIT_DIR) +NVCC = nvcc + +CUDA_ARCH = -arch=sm_35 + +CUDA_PRECISION = -D_SINGLE_DOUBLE +CUDA_INCLUDE = -I$(CUDA_HOME)/include +CUDA_LIB = -L$(CUDA_HOME)/lib64 +CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math + +CUDR_CPP = CC -DCUDA_PROXY -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK +CUDR_OPTS = -O2 -march=bdver1 -ftree-vectorize + +BIN_DIR = ./ +OBJ_DIR = ./ +LIB_DIR = ./ +AR = ar +BSH = /bin/sh + +CUDPP_OPT = # -DUSE_CUDPP -Icudpp_mini + +include Nvidia.makefile + diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index 2b45b109db..fca3151b82 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -109,10 +109,10 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall, for (int j=i; j0) { - std::cerr << "Cannot use Gayberne with multiple forms and GPU neighbor.\n"; - exit(1); - } + if (_multiple_forms && host_nlocal>0) + return -8; + if (_multiple_forms && gpu_nbor!=0) + return -9; if (_multiple_forms) ans->force.zero(); diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h index f681268f1c..16e4c29610 100644 --- a/lib/gpu/lal_preprocessor.h +++ b/lib/gpu/lal_preprocessor.h @@ -114,9 +114,9 @@ #define THREADS_PER_ATOM 4 #define THREADS_PER_CHARGE 8 #define BLOCK_NBOR_BUILD 128 -#define BLOCK_PAIR 512 -#define BLOCK_BIO_PAIR 512 -#define BLOCK_ELLIPSE 256 +#define BLOCK_PAIR 256 +#define BLOCK_BIO_PAIR 256 +#define BLOCK_ELLIPSE 128 #define MAX_SHARED_TYPES 11 #ifdef _SINGLE_SINGLE diff --git a/lib/gpu/lal_yukawa.cu b/lib/gpu/lal_yukawa.cu index 1e644b8a64..ece54b854f 100644 --- a/lib/gpu/lal_yukawa.cu +++ b/lib/gpu/lal_yukawa.cu @@ -77,9 +77,9 @@ __kernel void k_yukawa(const __global numtyp4 *restrict x_, int mtype=itype*lj_types+jtype; if (rsq