git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9175 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2013-01-02 16:27:31 +00:00
parent d6353f2da3
commit c8624bc7eb
4 changed files with 42 additions and 13 deletions

29
lib/gpu/Makefile.xk7 Normal file
View File

@ -0,0 +1,29 @@
# /* ----------------------------------------------------------------------
# Makefile for Cray XK7 Architecture supporting Hyper-Q with Proxy
# - Uses PrgEnv-gnu
# ------------------------------------------------------------------------- */
#
# This file only defines build variables and then includes Nvidia.makefile.
# NOTE(review): the build rules themselves are presumably in Nvidia.makefile,
# which is expected to consume the variables below - confirm against that file.

# CUDA toolkit root, taken from the CRAY_CUDATOOLKIT_DIR variable.
# NOTE(review): presumably exported by the Cray cudatoolkit module; the
# include and lib64 paths below are empty-rooted if it is unset - confirm.
CUDA_HOME = $(CRAY_CUDATOOLKIT_DIR)
# CUDA compiler driver; resolved through PATH rather than $(CUDA_HOME)/bin.
NVCC = nvcc
# Generate device code for the sm_35 GPU architecture.
CUDA_ARCH = -arch=sm_35
# Defines the _SINGLE_DOUBLE preprocessor macro.
# NOTE(review): presumably selects the mixed-precision build - confirm in
# the library's precision header.
CUDA_PRECISION = -D_SINGLE_DOUBLE
# Header and 64-bit library search paths inside the CUDA toolkit.
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64
# nvcc options: define UNIX, optimize at -O3, forward -v to ptxas (verbose
# per-kernel resource report), and enable nvcc's fast-math mode.
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
# Host C++ compile command plus the preprocessor defines for this target
# (CUDA_PROXY, MPI_GERYON, UCL_NO_EXIT, MPICH_IGNORE_CXX_SEEK).
# NOTE(review): "CC" is presumably the Cray compiler wrapper from
# PrgEnv-gnu, as stated in the header above - confirm.
CUDR_CPP = CC -DCUDA_PROXY -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
# Host optimization flags; -march=bdver1 targets AMD family 15h (Bulldozer)
# CPUs, so the resulting host code is not portable to other processors.
CUDR_OPTS = -O2 -march=bdver1 -ftree-vectorize
# Output locations for binaries, object files and the library: all set to
# the current directory.
BIN_DIR = ./
OBJ_DIR = ./
LIB_DIR = ./
# Archiver and shell programs.
AR = ar
BSH = /bin/sh
# CUDPP options are left empty; the commented-out flags after '#' show the
# values that would turn on USE_CUDPP with the cudpp_mini include path.
CUDPP_OPT = # -DUSE_CUDPP -Icudpp_mini
# Pull in the shared makefile (see NOTE at the top of this file).
include Nvidia.makefile

View File

@ -109,10 +109,10 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
for (int j=i; j<ntypes; j++)
if (_host_form[i][j]!=ELLIPSE_ELLIPSE)
_multiple_forms=true;
if (_multiple_forms && host_nlocal>0) {
std::cerr << "Cannot use Gayberne with multiple forms and GPU neighbor.\n";
exit(1);
}
if (_multiple_forms && host_nlocal>0)
return -8;
if (_multiple_forms && gpu_nbor!=0)
return -9;
if (_multiple_forms)
ans->force.zero();

View File

@ -114,9 +114,9 @@
#define THREADS_PER_ATOM 4
#define THREADS_PER_CHARGE 8
#define BLOCK_NBOR_BUILD 128
#define BLOCK_PAIR 512
#define BLOCK_BIO_PAIR 512
#define BLOCK_ELLIPSE 256
#define BLOCK_PAIR 256
#define BLOCK_BIO_PAIR 256
#define BLOCK_ELLIPSE 128
#define MAX_SHARED_TYPES 11
#ifdef _SINGLE_SINGLE

View File

@ -77,9 +77,9 @@ __kernel void k_yukawa(const __global numtyp4 *restrict x_,
int mtype=itype*lj_types+jtype;
if (rsq<coeff[mtype].z) {
numtyp r2inv = (numtyp)1.0/rsq;
numtyp r = ucl_rsqrt(r2inv);
numtyp rinv = 1.0/r;
numtyp r2inv = ucl_recip(rsq);
numtyp r = ucl_sqrt(rsq);
numtyp rinv = ucl_recip(r);
numtyp screening = exp(-kappa*r);
numtyp force = coeff[mtype].x*screening*(kappa + rinv)*r2inv;
force*=factor_lj;
@ -165,9 +165,9 @@ __kernel void k_yukawa_fast(const __global numtyp4 *restrict x_,
numtyp rsq = delx*delx+dely*dely+delz*delz;
if (rsq<coeff[mtype].z) {
numtyp r2inv = (numtyp)1.0/rsq;
numtyp r = ucl_rsqrt(r2inv);
numtyp rinv = 1.0/r;
numtyp r2inv = ucl_recip(rsq);
numtyp r = ucl_sqrt(rsq);
numtyp rinv = ucl_recip(r);
numtyp screening = exp(-kappa*r);
numtyp force = coeff[mtype].x*screening*(kappa + rinv)*r2inv;
force*=factor_lj;