From 28194a14eb82849e23901c0ab3263b1bae4ef7f6 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Sat, 25 Jan 2014 23:53:12 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11335 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- lib/gpu/Makefile.linux | 9 +++-- lib/gpu/Makefile.linux_opencl | 8 +++- lib/gpu/README | 64 +++++++++++++++--------------- lib/gpu/lal_atom.h | 2 +- lib/gpu/lal_base_atomic.cpp | 8 ++-- lib/gpu/lal_base_atomic.h | 12 +++--- lib/gpu/lal_base_charge.cpp | 8 ++-- lib/gpu/lal_base_charge.h | 8 ++-- lib/gpu/lal_base_dipole.cpp | 8 ++-- lib/gpu/lal_base_dipole.h | 8 ++-- lib/gpu/lal_base_ellipsoid.cpp | 8 ++-- lib/gpu/lal_base_ellipsoid.h | 8 ++-- lib/gpu/lal_base_three.cpp | 8 ++-- lib/gpu/lal_base_three.h | 12 +++--- lib/gpu/lal_beck_ext.cpp | 4 +- lib/gpu/lal_born_coul_long_ext.cpp | 4 +- lib/gpu/lal_born_coul_wolf_ext.cpp | 4 +- lib/gpu/lal_born_ext.cpp | 4 +- lib/gpu/lal_buck_coul_ext.cpp | 4 +- lib/gpu/lal_buck_coul_long_ext.cpp | 4 +- lib/gpu/lal_buck_ext.cpp | 4 +- lib/gpu/lal_cg_cmm_ext.cpp | 4 +- lib/gpu/lal_cg_cmm_long_ext.cpp | 4 +- lib/gpu/lal_charmm_long_ext.cpp | 4 +- lib/gpu/lal_colloid_ext.cpp | 4 +- lib/gpu/lal_coul_dsf_ext.cpp | 4 +- lib/gpu/lal_coul_long_ext.cpp | 4 +- lib/gpu/lal_device.cpp | 2 +- lib/gpu/lal_dipole_lj_ext.cpp | 4 +- lib/gpu/lal_dipole_lj_sf_ext.cpp | 4 +- lib/gpu/lal_eam.cpp | 2 +- lib/gpu/lal_eam.h | 4 +- lib/gpu/lal_eam_ext.cpp | 4 +- lib/gpu/lal_gauss_ext.cpp | 4 +- lib/gpu/lal_gayberne_ext.cpp | 8 ++-- lib/gpu/lal_lj96_ext.cpp | 4 +- lib/gpu/lal_lj_class2_long_ext.cpp | 4 +- lib/gpu/lal_lj_coul_debye_ext.cpp | 4 +- lib/gpu/lal_lj_coul_ext.cpp | 4 +- lib/gpu/lal_lj_coul_long_ext.cpp | 4 +- lib/gpu/lal_lj_coul_msm_ext.cpp | 4 +- lib/gpu/lal_lj_dsf_ext.cpp | 4 +- lib/gpu/lal_lj_expand_ext.cpp | 4 +- lib/gpu/lal_lj_ext.cpp | 4 +- lib/gpu/lal_lj_gromacs_ext.cpp | 4 +- lib/gpu/lal_mie_ext.cpp | 4 +- lib/gpu/lal_morse_ext.cpp | 4 +- lib/gpu/lal_neighbor.cpp | 9 +++-- lib/gpu/lal_neighbor.h | 4 +- lib/gpu/lal_neighbor_gpu.cu | 33 +++++++++++---- lib/gpu/lal_precision.h | 18 +++++++++ lib/gpu/lal_preprocessor.h | 4 ++ lib/gpu/lal_re_squared_ext.cpp | 6 +-- lib/gpu/lal_soft_ext.cpp | 4 +- lib/gpu/lal_sw.cu | 18 --------- lib/gpu/lal_sw_ext.cpp | 4 +- lib/gpu/lal_table_ext.cpp | 4 +- lib/gpu/lal_yukawa_colloid.cpp | 4 +- lib/gpu/lal_yukawa_colloid.h | 4 +- lib/gpu/lal_yukawa_colloid_ext.cpp | 4 +- lib/gpu/lal_yukawa_ext.cpp | 4 +- 61 files changed, 226 insertions(+), 197 deletions(-) diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux index 5af5159be3..773183caae 100644 --- a/lib/gpu/Makefile.linux +++ b/lib/gpu/Makefile.linux @@ -17,13 +17,16 @@ CUDA_ARCH = -arch=sm_21 # older CUDA #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE -CUDA_PRECISION = -D_SINGLE_SINGLE +# system-specific settings, should match with LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG or LAMMPS_SMALLSMALL +LMP_INC = -DLAMMPS_BIGBIG + +CUDA_PRECISION = -D_SINGLE_DOUBLE CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_LIB = -L$(CUDA_HOME)/lib64 -CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math +CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias +CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias BIN_DIR = ./ OBJ_DIR = ./ diff --git a/lib/gpu/Makefile.linux_opencl b/lib/gpu/Makefile.linux_opencl index 71c82d25f3..75b4ef4927 100644 --- a/lib/gpu/Makefile.linux_opencl +++ b/lib/gpu/Makefile.linux_opencl @@ -11,9 +11,13 @@ OCL_TUNE = -DFERMI_OCL # -- Uncomment for NVIDIA Fermi # OCL_TUNE = -DCYPRESS_OCL # -- Uncomment for AMD Cypress # OCL_TUNE = -DGENERIC_OCL # -- Uncomment for generic device -OCL_CPP = mpic++ $(DEFAULT_DEVICE) -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK +# system-specific settings, should match with LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL +LMP_INC = #-DLAMMPS_BIGBIG + +OCL_INC = -I/usr/local/cuda/include # Path to CL directory +OCL_CPP = mpic++ $(DEFAULT_DEVICE) -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) OCL_LINK = -lOpenCL -OCL_PREC = -D_SINGLE_SINGLE +OCL_PREC = -D_SINGLE_DOUBLE BIN_DIR = ./ OBJ_DIR = ./ diff --git a/lib/gpu/README b/lib/gpu/README index 59510131d6..45c8ce49ba 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -82,7 +82,7 @@ and Brown, W.M., Masako, Y. Implementing Molecular Dynamics on Hybrid High Performance Computers - Three-Body Potentials. Computer Physics Communications. -2013. In press. +2013. 184: p. 2785–2793. ---- @@ -103,37 +103,33 @@ Current styles supporting GPU acceleration: 11 eam/alloy 12 eam/fs 13 eam - 14 eam/lj - 15 gauss - 16 gayberne - 17 lj96/cut - 18 lj/charmm/coul/long - 19 lj/class2/coul/long - 20 lj/class2 - 21 lj/cut/coul/cut - 22 lj/cut/coul/debye - 23 lj/cut/coul/dsf - 24 lj/cut/coul/long - 25 lj/cut/coul/msm - 26 lj/cut/coul/wolf/fsw - 27 lj/cut/dipole/cut - 28 lj/cut - 29 lj/cut/tgpu - 30 lj/expand - 31 lj/sdk/coul/long - 32 cg/cmm/coul/long - 33 lj/sdk - 34 cg/cmm - 35 lj/sf/dipole/sf - 36 mie/cut - 37 morse - 38 resquared - 39 soft - 40 sw - 41 table - 42 yukawa/colloid - 43 yukawa - 44 pppm + 14 gauss + 15 gayberne + 16 lj96/cut + 17 lj/charmm/coul/long + 18 lj/class2/coul/long + 19 lj/class2 + 20 lj/cut/coul/cut + 21 lj/cut/coul/debye + 22 lj/cut/coul/dsf + 23 lj/cut/coul/long + 24 lj/cut/coul/msm + 25 lj/cut/dipole/cut + 26 lj/cut + 27 lj/expand + 28 lj/gromacs + 29 lj/sdk/coul/long + 30 lj/sdk + 31 lj/sf/dipole/sf + 32 mie/cut + 33 morse + 34 resquared + 35 soft + 36 sw + 37 table + 38 yukawa/colloid + 39 yukawa + 40 pppm MULTIPLE LAMMPS PROCESSES @@ -200,6 +196,10 @@ NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, coul/long/gpu, lj/charmm/coul/long/gpu and pppm/gpu styles will only be installed if the KSPACE package has been installed. +NOTE: The system-specific setting LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG, + or LAMMPS_SMALLSMALL if specified when building LAMMPS (i.e. in + src/MAKE/Makefile.foo) should be consistent with that specified + when building libgpu.a (i.e. by LMP_INC in the lib/gpu/Makefile.bar). EXAMPLE BUILD PROCESS -------------------------------- diff --git a/lib/gpu/lal_atom.h b/lib/gpu/lal_atom.h index 171141f7ea..4731b1b08d 100644 --- a/lib/gpu/lal_atom.h +++ b/lib/gpu/lal_atom.h @@ -373,7 +373,7 @@ class Atom { /// Cell list identifiers for device nbor builds UCL_D_Vec dev_particle_id; /// Atom tag information for device nbor builds - UCL_D_Vec dev_tag; + UCL_D_Vec dev_tag; /// Cell list identifiers for hybrid nbor builds UCL_H_Vec host_cell_id; diff --git a/lib/gpu/lal_base_atomic.cpp b/lib/gpu/lal_base_atomic.cpp index b24f7e9661..191f218bd8 100644 --- a/lib/gpu/lal_base_atomic.cpp +++ b/lib/gpu/lal_base_atomic.cpp @@ -150,8 +150,8 @@ template inline void BaseAtomicT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, - int **nspecial, int **special, + double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success) { success=true; resize_atom(inum,nall,success); @@ -216,8 +216,8 @@ void BaseAtomicT::compute(const int f_ago, const int inum_full, template int ** BaseAtomicT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, const bool eflag, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, diff --git a/lib/gpu/lal_base_atomic.h b/lib/gpu/lal_base_atomic.h index 74c8530f7f..eaf55f46e2 100644 --- a/lib/gpu/lal_base_atomic.h +++ b/lib/gpu/lal_base_atomic.h @@ -119,8 +119,8 @@ class BaseAtomic { /// Build neighbor list on device void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, bool &success); + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, bool &success); /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, @@ -132,16 +132,16 @@ class BaseAtomic { /// Pair loop with device neighboring int * compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success); /// Pair loop with device neighboring int ** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success); diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp index 8e06a4d18c..e7fe2b62f4 100644 --- a/lib/gpu/lal_base_charge.cpp +++ b/lib/gpu/lal_base_charge.cpp @@ -153,8 +153,8 @@ template inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, - int **nspecial, int **special, + double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success) { success=true; resize_atom(inum,nall,success); @@ -225,8 +225,8 @@ void BaseChargeT::compute(const int f_ago, const int inum_full, template int** BaseChargeT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, const bool eflag, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, diff --git a/lib/gpu/lal_base_charge.h b/lib/gpu/lal_base_charge.h index 3ca4705177..e791507432 100644 --- a/lib/gpu/lal_base_charge.h +++ b/lib/gpu/lal_base_charge.h @@ -122,8 +122,8 @@ class BaseCharge { /// Build neighbor list on device void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, bool &success); + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, bool &success); /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, const int nall, @@ -136,8 +136,8 @@ class BaseCharge { /// Pair loop with device neighboring int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, double *charge, double *boxlo, double *prd); diff --git a/lib/gpu/lal_base_dipole.cpp b/lib/gpu/lal_base_dipole.cpp index ed3d720d2c..12e3b20d96 100644 --- a/lib/gpu/lal_base_dipole.cpp +++ b/lib/gpu/lal_base_dipole.cpp @@ -155,8 +155,8 @@ template inline void BaseDipoleT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, - int **nspecial, int **special, + double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success) { success=true; resize_atom(inum,nall,success); @@ -229,8 +229,8 @@ void BaseDipoleT::compute(const int f_ago, const int inum_full, template int** BaseDipoleT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, const bool eflag, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, diff --git a/lib/gpu/lal_base_dipole.h b/lib/gpu/lal_base_dipole.h index 51e357afda..2e495c8747 100644 --- a/lib/gpu/lal_base_dipole.h +++ b/lib/gpu/lal_base_dipole.h @@ -121,8 +121,8 @@ class BaseDipole { /// Build neighbor list on device void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, bool &success); + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, bool &success); /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, const int nall, @@ -135,8 +135,8 @@ class BaseDipole { /// Pair loop with device neighboring int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, double *charge, double **mu, double *boxlo, double *prd); diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index 641087a6c4..dd83bfa9a4 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -313,8 +313,8 @@ template inline void BaseEllipsoidT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, - int **nspecial, int **special, + double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success) { success=true; resize_atom(nall,success); @@ -390,8 +390,8 @@ int* BaseEllipsoidT::compute(const int f_ago, const int inum_full, template int** BaseEllipsoidT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, diff --git a/lib/gpu/lal_base_ellipsoid.h b/lib/gpu/lal_base_ellipsoid.h index 96e2e3ee50..e289430f43 100644 --- a/lib/gpu/lal_base_ellipsoid.h +++ b/lib/gpu/lal_base_ellipsoid.h @@ -160,8 +160,8 @@ class BaseEllipsoid { /// Build neighbor list on device void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, bool &success); + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, bool &success); /// Pair loop with host neighboring int* compute(const int f_ago, const int inum_full, const int nall, @@ -173,8 +173,8 @@ class BaseEllipsoid { /// Pair loop with device neighboring int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, double **host_quat); diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index fe64cf44ed..9418293cce 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -186,8 +186,8 @@ template inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, - int **nspecial, int **special, + double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success) { success=true; resize_atom(inum,nall,success); @@ -267,8 +267,8 @@ void BaseThreeT::compute(const int f_ago, const int nlocal, const int nall, template int ** BaseThreeT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, const bool eflag, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, diff --git a/lib/gpu/lal_base_three.h b/lib/gpu/lal_base_three.h index 7e167f4655..f457b5bd55 100644 --- a/lib/gpu/lal_base_three.h +++ b/lib/gpu/lal_base_three.h @@ -131,8 +131,8 @@ class BaseThree { /// Build neighbor list on device void build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, bool &success); + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, bool &success); /// Pair loop with host neighboring void compute(const int f_ago, const int inum_full, const int nall, @@ -144,16 +144,16 @@ class BaseThree { /// Pair loop with device neighboring int * compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success); /// Pair loop with device neighboring int ** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success); diff --git a/lib/gpu/lal_beck_ext.cpp b/lib/gpu/lal_beck_ext.cpp index 1a736bf02d..40afe9ca27 100644 --- a/lib/gpu/lal_beck_ext.cpp +++ b/lib/gpu/lal_beck_ext.cpp @@ -94,8 +94,8 @@ void beck_gpu_clear() { int ** beck_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_born_coul_long_ext.cpp b/lib/gpu/lal_born_coul_long_ext.cpp index e8ac4eff0b..9734fe9f2b 100644 --- a/lib/gpu/lal_born_coul_long_ext.cpp +++ b/lib/gpu/lal_born_coul_long_ext.cpp @@ -102,8 +102,8 @@ void borncl_gpu_clear() { int** borncl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_born_coul_wolf_ext.cpp b/lib/gpu/lal_born_coul_wolf_ext.cpp index 3e779d099e..5c9e2c02bf 100644 --- a/lib/gpu/lal_born_coul_wolf_ext.cpp +++ b/lib/gpu/lal_born_coul_wolf_ext.cpp @@ -104,8 +104,8 @@ void borncw_gpu_clear() { int** borncw_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_born_ext.cpp b/lib/gpu/lal_born_ext.cpp index 7785353a8a..3b593efb23 100644 --- a/lib/gpu/lal_born_ext.cpp +++ b/lib/gpu/lal_born_ext.cpp @@ -98,8 +98,8 @@ void born_gpu_clear() { int ** born_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_buck_coul_ext.cpp b/lib/gpu/lal_buck_coul_ext.cpp index ac3e6b8913..bbf076199b 100644 --- a/lib/gpu/lal_buck_coul_ext.cpp +++ b/lib/gpu/lal_buck_coul_ext.cpp @@ -101,8 +101,8 @@ void buckc_gpu_clear() { int ** buckc_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_buck_coul_long_ext.cpp b/lib/gpu/lal_buck_coul_long_ext.cpp index d9328a9210..80382ff5b3 100644 --- a/lib/gpu/lal_buck_coul_long_ext.cpp +++ b/lib/gpu/lal_buck_coul_long_ext.cpp @@ -100,8 +100,8 @@ void buckcl_gpu_clear() { int** buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_buck_ext.cpp b/lib/gpu/lal_buck_ext.cpp index 9f7f725aef..af1f826751 100644 --- a/lib/gpu/lal_buck_ext.cpp +++ b/lib/gpu/lal_buck_ext.cpp @@ -95,8 +95,8 @@ void buck_gpu_clear() { int ** buck_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_cg_cmm_ext.cpp b/lib/gpu/lal_cg_cmm_ext.cpp index 6d64c30435..a108f7525d 100644 --- a/lib/gpu/lal_cg_cmm_ext.cpp +++ b/lib/gpu/lal_cg_cmm_ext.cpp @@ -95,8 +95,8 @@ void cmm_gpu_clear() { int** cmm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_cg_cmm_long_ext.cpp b/lib/gpu/lal_cg_cmm_long_ext.cpp index ca7aab70ce..f4c1fd40d1 100644 --- a/lib/gpu/lal_cg_cmm_long_ext.cpp +++ b/lib/gpu/lal_cg_cmm_long_ext.cpp @@ -99,8 +99,8 @@ void cmml_gpu_clear() { int** cmml_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_charmm_long_ext.cpp b/lib/gpu/lal_charmm_long_ext.cpp index 5d544dc87b..e9ffc16939 100644 --- a/lib/gpu/lal_charmm_long_ext.cpp +++ b/lib/gpu/lal_charmm_long_ext.cpp @@ -104,8 +104,8 @@ void crml_gpu_clear() { int** crml_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_colloid_ext.cpp b/lib/gpu/lal_colloid_ext.cpp index d4e8a2092b..037a982cf4 100644 --- a/lib/gpu/lal_colloid_ext.cpp +++ b/lib/gpu/lal_colloid_ext.cpp @@ -101,8 +101,8 @@ void colloid_gpu_clear() { int ** colloid_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_coul_dsf_ext.cpp b/lib/gpu/lal_coul_dsf_ext.cpp index 19879cb9c5..4f2b7131d0 100644 --- a/lib/gpu/lal_coul_dsf_ext.cpp +++ b/lib/gpu/lal_coul_dsf_ext.cpp @@ -95,8 +95,8 @@ void cdsf_gpu_clear() { int** cdsf_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_coul_long_ext.cpp b/lib/gpu/lal_coul_long_ext.cpp index f6ce0c1d79..904c33527f 100644 --- a/lib/gpu/lal_coul_long_ext.cpp +++ b/lib/gpu/lal_coul_long_ext.cpp @@ -93,8 +93,8 @@ void cl_gpu_clear() { int** cl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 99e259213b..554a535909 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -205,7 +205,7 @@ int DeviceT::set_ocl_params(char *ocl_vendor) { " -DBLOCK_CELL_ID="+params[11]+ " -DMAX_BIO_SHARED_TYPES="+params[12]; } - _ocl_compile_string="-cl-fast-relaxed-math -cl-mad-enable "+ + _ocl_compile_string="-cl-fast-relaxed-math -cl-mad-enable "+std::string(OCL_INT_TYPE)+" "+ std::string(OCL_PRECISION_COMPILE)+" "+_ocl_vendor_string; #endif return 0; diff --git a/lib/gpu/lal_dipole_lj_ext.cpp b/lib/gpu/lal_dipole_lj_ext.cpp index 05d8fd9f72..d3174c3392 100644 --- a/lib/gpu/lal_dipole_lj_ext.cpp +++ b/lib/gpu/lal_dipole_lj_ext.cpp @@ -98,8 +98,8 @@ void dpl_gpu_clear() { int** dpl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double **host_mu, diff --git a/lib/gpu/lal_dipole_lj_sf_ext.cpp b/lib/gpu/lal_dipole_lj_sf_ext.cpp index 53ef66fca6..8a24233649 100644 --- a/lib/gpu/lal_dipole_lj_sf_ext.cpp +++ b/lib/gpu/lal_dipole_lj_sf_ext.cpp @@ -98,8 +98,8 @@ void dplsf_gpu_clear() { int** dplsf_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double **host_mu, diff --git a/lib/gpu/lal_eam.cpp b/lib/gpu/lal_eam.cpp index 54e5998ddd..c856a8e667 100644 --- a/lib/gpu/lal_eam.cpp +++ b/lib/gpu/lal_eam.cpp @@ -346,7 +346,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal, template int** EAMT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, int **special, + double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, int &inum, diff --git a/lib/gpu/lal_eam.h b/lib/gpu/lal_eam.h index 96dcb9bebc..698f9938cb 100644 --- a/lib/gpu/lal_eam.h +++ b/lib/gpu/lal_eam.h @@ -79,8 +79,8 @@ class EAM : public BaseAtomic { /// Pair loop with device neighboring int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, int &inum, void **fp_ptr); diff --git a/lib/gpu/lal_eam_ext.cpp b/lib/gpu/lal_eam_ext.cpp index 687a04529f..57695555b1 100644 --- a/lib/gpu/lal_eam_ext.cpp +++ b/lib/gpu/lal_eam_ext.cpp @@ -108,8 +108,8 @@ void eam_gpu_clear() { int ** eam_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, int &inum, void **fp_ptr) { diff --git a/lib/gpu/lal_gauss_ext.cpp b/lib/gpu/lal_gauss_ext.cpp index 4d95c7cfb9..818c7cbd26 100644 --- a/lib/gpu/lal_gauss_ext.cpp +++ b/lib/gpu/lal_gauss_ext.cpp @@ -94,8 +94,8 @@ void gauss_gpu_clear() { int ** gauss_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_gayberne_ext.cpp b/lib/gpu/lal_gayberne_ext.cpp index 047250363a..69ccb23b96 100644 --- a/lib/gpu/lal_gayberne_ext.cpp +++ b/lib/gpu/lal_gayberne_ext.cpp @@ -102,17 +102,17 @@ void gb_gpu_clear() { GBMF.clear(); } - int** compute(const int ago, const int inum_full, const int nall, +int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, double **host_quat); int** gb_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, int **special, + double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, diff --git a/lib/gpu/lal_lj96_ext.cpp b/lib/gpu/lal_lj96_ext.cpp index 4fe1880573..11b1ed184f 100644 --- a/lib/gpu/lal_lj96_ext.cpp +++ b/lib/gpu/lal_lj96_ext.cpp @@ -94,8 +94,8 @@ void lj96_gpu_clear() { int** lj96_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_lj_class2_long_ext.cpp b/lib/gpu/lal_lj_class2_long_ext.cpp index 7e637d4c98..5134d149ff 100644 --- a/lib/gpu/lal_lj_class2_long_ext.cpp +++ b/lib/gpu/lal_lj_class2_long_ext.cpp @@ -99,8 +99,8 @@ void c2cl_gpu_clear() { int** c2cl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_coul_debye_ext.cpp b/lib/gpu/lal_lj_coul_debye_ext.cpp index 3e5aab4d9d..572f2bba13 100644 --- a/lib/gpu/lal_lj_coul_debye_ext.cpp +++ b/lib/gpu/lal_lj_coul_debye_ext.cpp @@ -99,8 +99,8 @@ void ljcd_gpu_clear() { int** ljcd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_coul_ext.cpp b/lib/gpu/lal_lj_coul_ext.cpp index b0dec6f07d..1c0499e578 100644 --- a/lib/gpu/lal_lj_coul_ext.cpp +++ b/lib/gpu/lal_lj_coul_ext.cpp @@ -98,8 +98,8 @@ void ljc_gpu_clear() { int** ljc_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_coul_long_ext.cpp b/lib/gpu/lal_lj_coul_long_ext.cpp index f0724a8a98..b769446d43 100644 --- a/lib/gpu/lal_lj_coul_long_ext.cpp +++ b/lib/gpu/lal_lj_coul_long_ext.cpp @@ -99,8 +99,8 @@ void ljcl_gpu_clear() { int** ljcl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_coul_msm_ext.cpp b/lib/gpu/lal_lj_coul_msm_ext.cpp index 3480b14f3e..ce10cac048 100644 --- a/lib/gpu/lal_lj_coul_msm_ext.cpp +++ b/lib/gpu/lal_lj_coul_msm_ext.cpp @@ -101,8 +101,8 @@ void ljcm_gpu_clear() { int** ljcm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_dsf_ext.cpp b/lib/gpu/lal_lj_dsf_ext.cpp index 52b56f38c2..bd13d970fa 100644 --- a/lib/gpu/lal_lj_dsf_ext.cpp +++ b/lib/gpu/lal_lj_dsf_ext.cpp @@ -102,8 +102,8 @@ void ljd_gpu_clear() { int** ljd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_q, double *boxlo, diff --git a/lib/gpu/lal_lj_expand_ext.cpp b/lib/gpu/lal_lj_expand_ext.cpp index 54bb3f62f9..725004b0ef 100644 --- a/lib/gpu/lal_lj_expand_ext.cpp +++ b/lib/gpu/lal_lj_expand_ext.cpp @@ -95,8 +95,8 @@ void lje_gpu_clear() { int** lje_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_lj_ext.cpp b/lib/gpu/lal_lj_ext.cpp index 1dc47ccbbd..3d85f689d0 100644 --- a/lib/gpu/lal_lj_ext.cpp +++ b/lib/gpu/lal_lj_ext.cpp @@ -94,8 +94,8 @@ void ljl_gpu_clear() { int ** ljl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_lj_gromacs_ext.cpp b/lib/gpu/lal_lj_gromacs_ext.cpp index c2db048fe6..b5eb0038b7 100644 --- a/lib/gpu/lal_lj_gromacs_ext.cpp +++ b/lib/gpu/lal_lj_gromacs_ext.cpp @@ -99,8 +99,8 @@ void ljgrm_gpu_clear() { int ** ljgrm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_mie_ext.cpp b/lib/gpu/lal_mie_ext.cpp index 52668cf52d..f9b36b5075 100644 --- a/lib/gpu/lal_mie_ext.cpp +++ b/lib/gpu/lal_mie_ext.cpp @@ -98,8 +98,8 @@ void mie_gpu_clear() { int ** mie_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_morse_ext.cpp b/lib/gpu/lal_morse_ext.cpp index 787f49b39d..2fcf25cf23 100644 --- a/lib/gpu/lal_morse_ext.cpp +++ b/lib/gpu/lal_morse_ext.cpp @@ -95,8 +95,8 @@ void mor_gpu_clear() { int** mor_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 662ab114de..877dbf4886 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -389,8 +389,8 @@ void Neighbor::resize_max_neighbors(const int maxn, bool &success) { template void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, const int nall, Atom &atom, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, bool &success, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, bool &success, int &mn) { _nbor_time_avail=true; const int nt=inum+host_inum; @@ -423,7 +423,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, if (_maxspecial>0) { time_nbor.start(); - UCL_H_Vec view_nspecial, view_special, view_tag; + UCL_H_Vec view_nspecial; + UCL_H_Vec view_special, view_tag; view_nspecial.view(nspecial[0],nt*3,*dev); view_special.view(special[0],nt*_maxspecial,*dev); view_tag.view(tag,nall,*dev); @@ -615,5 +616,5 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, template void Neighbor::build_nbor_list (double **x, const int inum, const int host_inum, const int nall, Atom &atom, double *sublo, double *subhi, - int *, int **, int **, bool &success, int &mn); + tagint *, int **, tagint **, bool &success, int &mn); diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 7949221ea5..d54aa439b0 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -173,7 +173,7 @@ class Neighbor { template void build_nbor_list(double **x, const int inum, const int host_inum, const int nall, Atom &atom, double *sublo, - double *subhi, int *tag, int **nspecial, int **special, + double *subhi, tagint *tag, int **nspecial, tagint **special, bool &success, int &max_nbors); /// Return the number of bytes used on device @@ -212,7 +212,7 @@ class Neighbor { /// Device storage for special neighbor counts UCL_D_Vec dev_nspecial; /// Device storage for special neighbors - UCL_D_Vec dev_special, dev_special_t; + UCL_D_Vec dev_special, dev_special_t; /// Host/Device storage for number of particles per cell UCL_Vector cell_counts; int *cell_iter; diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index cc4e210932..4f8464e803 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -16,6 +16,16 @@ #ifdef NV_KERNEL #include "lal_preprocessor.h" +#ifdef LAMMPS_SMALLBIG +#define tagint int +#endif +#ifdef LAMMPS_BIGBIG +#include "inttypes.h" +#define tagint int64_t +#endif +#ifdef LAMMPS_SMALLSMALL +#define tagint int +#endif #ifndef _DOUBLE_DOUBLE texture pos_tex; #else @@ -93,15 +103,22 @@ __kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id, #else #define pos_tex x_ +#ifdef LAMMPS_SMALLBIG +#define tagint int +#endif +#ifdef LAMMPS_BIGBIG +#define tagint long long int +#endif +#ifdef LAMMPS_SMALLSMALL +#define tagint int +#endif #endif - - -__kernel void transpose(__global int *restrict out, - const __global int *restrict in, +__kernel void transpose(__global tagint *restrict out, + const __global tagint *restrict in, int columns_in, int rows_in) { - __local int block[BLOCK_CELL_2D][BLOCK_CELL_2D+1]; + __local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1]; unsigned ti=THREAD_ID_X; unsigned tj=THREAD_ID_Y; @@ -239,9 +256,9 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_, __kernel void kernel_special(__global int *dev_nbor, __global int *host_nbor_list, const __global int *host_numj, - const __global int *restrict tag, + const __global tagint *restrict tag, const __global int *restrict nspecial, - const __global int *restrict special, + const __global tagint *restrict special, int inum, int nt, int max_nbors, int t_per_atom) { int tid=THREAD_ID_X; int ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom); @@ -275,7 +292,7 @@ __kernel void kernel_special(__global int *dev_nbor, for ( ; list> SBBITS & 3; }; #define BLOCK_ELLIPSE BLOCK_PAIR #endif +// default to 32-bit smallint and other ints, 64-bit bigint: same as defined in src/lmptype.h +#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && !defined(LAMMPS_SMALLBIG) +#define LAMMPS_SMALLBIG +#endif diff --git a/lib/gpu/lal_re_squared_ext.cpp b/lib/gpu/lal_re_squared_ext.cpp index 09e4c15c43..b010f0f21c 100644 --- a/lib/gpu/lal_re_squared_ext.cpp +++ b/lib/gpu/lal_re_squared_ext.cpp @@ -101,15 +101,15 @@ void re_gpu_clear() { int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, double **host_quat); int** re_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, int **special, + double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, diff --git a/lib/gpu/lal_soft_ext.cpp b/lib/gpu/lal_soft_ext.cpp index 3efeece427..1872388bb3 100644 --- a/lib/gpu/lal_soft_ext.cpp +++ b/lib/gpu/lal_soft_ext.cpp @@ -94,8 +94,8 @@ void soft_gpu_clear() { int ** soft_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu index 2408602e73..71433cd5c2 100644 --- a/lib/gpu/lal_sw.cu +++ b/lib/gpu/lal_sw.cu @@ -393,8 +393,6 @@ __kernel void k_sw_three_center(const __global numtyp4 *restrict x_, if (rsq1 > sw3_ijparam.y) continue; numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex); - sw_sigma=sw1_ijparam.y; - sw_gamma=sw1_ijparam.w; sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma; sw_cut_ij=sw3_ijparam.x; @@ -418,15 +416,11 @@ __kernel void k_sw_three_center(const __global numtyp4 *restrict x_, numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z; if (rsq2 < sw3_ikparam.y) { // sw_cutsq=sw3[ikparam].y; numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex); - sw_sigma=sw1_ikparam.y; - sw_gamma=sw1_ikparam.w; sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma; sw_cut_ik=sw3_ikparam.x; int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex); - sw_epsilon=sw1_ijkparam.x; - sw_lambda=sw1_ijkparam.z; sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon; sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk; numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex); @@ -522,8 +516,6 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_, if (rsq1 > sw3_ijparam.y) continue; numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex); - sw_sigma=sw1_ijparam.y; - sw_gamma=sw1_ijparam.w; sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma; sw_cut_ij=sw3_ijparam.x; @@ -559,15 +551,11 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_, if (rsq2 < sw3_ikparam.y) { numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex); - sw_sigma=sw1_ikparam.y; - sw_gamma=sw1_ikparam.w; sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma; sw_cut_ik=sw3_ikparam.x; int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex); - sw_epsilon=sw1_ijkparam.x; - sw_lambda=sw1_ijkparam.z; sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon; sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk; numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex); @@ -663,8 +651,6 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_, if (rsq1 > sw3_ijparam.y) continue; numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex); - sw_sigma=sw1_ijparam.y; - sw_gamma=sw1_ijparam.w; sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma; sw_cut_ij=sw3_ijparam.x; @@ -700,15 +686,11 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_, if (rsq2 < sw3_ikparam.y) { numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex); - sw_sigma=sw1_ikparam.y; - sw_gamma=sw1_ikparam.w; sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma; sw_cut_ik=sw3_ikparam.x; int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex); - sw_epsilon=sw1_ijkparam.x; - sw_lambda=sw1_ijkparam.z; sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon; sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk; numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex); diff --git a/lib/gpu/lal_sw_ext.cpp b/lib/gpu/lal_sw_ext.cpp index 16b6828325..e2d1b5e4dd 100644 --- a/lib/gpu/lal_sw_ext.cpp +++ b/lib/gpu/lal_sw_ext.cpp @@ -105,8 +105,8 @@ void sw_gpu_clear() { int ** sw_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_table_ext.cpp b/lib/gpu/lal_table_ext.cpp index 2186e33dea..8e030ab4ba 100644 --- a/lib/gpu/lal_table_ext.cpp +++ b/lib/gpu/lal_table_ext.cpp @@ -94,8 +94,8 @@ void table_gpu_clear() { int ** table_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) { diff --git a/lib/gpu/lal_yukawa_colloid.cpp b/lib/gpu/lal_yukawa_colloid.cpp index f86a257821..70282a7117 100644 --- a/lib/gpu/lal_yukawa_colloid.cpp +++ b/lib/gpu/lal_yukawa_colloid.cpp @@ -189,8 +189,8 @@ void YukawaColloidT::compute(const int f_ago, const int inum_full, template int** YukawaColloidT::compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *rad) { diff --git a/lib/gpu/lal_yukawa_colloid.h b/lib/gpu/lal_yukawa_colloid.h index 2b6cbe4ecd..5a9ee7ae6e 100644 --- a/lib/gpu/lal_yukawa_colloid.h +++ b/lib/gpu/lal_yukawa_colloid.h @@ -82,8 +82,8 @@ class YukawaColloid : public BaseAtomic { /// Pair loop with device neighboring int** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *rad); diff --git a/lib/gpu/lal_yukawa_colloid_ext.cpp b/lib/gpu/lal_yukawa_colloid_ext.cpp index c43166c33b..f259152e2b 100644 --- a/lib/gpu/lal_yukawa_colloid_ext.cpp +++ b/lib/gpu/lal_yukawa_colloid_ext.cpp @@ -94,8 +94,8 @@ void ykcolloid_gpu_clear() { int ** ykcolloid_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, double *host_rad) { diff --git a/lib/gpu/lal_yukawa_ext.cpp b/lib/gpu/lal_yukawa_ext.cpp index 36f390ab99..79f12d11ba 100644 --- a/lib/gpu/lal_yukawa_ext.cpp +++ b/lib/gpu/lal_yukawa_ext.cpp @@ -94,8 +94,8 @@ void yukawa_gpu_clear() { int ** yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success) {