git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11660 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -29,6 +29,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
|
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
|
||||||
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
|
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
|
||||||
$(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
|
$(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
|
||||||
|
$(OBJ_DIR)/lal_base_dpd.o \
|
||||||
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
|
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
|
||||||
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
|
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
|
||||||
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
|
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
|
||||||
@ -65,7 +66,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||||||
$(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
|
$(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
|
||||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
|
||||||
|
|
||||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||||
@ -111,7 +113,8 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||||||
$(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \
|
$(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \
|
||||||
$(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
|
$(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
|
||||||
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
|
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
|
||||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h
|
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||||
|
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h
|
||||||
|
|
||||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||||
|
|
||||||
@ -190,6 +193,9 @@ $(OBJ_DIR)/lal_base_dipole.o: $(ALL_H) lal_base_dipole.h lal_base_dipole.cpp
|
|||||||
$(OBJ_DIR)/lal_base_three.o: $(ALL_H) lal_base_three.h lal_base_three.cpp
|
$(OBJ_DIR)/lal_base_three.o: $(ALL_H) lal_base_three.h lal_base_three.cpp
|
||||||
$(CUDR) -o $@ -c lal_base_three.cpp
|
$(CUDR) -o $@ -c lal_base_three.cpp
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_base_dpd.o: $(ALL_H) lal_base_dpd.h lal_base_dpd.cpp
|
||||||
|
$(CUDR) -o $@ -c lal_base_dpd.cpp
|
||||||
|
|
||||||
$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
|
$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
|
||||||
$(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
|
$(CUDA) --cubin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
|
||||||
|
|
||||||
@ -658,6 +664,18 @@ $(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ_
|
|||||||
$(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
|
$(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
|
||||||
$(CUDR) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
|
$(CUDR) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
|
||||||
|
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_dpd.cu
|
||||||
|
|
||||||
|
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
||||||
|
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
||||||
|
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||||
|
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||||
|
|
||||||
|
|||||||
@ -18,6 +18,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||||||
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
|
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
|
||||||
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
|
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
|
||||||
$(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
|
$(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
|
||||||
|
$(OBJ_DIR)/lal_base_dpd.o \
|
||||||
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
|
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
|
||||||
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
|
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
|
||||||
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
|
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
|
||||||
@ -54,7 +55,8 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||||||
$(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
|
$(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
|
||||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o
|
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||||
|
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
|
||||||
|
|
||||||
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||||
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
|
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
|
||||||
@ -77,7 +79,7 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||||||
$(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h \
|
$(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h \
|
||||||
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
|
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
|
||||||
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
||||||
$(OBJ_DIR)/lj_gromacs_cl.h
|
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h
|
||||||
|
|
||||||
|
|
||||||
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
||||||
@ -129,6 +131,9 @@ $(OBJ_DIR)/lal_base_dipole.o: $(OCL_H) lal_base_dipole.h lal_base_dipole.cpp
|
|||||||
$(OBJ_DIR)/lal_base_three.o: $(OCL_H) lal_base_three.h lal_base_three.cpp
|
$(OBJ_DIR)/lal_base_three.o: $(OCL_H) lal_base_three.h lal_base_three.cpp
|
||||||
$(OCL) -o $@ -c lal_base_three.cpp
|
$(OCL) -o $@ -c lal_base_three.cpp
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_base_dpd.o: $(OCL_H) lal_base_dpd.h lal_base_dpd.cpp
|
||||||
|
$(OCL) -o $@ -c lal_base_dpd.cpp
|
||||||
|
|
||||||
$(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h
|
$(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h
|
||||||
$(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h lal_pppm.cu $(OBJ_DIR)/pppm_cl.h;
|
$(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h lal_pppm.cu $(OBJ_DIR)/pppm_cl.h;
|
||||||
|
|
||||||
@ -471,6 +476,15 @@ $(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ
|
|||||||
$(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
|
$(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
|
||||||
$(OCL) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
|
$(OCL) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/dpd_cl.h: lal_dpd.cu $(PRE1_H)
|
||||||
|
$(BSH) ./geryon/file_to_cstr.sh dpd $(PRE1_H) lal_dpd.cu $(OBJ_DIR)/dpd_cl.h;
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_DIR)/dpd_cl.h $(OBJ_DIR)/lal_base_dpd.o
|
||||||
|
$(OCL) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
|
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||||
|
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||||
|
|
||||||
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
|
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
|
||||||
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
|
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
|
||||||
|
|
||||||
|
|||||||
@ -41,6 +41,8 @@ int AtomT::bytes_per_atom() const {
|
|||||||
bytes+=4*sizeof(numtyp);
|
bytes+=4*sizeof(numtyp);
|
||||||
if (_charge)
|
if (_charge)
|
||||||
bytes+=sizeof(numtyp);
|
bytes+=sizeof(numtyp);
|
||||||
|
if (_vel)
|
||||||
|
bytes+=4*sizeof(numtyp);
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,7 +54,7 @@ bool AtomT::alloc(const int nall) {
|
|||||||
|
|
||||||
// Ignore host/device transfers?
|
// Ignore host/device transfers?
|
||||||
_host_view=false;
|
_host_view=false;
|
||||||
if (dev->shared_memory()) {
|
if (dev->shared_memory() && sizeof(numtyp)==sizeof(double)) {
|
||||||
_host_view=true;
|
_host_view=true;
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
assert(0==1);
|
assert(0==1);
|
||||||
@ -90,6 +92,11 @@ bool AtomT::alloc(const int nall) {
|
|||||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||||
gpu_bytes+=quat.device.row_bytes();
|
gpu_bytes+=quat.device.row_bytes();
|
||||||
}
|
}
|
||||||
|
if (_vel && _host_view==false) {
|
||||||
|
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||||
|
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||||
|
gpu_bytes+=v.device.row_bytes();
|
||||||
|
}
|
||||||
|
|
||||||
if (_gpu_nbor>0) {
|
if (_gpu_nbor>0) {
|
||||||
if (_bonds) {
|
if (_bonds) {
|
||||||
@ -124,7 +131,7 @@ bool AtomT::alloc(const int nall) {
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
bool AtomT::add_fields(const bool charge, const bool rot,
|
bool AtomT::add_fields(const bool charge, const bool rot,
|
||||||
const int gpu_nbor, const bool bonds) {
|
const int gpu_nbor, const bool bonds, const bool vel) {
|
||||||
bool success=true;
|
bool success=true;
|
||||||
// Ignore host/device transfers?
|
// Ignore host/device transfers?
|
||||||
int gpu_bytes=0;
|
int gpu_bytes=0;
|
||||||
@ -149,6 +156,16 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (vel && _vel==false) {
|
||||||
|
_vel=true;
|
||||||
|
_other=true;
|
||||||
|
if (_host_view==false) {
|
||||||
|
success=success && (v.alloc(_max_atoms*4,*dev,UCL_WRITE_ONLY,
|
||||||
|
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||||
|
gpu_bytes+=v.device.row_bytes();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (bonds && _bonds==false) {
|
if (bonds && _bonds==false) {
|
||||||
_bonds=true;
|
_bonds=true;
|
||||||
if (_bonds && _gpu_nbor>0) {
|
if (_bonds && _gpu_nbor>0) {
|
||||||
@ -191,19 +208,21 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
bool AtomT::init(const int nall, const bool charge, const bool rot,
|
bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||||
UCL_Device &devi, const int gpu_nbor, const bool bonds) {
|
UCL_Device &devi, const int gpu_nbor, const bool bonds, const bool vel) {
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
bool success=true;
|
bool success=true;
|
||||||
_x_avail=false;
|
_x_avail=false;
|
||||||
_q_avail=false;
|
_q_avail=false;
|
||||||
_quat_avail=false;
|
_quat_avail=false;
|
||||||
|
_v_avail=false;
|
||||||
_resized=false;
|
_resized=false;
|
||||||
_gpu_nbor=gpu_nbor;
|
_gpu_nbor=gpu_nbor;
|
||||||
_bonds=bonds;
|
_bonds=bonds;
|
||||||
_charge=charge;
|
_charge=charge;
|
||||||
_rot=rot;
|
_rot=rot;
|
||||||
_other=_charge || _rot;
|
_vel=vel;
|
||||||
|
_other=_charge || _rot || _vel;
|
||||||
dev=&devi;
|
dev=&devi;
|
||||||
_time_transfer=0;
|
_time_transfer=0;
|
||||||
|
|
||||||
@ -216,9 +235,11 @@ bool AtomT::init(const int nall, const bool charge, const bool rot,
|
|||||||
time_pos.init(*dev);
|
time_pos.init(*dev);
|
||||||
time_q.init(*dev);
|
time_q.init(*dev);
|
||||||
time_quat.init(*dev);
|
time_quat.init(*dev);
|
||||||
|
time_vel.init(*dev);
|
||||||
time_pos.zero();
|
time_pos.zero();
|
||||||
time_q.zero();
|
time_q.zero();
|
||||||
time_quat.zero();
|
time_quat.zero();
|
||||||
|
time_vel.zero();
|
||||||
_time_cast=0.0;
|
_time_cast=0.0;
|
||||||
|
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
@ -239,6 +260,8 @@ void AtomT::clear_resize() {
|
|||||||
q.clear();
|
q.clear();
|
||||||
if (_rot)
|
if (_rot)
|
||||||
quat.clear();
|
quat.clear();
|
||||||
|
if (_vel)
|
||||||
|
v.clear();
|
||||||
|
|
||||||
dev_cell_id.clear();
|
dev_cell_id.clear();
|
||||||
dev_particle_id.clear();
|
dev_particle_id.clear();
|
||||||
@ -267,6 +290,7 @@ void AtomT::clear() {
|
|||||||
time_pos.clear();
|
time_pos.clear();
|
||||||
time_q.clear();
|
time_q.clear();
|
||||||
time_quat.clear();
|
time_quat.clear();
|
||||||
|
time_vel.clear();
|
||||||
clear_resize();
|
clear_resize();
|
||||||
|
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
@ -285,6 +309,8 @@ double AtomT::host_memory_usage() const {
|
|||||||
atom_bytes+=1;
|
atom_bytes+=1;
|
||||||
if (_rot)
|
if (_rot)
|
||||||
atom_bytes+=4;
|
atom_bytes+=4;
|
||||||
|
if (_vel)
|
||||||
|
atom_bytes+=4;
|
||||||
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -67,7 +67,8 @@ class Atom {
|
|||||||
* gpu_nbor 1 if neighboring will be performed on device
|
* gpu_nbor 1 if neighboring will be performed on device
|
||||||
* gpu_nbor 2 if binning on host and neighboring on device **/
|
* gpu_nbor 2 if binning on host and neighboring on device **/
|
||||||
bool init(const int nall, const bool charge, const bool rot,
|
bool init(const int nall, const bool charge, const bool rot,
|
||||||
UCL_Device &dev, const int gpu_nbor=0, const bool bonds=false);
|
UCL_Device &dev, const int gpu_nbor=0, const bool bonds=false,
|
||||||
|
const bool vel=false);
|
||||||
|
|
||||||
/// Check if we have enough device storage and realloc if not
|
/// Check if we have enough device storage and realloc if not
|
||||||
/** Returns true if resized with any call during this timestep **/
|
/** Returns true if resized with any call during this timestep **/
|
||||||
@ -87,7 +88,7 @@ class Atom {
|
|||||||
* gpu_nbor 1 if neighboring will be performed on device
|
* gpu_nbor 1 if neighboring will be performed on device
|
||||||
* gpu_nbor 2 if binning on host and neighboring on device **/
|
* gpu_nbor 2 if binning on host and neighboring on device **/
|
||||||
bool add_fields(const bool charge, const bool rot, const int gpu_nbor,
|
bool add_fields(const bool charge, const bool rot, const int gpu_nbor,
|
||||||
const bool bonds);
|
const bool bonds, const bool vel=false);
|
||||||
|
|
||||||
/// Returns true if GPU is using charges
|
/// Returns true if GPU is using charges
|
||||||
bool charge() { return _charge; }
|
bool charge() { return _charge; }
|
||||||
@ -95,6 +96,9 @@ class Atom {
|
|||||||
/// Returns true if GPU is using quaternions
|
/// Returns true if GPU is using quaternions
|
||||||
bool quaternion() { return _rot; }
|
bool quaternion() { return _rot; }
|
||||||
|
|
||||||
|
/// Returns true if GPU is using velocities
|
||||||
|
bool velocity() { return _vel; }
|
||||||
|
|
||||||
/// Only free matrices of length inum or nall for resizing
|
/// Only free matrices of length inum or nall for resizing
|
||||||
void clear_resize();
|
void clear_resize();
|
||||||
|
|
||||||
@ -114,6 +118,8 @@ class Atom {
|
|||||||
time_q.add_to_total();
|
time_q.add_to_total();
|
||||||
if (_rot)
|
if (_rot)
|
||||||
time_quat.add_to_total();
|
time_quat.add_to_total();
|
||||||
|
if (_vel)
|
||||||
|
time_vel.add_to_total();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add copy times to timers
|
/// Add copy times to timers
|
||||||
@ -123,6 +129,8 @@ class Atom {
|
|||||||
time_q.zero();
|
time_q.zero();
|
||||||
if (_rot)
|
if (_rot)
|
||||||
time_quat.zero();
|
time_quat.zero();
|
||||||
|
if (_vel)
|
||||||
|
time_vel.zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the total time for host/device data transfer
|
/// Return the total time for host/device data transfer
|
||||||
@ -135,9 +143,13 @@ class Atom {
|
|||||||
time_q.zero_total();
|
time_q.zero_total();
|
||||||
}
|
}
|
||||||
if (_rot) {
|
if (_rot) {
|
||||||
total+=time_q.total_seconds();
|
total+=time_quat.total_seconds();
|
||||||
time_quat.zero_total();
|
time_quat.zero_total();
|
||||||
}
|
}
|
||||||
|
if (_vel) {
|
||||||
|
total+=time_vel.total_seconds();
|
||||||
|
time_vel.zero_total();
|
||||||
|
}
|
||||||
|
|
||||||
return total+_time_transfer/1000.0;
|
return total+_time_transfer/1000.0;
|
||||||
}
|
}
|
||||||
@ -242,7 +254,7 @@ class Atom {
|
|||||||
|
|
||||||
/// Signal that we need to transfer atom data for next timestep
|
/// Signal that we need to transfer atom data for next timestep
|
||||||
inline void data_unavail()
|
inline void data_unavail()
|
||||||
{ _x_avail=false; _q_avail=false; _quat_avail=false; _resized=false; }
|
{ _x_avail=false; _q_avail=false; _quat_avail=false; _v_avail=false; _resized=false; }
|
||||||
|
|
||||||
/// Cast positions and types to write buffer
|
/// Cast positions and types to write buffer
|
||||||
inline void cast_x_data(double **host_ptr, const int *host_type) {
|
inline void cast_x_data(double **host_ptr, const int *host_type) {
|
||||||
@ -341,6 +353,53 @@ class Atom {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Cast velocities and tags to write buffer
|
||||||
|
inline void cast_v_data(double **host_ptr, const tagint *host_tag) {
|
||||||
|
if (_v_avail==false) {
|
||||||
|
double t=MPI_Wtime();
|
||||||
|
#ifdef GPU_CAST
|
||||||
|
memcpy(host_v_cast.begin(),host_ptr[0],_nall*3*sizeof(double));
|
||||||
|
memcpy(host_tag_cast.begin(),host_tag,_nall*sizeof(int));
|
||||||
|
#else
|
||||||
|
int wl=0;
|
||||||
|
for (int i=0; i<_nall; i++) {
|
||||||
|
v[wl]=host_ptr[i][0];
|
||||||
|
v[wl+1]=host_ptr[i][1];
|
||||||
|
v[wl+2]=host_ptr[i][2];
|
||||||
|
v[wl+3]=host_tag[i];
|
||||||
|
wl+=4;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
_time_cast+=MPI_Wtime()-t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copy velocities and tags to device asynchronously
|
||||||
|
/** Copies nall() elements **/
|
||||||
|
inline void add_v_data(double **host_ptr, tagint *host_tag) {
|
||||||
|
time_vel.start();
|
||||||
|
if (_v_avail==false) {
|
||||||
|
#ifdef GPU_CAST
|
||||||
|
v_cast.update_device(_nall*3,true);
|
||||||
|
tag_cast.update_device(_nall,true);
|
||||||
|
int block_size=64;
|
||||||
|
int GX=static_cast<int>(ceil(static_cast<double>(_nall)/block_size));
|
||||||
|
k_cast_x.set_size(GX,block_size);
|
||||||
|
k_cast_x.run(&v, &v_cast, &tag_cast, &_nall);
|
||||||
|
#else
|
||||||
|
v.update_device(_nall*4,true);
|
||||||
|
#endif
|
||||||
|
_v_avail=true;
|
||||||
|
}
|
||||||
|
time_vel.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calls cast_v_data and add_v_data and times the routines
|
||||||
|
inline void cast_copy_v(double **host_ptr, tagint *host_tag) {
|
||||||
|
cast_v_data(host_ptr,host_tag);
|
||||||
|
add_v_data(host_ptr,host_tag);
|
||||||
|
}
|
||||||
|
|
||||||
/// Add in casting time from additional data (seconds)
|
/// Add in casting time from additional data (seconds)
|
||||||
inline void add_cast_time(double t) { _time_cast+=t; }
|
inline void add_cast_time(double t) { _time_cast+=t; }
|
||||||
|
|
||||||
@ -362,7 +421,9 @@ class Atom {
|
|||||||
UCL_Vector<numtyp,numtyp> q;
|
UCL_Vector<numtyp,numtyp> q;
|
||||||
/// Quaterions
|
/// Quaterions
|
||||||
UCL_Vector<numtyp,numtyp> quat;
|
UCL_Vector<numtyp,numtyp> quat;
|
||||||
|
/// Velocities
|
||||||
|
UCL_Vector<numtyp,numtyp> v;
|
||||||
|
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
UCL_Vector<double,double> x_cast;
|
UCL_Vector<double,double> x_cast;
|
||||||
UCL_Vector<int,int> type_cast;
|
UCL_Vector<int,int> type_cast;
|
||||||
@ -372,6 +433,7 @@ class Atom {
|
|||||||
UCL_D_Vec<unsigned> dev_cell_id;
|
UCL_D_Vec<unsigned> dev_cell_id;
|
||||||
/// Cell list identifiers for device nbor builds
|
/// Cell list identifiers for device nbor builds
|
||||||
UCL_D_Vec<int> dev_particle_id;
|
UCL_D_Vec<int> dev_particle_id;
|
||||||
|
|
||||||
/// Atom tag information for device nbor builds
|
/// Atom tag information for device nbor builds
|
||||||
UCL_D_Vec<tagint> dev_tag;
|
UCL_D_Vec<tagint> dev_tag;
|
||||||
|
|
||||||
@ -381,7 +443,7 @@ class Atom {
|
|||||||
UCL_H_Vec<int> host_particle_id;
|
UCL_H_Vec<int> host_particle_id;
|
||||||
|
|
||||||
/// Device timers
|
/// Device timers
|
||||||
UCL_Timer time_pos, time_q, time_quat;
|
UCL_Timer time_pos, time_q, time_quat, time_vel;
|
||||||
|
|
||||||
/// Geryon device
|
/// Geryon device
|
||||||
UCL_Device *dev;
|
UCL_Device *dev;
|
||||||
@ -396,11 +458,11 @@ class Atom {
|
|||||||
bool _compiled;
|
bool _compiled;
|
||||||
|
|
||||||
// True if data has been copied to device already
|
// True if data has been copied to device already
|
||||||
bool _x_avail, _q_avail, _quat_avail, _resized;
|
bool _x_avail, _q_avail, _quat_avail, _v_avail, _resized;
|
||||||
|
|
||||||
bool alloc(const int nall);
|
bool alloc(const int nall);
|
||||||
|
|
||||||
bool _allocated, _rot, _charge, _bonds, _other;
|
bool _allocated, _rot, _charge, _bonds, _vel, _other;
|
||||||
int _max_atoms, _nall, _gpu_nbor;
|
int _max_atoms, _nall, _gpu_nbor;
|
||||||
bool _host_view;
|
bool _host_view;
|
||||||
double _time_cast, _time_transfer;
|
double _time_cast, _time_transfer;
|
||||||
|
|||||||
@ -213,12 +213,12 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||||
const bool rot, const int nlocal,
|
const bool rot, const int nlocal,
|
||||||
const int host_nlocal, const int nall,
|
const int host_nlocal, const int nall,
|
||||||
Neighbor *nbor, const int maxspecial,
|
Neighbor *nbor, const int maxspecial,
|
||||||
const int gpu_host, const int max_nbors,
|
const int gpu_host, const int max_nbors,
|
||||||
const double cell_size, const bool pre_cut,
|
const double cell_size, const bool pre_cut,
|
||||||
const int threads_per_atom) {
|
const int threads_per_atom, const bool vel) {
|
||||||
if (!_device_init)
|
if (!_device_init)
|
||||||
return -1;
|
return -1;
|
||||||
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
||||||
@ -245,7 +245,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
|||||||
|
|
||||||
if (_init_count==0) {
|
if (_init_count==0) {
|
||||||
// Initialize atom and nbor data
|
// Initialize atom and nbor data
|
||||||
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0))
|
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0,vel))
|
||||||
return -3;
|
return -3;
|
||||||
|
|
||||||
_data_in_estimate++;
|
_data_in_estimate++;
|
||||||
@ -253,12 +253,16 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
|||||||
_data_in_estimate++;
|
_data_in_estimate++;
|
||||||
if (rot)
|
if (rot)
|
||||||
_data_in_estimate++;
|
_data_in_estimate++;
|
||||||
|
if (vel)
|
||||||
|
_data_in_estimate++;
|
||||||
} else {
|
} else {
|
||||||
if (atom.charge()==false && charge)
|
if (atom.charge()==false && charge)
|
||||||
_data_in_estimate++;
|
_data_in_estimate++;
|
||||||
if (atom.quaternion()==false && rot)
|
if (atom.quaternion()==false && rot)
|
||||||
_data_in_estimate++;
|
_data_in_estimate++;
|
||||||
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial))
|
if (atom.velocity()==false && vel)
|
||||||
|
_data_in_estimate++;
|
||||||
|
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel))
|
||||||
return -3;
|
return -3;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -318,7 +322,7 @@ void DeviceT::set_double_precompute
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DeviceT::init_message(FILE *screen, const char *name,
|
void DeviceT::init_message(FILE *screen, const char *name,
|
||||||
const int first_gpu, const int last_gpu) {
|
const int first_gpu, const int last_gpu) {
|
||||||
#if defined(USE_OPENCL)
|
#if defined(USE_OPENCL)
|
||||||
std::string fs="";
|
std::string fs="";
|
||||||
#elif defined(USE_CUDART)
|
#elif defined(USE_CUDART)
|
||||||
@ -330,7 +334,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
|||||||
if (_replica_me == 0 && screen) {
|
if (_replica_me == 0 && screen) {
|
||||||
fprintf(screen,"\n-------------------------------------");
|
fprintf(screen,"\n-------------------------------------");
|
||||||
fprintf(screen,"-------------------------------------\n");
|
fprintf(screen,"-------------------------------------\n");
|
||||||
fprintf(screen,"- Using GPGPU acceleration for %s:\n",name);
|
fprintf(screen,"- Using acceleration for %s:\n",name);
|
||||||
fprintf(screen,"- with %d proc(s) per device.\n",_procs_per_gpu);
|
fprintf(screen,"- with %d proc(s) per device.\n",_procs_per_gpu);
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
fprintf(screen,"- with %d thread(s) per proc.\n",_nthreads);
|
fprintf(screen,"- with %d thread(s) per proc.\n",_nthreads);
|
||||||
@ -361,7 +365,7 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
|||||||
} else
|
} else
|
||||||
sname+="Double Precision)";
|
sname+="Double Precision)";
|
||||||
|
|
||||||
fprintf(screen,"GPU %d: %s\n",i,sname.c_str());
|
fprintf(screen,"Device %d: %s\n",i,sname.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(screen,"-------------------------------------");
|
fprintf(screen,"-------------------------------------");
|
||||||
@ -371,8 +375,8 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DeviceT::estimate_gpu_overhead(const int kernel_calls,
|
void DeviceT::estimate_gpu_overhead(const int kernel_calls,
|
||||||
double &gpu_overhead,
|
double &gpu_overhead,
|
||||||
double &gpu_driver_overhead) {
|
double &gpu_driver_overhead) {
|
||||||
UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
|
UCL_H_Vec<int> *host_data_in=NULL, *host_data_out=NULL;
|
||||||
UCL_D_Vec<int> *dev_data_in=NULL, *dev_data_out=NULL, *kernel_data=NULL;
|
UCL_D_Vec<int> *dev_data_in=NULL, *dev_data_out=NULL, *kernel_data=NULL;
|
||||||
UCL_Timer *timers_in=NULL, *timers_out=NULL, *timers_kernel=NULL;
|
UCL_Timer *timers_in=NULL, *timers_out=NULL, *timers_kernel=NULL;
|
||||||
@ -506,16 +510,17 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
|||||||
double mpi_max_bytes;
|
double mpi_max_bytes;
|
||||||
MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,_comm_replica);
|
MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,_comm_replica);
|
||||||
double max_mb=mpi_max_bytes/(1024.0*1024.0);
|
double max_mb=mpi_max_bytes/(1024.0*1024.0);
|
||||||
|
double t_time=times[0]+times[1]+times[2]+times[3]+times[4];
|
||||||
|
|
||||||
if (replica_me()==0)
|
if (replica_me()==0)
|
||||||
if (screen && times[5]>0.0) {
|
if (screen && times[5]>0.0) {
|
||||||
fprintf(screen,"\n\n-------------------------------------");
|
fprintf(screen,"\n\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
fprintf(screen," GPU Time Info (average): ");
|
fprintf(screen," Device Time Info (average): ");
|
||||||
fprintf(screen,"\n-------------------------------------");
|
fprintf(screen,"\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
|
|
||||||
if (time_device()) {
|
if (time_device() && t_time>0) {
|
||||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
||||||
fprintf(screen,"Data Cast/Pack: %.4f s.\n",times[4]/_replica_size);
|
fprintf(screen,"Data Cast/Pack: %.4f s.\n",times[4]/_replica_size);
|
||||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
||||||
@ -527,7 +532,8 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
|||||||
}
|
}
|
||||||
if (nbor.gpu_nbor()==2)
|
if (nbor.gpu_nbor()==2)
|
||||||
fprintf(screen,"Neighbor (CPU): %.4f s.\n",times[8]/_replica_size);
|
fprintf(screen,"Neighbor (CPU): %.4f s.\n",times[8]/_replica_size);
|
||||||
fprintf(screen,"GPU Overhead: %.4f s.\n",times[5]/_replica_size);
|
if (times[5]>0)
|
||||||
|
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
|
||||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||||
fprintf(screen,"Threads / atom: %d.\n",threads_per_atom);
|
fprintf(screen,"Threads / atom: %d.\n",threads_per_atom);
|
||||||
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
|
||||||
@ -541,14 +547,14 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DeviceT::output_kspace_times(UCL_Timer &time_in,
|
void DeviceT::output_kspace_times(UCL_Timer &time_in,
|
||||||
UCL_Timer &time_out,
|
UCL_Timer &time_out,
|
||||||
UCL_Timer &time_map,
|
UCL_Timer &time_map,
|
||||||
UCL_Timer &time_rho,
|
UCL_Timer &time_rho,
|
||||||
UCL_Timer &time_interp,
|
UCL_Timer &time_interp,
|
||||||
Answer<numtyp,acctyp> &ans,
|
Answer<numtyp,acctyp> &ans,
|
||||||
const double max_bytes,
|
const double max_bytes,
|
||||||
const double cpu_time,
|
const double cpu_time,
|
||||||
const double idle_time, FILE *screen) {
|
const double idle_time, FILE *screen) {
|
||||||
double single[8], times[8];
|
double single[8], times[8];
|
||||||
|
|
||||||
single[0]=time_out.total_seconds();
|
single[0]=time_out.total_seconds();
|
||||||
@ -566,16 +572,17 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in,
|
|||||||
double mpi_max_bytes;
|
double mpi_max_bytes;
|
||||||
MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,_comm_replica);
|
MPI_Reduce(&my_max_bytes,&mpi_max_bytes,1,MPI_DOUBLE,MPI_MAX,0,_comm_replica);
|
||||||
double max_mb=mpi_max_bytes/(1024.0*1024.0);
|
double max_mb=mpi_max_bytes/(1024.0*1024.0);
|
||||||
|
double t_time=times[0]+times[1]+times[2]+times[3]+times[4]+times[5];
|
||||||
|
|
||||||
if (replica_me()==0)
|
if (replica_me()==0)
|
||||||
if (screen && times[6]>0.0) {
|
if (screen && times[6]>0.0) {
|
||||||
fprintf(screen,"\n\n-------------------------------------");
|
fprintf(screen,"\n\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
fprintf(screen," GPU Time Info (average): ");
|
fprintf(screen," Device Time Info (average): ");
|
||||||
fprintf(screen,"\n-------------------------------------");
|
fprintf(screen,"\n-------------------------------------");
|
||||||
fprintf(screen,"--------------------------------\n");
|
fprintf(screen,"--------------------------------\n");
|
||||||
|
|
||||||
if (time_device()) {
|
if (time_device() && t_time>0) {
|
||||||
fprintf(screen,"Data Out: %.4f s.\n",times[0]/_replica_size);
|
fprintf(screen,"Data Out: %.4f s.\n",times[0]/_replica_size);
|
||||||
fprintf(screen,"Data In: %.4f s.\n",times[1]/_replica_size);
|
fprintf(screen,"Data In: %.4f s.\n",times[1]/_replica_size);
|
||||||
fprintf(screen,"Kernel (map): %.4f s.\n",times[2]/_replica_size);
|
fprintf(screen,"Kernel (map): %.4f s.\n",times[2]/_replica_size);
|
||||||
|
|||||||
@ -77,7 +77,7 @@ class Device {
|
|||||||
const int nlocal, const int host_nlocal, const int nall,
|
const int nlocal, const int host_nlocal, const int nall,
|
||||||
Neighbor *nbor, const int maxspecial, const int gpu_host,
|
Neighbor *nbor, const int maxspecial, const int gpu_host,
|
||||||
const int max_nbors, const double cell_size, const bool pre_cut,
|
const int max_nbors, const double cell_size, const bool pre_cut,
|
||||||
const int threads_per_atom);
|
const int threads_per_atom, const bool vel=false);
|
||||||
|
|
||||||
/// Initialize the device for Atom storage only
|
/// Initialize the device for Atom storage only
|
||||||
/** \param nlocal Total number of local particles to allocate memory for
|
/** \param nlocal Total number of local particles to allocate memory for
|
||||||
|
|||||||
Reference in New Issue
Block a user