Added changes to Atom and Device classes for allocation of extra fields and SBBITS15 and NEIGHMASK15
This commit is contained in:
@ -48,6 +48,8 @@ int AtomT::bytes_per_atom() const {
|
||||
bytes+=sizeof(numtyp);
|
||||
if (_vel)
|
||||
bytes+=4*sizeof(numtyp);
|
||||
if (_extra_fields>0)
|
||||
bytes+=_extra_fields*sizeof(numtyp);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -122,6 +124,11 @@ bool AtomT::alloc(const int nall) {
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=v.device.row_bytes();
|
||||
}
|
||||
if (_extra_fields>0 && _host_view==false) {
|
||||
success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=extra.device.row_bytes();
|
||||
}
|
||||
|
||||
if (_gpu_nbor>0) {
|
||||
if (_bonds) {
|
||||
@ -156,7 +163,8 @@ bool AtomT::alloc(const int nall) {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
const int gpu_nbor, const bool bonds, const bool vel) {
|
||||
const int gpu_nbor, const bool bonds, const bool vel,
|
||||
const int extra_fields) {
|
||||
bool success=true;
|
||||
// Ignore host/device transfers?
|
||||
int gpu_bytes=0;
|
||||
@ -191,6 +199,16 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
}
|
||||
}
|
||||
|
||||
if (extra_fields > 0 && _extra_fields==0) {
|
||||
_extra_fields=extra_fields;
|
||||
_other=true;
|
||||
if (_host_view==false) {
|
||||
success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
|
||||
UCL_READ_ONLY)==UCL_SUCCESS);
|
||||
gpu_bytes+=extra.device.row_bytes();
|
||||
}
|
||||
}
|
||||
|
||||
if (bonds && _bonds==false) {
|
||||
_bonds=true;
|
||||
if (_bonds && _gpu_nbor>0) {
|
||||
@ -254,7 +272,8 @@ bool AtomT::add_fields(const bool charge, const bool rot,
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||
UCL_Device &devi, const int gpu_nbor, const bool bonds, const bool vel) {
|
||||
UCL_Device &devi, const int gpu_nbor, const bool bonds, const bool vel,
|
||||
const int extra_fields) {
|
||||
clear();
|
||||
|
||||
bool success=true;
|
||||
@ -262,13 +281,15 @@ bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||
_q_avail=false;
|
||||
_quat_avail=false;
|
||||
_v_avail=false;
|
||||
_extra_avail=false;
|
||||
_resized=false;
|
||||
_gpu_nbor=gpu_nbor;
|
||||
_bonds=bonds;
|
||||
_charge=charge;
|
||||
_rot=rot;
|
||||
_vel=vel;
|
||||
_other=_charge || _rot || _vel;
|
||||
_extra_fields=extra_fields;
|
||||
_other=_charge || _rot || _vel || (extra_fields>0);
|
||||
dev=&devi;
|
||||
_time_transfer=0;
|
||||
|
||||
@ -282,10 +303,14 @@ bool AtomT::init(const int nall, const bool charge, const bool rot,
|
||||
time_q.init(*dev);
|
||||
time_quat.init(*dev);
|
||||
time_vel.init(*dev);
|
||||
time_extra.init(*dev);
|
||||
|
||||
time_pos.zero();
|
||||
time_q.zero();
|
||||
time_quat.zero();
|
||||
time_vel.zero();
|
||||
time_extra.zero();
|
||||
|
||||
_time_cast=0.0;
|
||||
|
||||
#ifdef GPU_CAST
|
||||
@ -308,6 +333,8 @@ void AtomT::clear_resize() {
|
||||
quat.clear();
|
||||
if (_vel)
|
||||
v.clear();
|
||||
if (_extra_fields>0)
|
||||
extra.clear();
|
||||
|
||||
dev_cell_id.clear();
|
||||
dev_particle_id.clear();
|
||||
@ -350,6 +377,7 @@ void AtomT::clear() {
|
||||
time_q.clear();
|
||||
time_quat.clear();
|
||||
time_vel.clear();
|
||||
time_extra.clear();
|
||||
clear_resize();
|
||||
|
||||
#ifdef GPU_CAST
|
||||
@ -370,6 +398,8 @@ double AtomT::host_memory_usage() const {
|
||||
atom_bytes+=4;
|
||||
if (_vel)
|
||||
atom_bytes+=4;
|
||||
if (_extra_fields>0)
|
||||
atom_bytes+=_extra_fields;
|
||||
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ class Atom {
|
||||
* gpu_nbor 2 if binning on host and neighboring on device **/
|
||||
bool init(const int nall, const bool charge, const bool rot,
|
||||
UCL_Device &dev, const int gpu_nbor=0, const bool bonds=false,
|
||||
const bool vel=false);
|
||||
const bool vel=false, const int extra_fields=0);
|
||||
|
||||
/// Check if we have enough device storage and realloc if not
|
||||
/** Returns true if resized with any call during this timestep **/
|
||||
@ -96,7 +96,7 @@ class Atom {
|
||||
* gpu_nbor 1 if neighboring will be performed on device
|
||||
* gpu_nbor 2 if binning on host and neighboring on device **/
|
||||
bool add_fields(const bool charge, const bool rot, const int gpu_nbor,
|
||||
const bool bonds, const bool vel=false);
|
||||
const bool bonds, const bool vel=false, const int extra_fields=0);
|
||||
|
||||
/// Returns true if GPU is using charges
|
||||
bool charge() { return _charge; }
|
||||
@ -107,6 +107,9 @@ class Atom {
|
||||
/// Returns true if GPU is using velocities
|
||||
bool velocity() { return _vel; }
|
||||
|
||||
/// Returns true if GPU is using extra fields
|
||||
bool using_extra() { return _extra_fields; }
|
||||
|
||||
/// Only free matrices of length inum or nall for resizing
|
||||
void clear_resize();
|
||||
|
||||
@ -450,6 +453,38 @@ class Atom {
|
||||
add_v_data(host_ptr,host_tag);
|
||||
}
|
||||
|
||||
// Cast extras to write buffer
|
||||
template<class cpytyp>
|
||||
inline void cast_extra_data(cpytyp *host_ptr) {
|
||||
if (_extra_avail==false) {
|
||||
double t=MPI_Wtime();
|
||||
if (_host_view) {
|
||||
extra.host.view((numtyp*)host_ptr,_nall*_extra_fields,*dev);
|
||||
extra.device.view(extra.host);
|
||||
} else if (sizeof(numtyp)==sizeof(double))
|
||||
memcpy(extra.host.begin(),host_ptr,_nall*_extra_fields*sizeof(numtyp));
|
||||
else
|
||||
#if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp parallel for simd schedule(static)
|
||||
#elif (LAL_USE_OMP_SIMD == 1)
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int i=0; i<_nall*_extra_fields; i++) extra[i]=host_ptr[i];
|
||||
_time_cast+=MPI_Wtime()-t;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy extras to device
|
||||
/** Copies nall()*_extra elements **/
|
||||
inline void add_extra_data() {
|
||||
time_extra.start();
|
||||
if (_extra_avail==false) {
|
||||
extra.update_device(_nall*_extra_fields,true);
|
||||
_extra_avail=true;
|
||||
}
|
||||
time_extra.stop();
|
||||
}
|
||||
|
||||
/// Add in casting time from additional data (seconds)
|
||||
inline void add_cast_time(double t) { _time_cast+=t; }
|
||||
|
||||
@ -473,6 +508,8 @@ class Atom {
|
||||
UCL_Vector<numtyp,numtyp> quat;
|
||||
/// Velocities
|
||||
UCL_Vector<numtyp,numtyp> v;
|
||||
/// Extras
|
||||
UCL_Vector<numtyp,numtyp> extra;
|
||||
|
||||
#ifdef GPU_CAST
|
||||
UCL_Vector<double,double> x_cast;
|
||||
@ -493,7 +530,7 @@ class Atom {
|
||||
UCL_H_Vec<int> host_particle_id;
|
||||
|
||||
/// Device timers
|
||||
UCL_Timer time_pos, time_q, time_quat, time_vel;
|
||||
UCL_Timer time_pos, time_q, time_quat, time_vel, time_extra;
|
||||
|
||||
/// Geryon device
|
||||
UCL_Device *dev;
|
||||
@ -508,11 +545,12 @@ class Atom {
|
||||
bool _compiled;
|
||||
|
||||
// True if data has been copied to device already
|
||||
bool _x_avail, _q_avail, _quat_avail, _v_avail, _resized;
|
||||
bool _x_avail, _q_avail, _quat_avail, _v_avail, _extra_avail, _resized;
|
||||
|
||||
bool alloc(const int nall);
|
||||
|
||||
bool _allocated, _rot, _charge, _bonds, _vel, _other;
|
||||
int _extra_fields;
|
||||
int _max_atoms, _nall, _gpu_nbor;
|
||||
bool _host_view;
|
||||
double _time_cast, _time_transfer;
|
||||
|
||||
@ -424,7 +424,7 @@ template <class numtyp, class acctyp>
|
||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
const bool rot, const int nlocal,
|
||||
const int nall, const int maxspecial,
|
||||
const bool vel) {
|
||||
const bool vel, const int extra_fields) {
|
||||
if (!_device_init)
|
||||
return -1;
|
||||
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
||||
@ -453,7 +453,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
|
||||
if (_init_count==0) {
|
||||
// Initialize atom and nbor data
|
||||
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0,vel))
|
||||
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0,vel,extra_fields))
|
||||
return -3;
|
||||
|
||||
_data_in_estimate++;
|
||||
@ -463,6 +463,9 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
_data_in_estimate++;
|
||||
if (vel)
|
||||
_data_in_estimate++;
|
||||
if (extra_fields>0)
|
||||
_data_in_estimate++;
|
||||
|
||||
} else {
|
||||
if (atom.charge()==false && charge)
|
||||
_data_in_estimate++;
|
||||
@ -470,7 +473,9 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
_data_in_estimate++;
|
||||
if (atom.velocity()==false && vel)
|
||||
_data_in_estimate++;
|
||||
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel))
|
||||
if (atom.using_extra()==false && extra_fields>0)
|
||||
_data_in_estimate++;
|
||||
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel,extra_fields))
|
||||
return -3;
|
||||
}
|
||||
|
||||
|
||||
@ -61,6 +61,7 @@ class Device {
|
||||
* \param nall Total number of local+ghost particles
|
||||
* \param maxspecial Maximum mumber of special bonded atoms per atom
|
||||
* \param vel True if velocities need to be stored
|
||||
* \param extra_fields Nonzero if extra fields need to be stored
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successful
|
||||
@ -70,7 +71,7 @@ class Device {
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &ans, const bool charge, const bool rot,
|
||||
const int nlocal, const int nall, const int maxspecial,
|
||||
const bool vel=false);
|
||||
const bool vel=false, const int extra_fields=0);
|
||||
|
||||
/// Initialize the device for Atom storage only
|
||||
/** \param nlocal Total number of local particles to allocate memory for
|
||||
|
||||
@ -330,6 +330,10 @@
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
ucl_inline int sbmask(int j) { return j >> SBBITS & 3; };
|
||||
|
||||
#define SBBITS15 29
|
||||
#define NEIGHMASK15 0x1FFFFFFF
|
||||
ucl_inline int sbmask15(int j) { return j >> SBBITS15 & 7; };
|
||||
|
||||
// default to 32-bit smallint and other ints, 64-bit bigint:
|
||||
// same as defined in src/lmptype.h
|
||||
#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && \
|
||||
|
||||
Reference in New Issue
Block a user