Added changes to Atom and Device classes for allocation of extra fields and SBBITS15 and NEIGHMASK15

This commit is contained in:
Trung Nguyen
2021-08-26 09:33:20 -05:00
parent db92844228
commit 91317b2879
5 changed files with 89 additions and 11 deletions

View File

@ -48,6 +48,8 @@ int AtomT::bytes_per_atom() const {
bytes+=sizeof(numtyp);
if (_vel)
bytes+=4*sizeof(numtyp);
if (_extra_fields>0)
bytes+=_extra_fields*sizeof(numtyp);
return bytes;
}
@ -122,6 +124,11 @@ bool AtomT::alloc(const int nall) {
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=v.device.row_bytes();
}
if (_extra_fields>0 && _host_view==false) {
success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=extra.device.row_bytes();
}
if (_gpu_nbor>0) {
if (_bonds) {
@ -156,7 +163,8 @@ bool AtomT::alloc(const int nall) {
template <class numtyp, class acctyp>
bool AtomT::add_fields(const bool charge, const bool rot,
const int gpu_nbor, const bool bonds, const bool vel) {
const int gpu_nbor, const bool bonds, const bool vel,
const int extra_fields) {
bool success=true;
// Ignore host/device transfers?
int gpu_bytes=0;
@ -191,6 +199,16 @@ bool AtomT::add_fields(const bool charge, const bool rot,
}
}
if (extra_fields > 0 && _extra_fields==0) {
_extra_fields=extra_fields;
_other=true;
if (_host_view==false) {
success=success && (extra.alloc(_max_atoms*_extra_fields,*dev,UCL_WRITE_ONLY,
UCL_READ_ONLY)==UCL_SUCCESS);
gpu_bytes+=extra.device.row_bytes();
}
}
if (bonds && _bonds==false) {
_bonds=true;
if (_bonds && _gpu_nbor>0) {
@ -254,7 +272,8 @@ bool AtomT::add_fields(const bool charge, const bool rot,
template <class numtyp, class acctyp>
bool AtomT::init(const int nall, const bool charge, const bool rot,
UCL_Device &devi, const int gpu_nbor, const bool bonds, const bool vel) {
UCL_Device &devi, const int gpu_nbor, const bool bonds, const bool vel,
const int extra_fields) {
clear();
bool success=true;
@ -262,13 +281,15 @@ bool AtomT::init(const int nall, const bool charge, const bool rot,
_q_avail=false;
_quat_avail=false;
_v_avail=false;
_extra_avail=false;
_resized=false;
_gpu_nbor=gpu_nbor;
_bonds=bonds;
_charge=charge;
_rot=rot;
_vel=vel;
_other=_charge || _rot || _vel;
_extra_fields=extra_fields;
_other=_charge || _rot || _vel || (extra_fields>0);
dev=&devi;
_time_transfer=0;
@ -282,10 +303,14 @@ bool AtomT::init(const int nall, const bool charge, const bool rot,
time_q.init(*dev);
time_quat.init(*dev);
time_vel.init(*dev);
time_extra.init(*dev);
time_pos.zero();
time_q.zero();
time_quat.zero();
time_vel.zero();
time_extra.zero();
_time_cast=0.0;
#ifdef GPU_CAST
@ -308,6 +333,8 @@ void AtomT::clear_resize() {
quat.clear();
if (_vel)
v.clear();
if (_extra_fields>0)
extra.clear();
dev_cell_id.clear();
dev_particle_id.clear();
@ -350,6 +377,7 @@ void AtomT::clear() {
time_q.clear();
time_quat.clear();
time_vel.clear();
time_extra.clear();
clear_resize();
#ifdef GPU_CAST
@ -370,6 +398,8 @@ double AtomT::host_memory_usage() const {
atom_bytes+=4;
if (_vel)
atom_bytes+=4;
if (_extra_fields>0)
atom_bytes+=_extra_fields;
return _max_atoms*atom_bytes*sizeof(numtyp)+sizeof(Atom<numtyp,acctyp>);
}

View File

@ -76,7 +76,7 @@ class Atom {
* gpu_nbor 2 if binning on host and neighboring on device **/
bool init(const int nall, const bool charge, const bool rot,
UCL_Device &dev, const int gpu_nbor=0, const bool bonds=false,
const bool vel=false);
const bool vel=false, const int extra_fields=0);
/// Check if we have enough device storage and realloc if not
/** Returns true if resized with any call during this timestep **/
@ -96,7 +96,7 @@ class Atom {
* gpu_nbor 1 if neighboring will be performed on device
* gpu_nbor 2 if binning on host and neighboring on device **/
bool add_fields(const bool charge, const bool rot, const int gpu_nbor,
const bool bonds, const bool vel=false);
const bool bonds, const bool vel=false, const int extra_fields=0);
/// Returns true if GPU is using charges
bool charge() { return _charge; }
@ -107,6 +107,9 @@ class Atom {
/// Returns true if GPU is using velocities
bool velocity() { return _vel; }
/// Returns true if GPU is using extra fields
bool using_extra() { return _extra_fields; }
/// Only free matrices of length inum or nall for resizing
void clear_resize();
@ -450,6 +453,38 @@ class Atom {
add_v_data(host_ptr,host_tag);
}
// Cast extras to write buffer
template<class cpytyp>
inline void cast_extra_data(cpytyp *host_ptr) {
if (_extra_avail==false) {
double t=MPI_Wtime();
if (_host_view) {
extra.host.view((numtyp*)host_ptr,_nall*_extra_fields,*dev);
extra.device.view(extra.host);
} else if (sizeof(numtyp)==sizeof(double))
memcpy(extra.host.begin(),host_ptr,_nall*_extra_fields*sizeof(numtyp));
else
#if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
#pragma omp parallel for simd schedule(static)
#elif (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i=0; i<_nall*_extra_fields; i++) extra[i]=host_ptr[i];
_time_cast+=MPI_Wtime()-t;
}
}
// Copy extras to device
/** Copies nall()*_extra elements **/
inline void add_extra_data() {
time_extra.start();
if (_extra_avail==false) {
extra.update_device(_nall*_extra_fields,true);
_extra_avail=true;
}
time_extra.stop();
}
/// Add in casting time from additional data (seconds)
inline void add_cast_time(double t) { _time_cast+=t; }
@ -473,6 +508,8 @@ class Atom {
UCL_Vector<numtyp,numtyp> quat;
/// Velocities
UCL_Vector<numtyp,numtyp> v;
/// Extras
UCL_Vector<numtyp,numtyp> extra;
#ifdef GPU_CAST
UCL_Vector<double,double> x_cast;
@ -493,7 +530,7 @@ class Atom {
UCL_H_Vec<int> host_particle_id;
/// Device timers
UCL_Timer time_pos, time_q, time_quat, time_vel;
UCL_Timer time_pos, time_q, time_quat, time_vel, time_extra;
/// Geryon device
UCL_Device *dev;
@ -508,11 +545,12 @@ class Atom {
bool _compiled;
// True if data has been copied to device already
bool _x_avail, _q_avail, _quat_avail, _v_avail, _resized;
bool _x_avail, _q_avail, _quat_avail, _v_avail, _extra_avail, _resized;
bool alloc(const int nall);
bool _allocated, _rot, _charge, _bonds, _vel, _other;
int _extra_fields;
int _max_atoms, _nall, _gpu_nbor;
bool _host_view;
double _time_cast, _time_transfer;

View File

@ -424,7 +424,7 @@ template <class numtyp, class acctyp>
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
const bool rot, const int nlocal,
const int nall, const int maxspecial,
const bool vel) {
const bool vel, const int extra_fields) {
if (!_device_init)
return -1;
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
@ -453,7 +453,7 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
if (_init_count==0) {
// Initialize atom and nbor data
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0,vel))
if (!atom.init(nall,charge,rot,*gpu,gpu_nbor,gpu_nbor>0 && maxspecial>0,vel,extra_fields))
return -3;
_data_in_estimate++;
@ -463,6 +463,9 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
_data_in_estimate++;
if (vel)
_data_in_estimate++;
if (extra_fields>0)
_data_in_estimate++;
} else {
if (atom.charge()==false && charge)
_data_in_estimate++;
@ -470,7 +473,9 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
_data_in_estimate++;
if (atom.velocity()==false && vel)
_data_in_estimate++;
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel))
if (atom.using_extra()==false && extra_fields>0)
_data_in_estimate++;
if (!atom.add_fields(charge,rot,gpu_nbor,gpu_nbor>0 && maxspecial,vel,extra_fields))
return -3;
}

View File

@ -61,6 +61,7 @@ class Device {
* \param nall Total number of local+ghost particles
* \param maxspecial Maximum mumber of special bonded atoms per atom
* \param vel True if velocities need to be stored
* \param extra_fields Nonzero if extra fields need to be stored
*
* Returns:
* - 0 if successful
@ -70,7 +71,7 @@ class Device {
* - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &ans, const bool charge, const bool rot,
const int nlocal, const int nall, const int maxspecial,
const bool vel=false);
const bool vel=false, const int extra_fields=0);
/// Initialize the device for Atom storage only
/** \param nlocal Total number of local particles to allocate memory for

View File

@ -330,6 +330,10 @@
#define NEIGHMASK 0x3FFFFFFF
ucl_inline int sbmask(int j) { return j >> SBBITS & 3; };
#define SBBITS15 29
#define NEIGHMASK15 0x1FFFFFFF
ucl_inline int sbmask15(int j) { return j >> SBBITS15 & 7; };
// default to 32-bit smallint and other ints, 64-bit bigint:
// same as defined in src/lmptype.h
#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && \