More cleanup

This commit is contained in:
Trung Nguyen
2021-09-17 23:24:23 -05:00
parent 78045d8f76
commit 5d801e985f
3 changed files with 82 additions and 68 deletions

View File

@ -182,13 +182,13 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
this->time_pair.start(); this->time_pair.start();
// Build the short neighbor list if not done yet // Build the short neighbor list if not done yet
if (!this->short_nbor_avail) { if (!this->short_nbor_polar_avail) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->_off2_polar, &ainum, &this->dev_short_nbor, &this->_off2_polar, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_polar_avail = true;
} }
this->k_udirect2b.set_size(GX,BX); this->k_udirect2b.set_size(GX,BX);
@ -222,13 +222,13 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
this->time_pair.start(); this->time_pair.start();
// Build the short neighbor list if not done yet // Build the short neighbor list if not done yet
if (!this->short_nbor_avail) { if (!this->short_nbor_polar_avail) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->dev_short_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor,
&this->_off2_polar, &ainum, &nbor_pitch, &this->_off2_polar, &ainum, &nbor_pitch,
&this->_threads_per_atom); &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_polar_avail = true;
} }
this->k_umutual2b.set_size(GX,BX); this->k_umutual2b.set_size(GX,BX);
@ -261,13 +261,13 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
this->time_pair.start(); this->time_pair.start();
// Build the short neighbor list if not done yet // Build the short neighbor list if not done yet
if (!this->short_nbor_avail) { if (!this->short_nbor_polar_avail) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->_off2_polar, &ainum, &this->dev_short_nbor, &this->_off2_polar, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_polar_avail = true;
} }
this->k_polar.set_size(GX,BX); this->k_polar.set_size(GX,BX);
@ -283,7 +283,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
// Signal that short nbor list is not avail for the next time step // Signal that short nbor list is not avail for the next time step
// do it here because polar_real() is the last kernel in a time step at this point // do it here because polar_real() is the last kernel in a time step at this point
this->short_nbor_avail = false; this->short_nbor_polar_avail = false;
return GX; return GX;
} }

View File

@ -21,7 +21,7 @@ namespace LAMMPS_AL {
extern Device<PRECISION,ACC_PRECISION> global_device; extern Device<PRECISION,ACC_PRECISION> global_device;
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
BaseAmoebaT::BaseAmoeba() : _compiled(false), _max_bytes(0), short_nbor_avail(false) { BaseAmoebaT::BaseAmoeba() : _compiled(false), _max_bytes(0), short_nbor_polar_avail(false) {
device=&global_device; device=&global_device;
ans=new Answer<numtyp,acctyp>(); ans=new Answer<numtyp,acctyp>();
nbor=new Neighbor(); nbor=new Neighbor();
@ -241,11 +241,12 @@ inline int BaseAmoebaT::build_nbor_list(const int inum, const int host_inum,
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Copy nbor list from host if necessary and then calculate forces, virials,.. // Copy nbor list from host if necessary and then calculate forces, virials
// for the polar real-space term
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_full,
double **host_x, int *host_type, int *host_amtype, const int nall, double **host_x, int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double **host_uind, double **host_uinp,
int *ilist, int *numj, int **firstneigh, int *ilist, int *numj, int **firstneigh,
@ -432,17 +433,20 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
// Reneighbor on GPU if necessary, and then compute multipole real-space // Reneighbor on GPU if necessary, and then compute multipole real-space
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, const int nall, int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
double **host_x, int *host_type, int *host_amtype, const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, int **nspecial, tagint **special,
int *nspecial15, tagint **special15, int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom,
int **ilist, int **jnum, const double cpu_time, int &host_start, int **ilist, int **jnum,
bool &success, const double aewald, const double felec, const double off2_mpole, const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd, void **tep_ptr) { const double aewald, const double felec,
const double off2_mpole, double *host_q,
double *boxlo, double *prd, void **tep_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -492,7 +496,8 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, co
_aewald = aewald; _aewald = aewald;
const int red_blocks=multipole_real(eflag,vflag); const int red_blocks=multipole_real(eflag,vflag);
// leave the answers (forces, energies and virial) on the device, only copy them back in the last kernel (polar_real) // leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (polar_real)
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
//device->add_ans_object(ans); //device->add_ans_object(ans);
@ -516,18 +521,21 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, co
// of the permanent field // of the permanent field
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const int nall, int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
double **host_x, int *host_type, int *host_amtype, const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, int **nspecial, tagint **special,
int *nspecial15, tagint **special15, int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom,
int **ilist, int **jnum, const double cpu_time, int &host_start, int **ilist, int **jnum,
bool &success, const double aewald, const double off2_polar, const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { const double aewald, const double off2_polar,
double *host_q, double *boxlo, double *prd,
void** fieldp_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -587,18 +595,21 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
// of the induced field // of the induced field
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const int nall, int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full,
double **host_x, int *host_type, int *host_amtype, const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, int **nspecial, tagint **special,
int *nspecial15, tagint **special15, int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom,
int **ilist, int **jnum, const double cpu_time, int &host_start, int **ilist, int **jnum,
bool &success, const double aewald, const double off2_polar, const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { const double aewald, const double off2_polar,
double *host_q, double *boxlo, double *prd,
void** fieldp_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -657,19 +668,21 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
// Reneighbor on GPU if necessary, and then compute polar real-space // Reneighbor on GPU if necessary, and then compute polar real-space
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const int nall, int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
double **host_x, int *host_type, int *host_amtype, const int nall, double **host_x,
int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, int **nspecial, tagint **special,
int *nspecial15, tagint **special15, int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom,
int **ilist, int **jnum, const double cpu_time, int &host_start, int **ilist, int **jnum,
bool &success, const double aewald, const double felec, const double cpu_time, bool &success,
const double off2_polar, double *host_q, double *boxlo, const double aewald, const double felec,
double *prd, void **tep_ptr) { const double off2_polar, double *host_q,
double *boxlo, double *prd, void **tep_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -719,7 +732,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
_aewald = aewald; _aewald = aewald;
const int red_blocks=polar_real(eflag,vflag); const int red_blocks=polar_real(eflag,vflag);
// only copy answers (forces, energies and virial) back from the device in the last kernel (which is polar_real here) // only copy answers (forces, energies and virial) back from the device
// in the last kernel (which is polar_real here)
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
device->add_ans_object(ans); device->add_ans_object(ans);
@ -747,7 +761,6 @@ double BaseAmoebaT::host_memory_usage_atomic() const {
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole, void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
double** uind, double** uinp) { double** uind, double** uinp) {
// signal that we need to transfer extra data from the host // signal that we need to transfer extra data from the host
atom->extra_data_unavail(); atom->extra_data_unavail();

View File

@ -192,8 +192,8 @@ class BaseAmoeba {
const bool eflag, const bool vflag, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success, int **ilist, int **numj, const double cpu_time, bool &success,
const double aewald, const double felec, const double off2_polar, double *charge, const double aewald, const double felec, const double off2_polar,
double *boxlo, double *prd, void **tep_ptr); double *charge, double *boxlo, double *prd, void **tep_ptr);
/// Compute polar real-space with host neighboring (not active for now) /// Compute polar real-space with host neighboring (not active for now)
void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall,
@ -202,8 +202,9 @@ class BaseAmoeba {
double **host_uinp, int *ilist, int *numj, double **host_uinp, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, const double aewald, const double felec, const double off2_polar, const double cpu_time, bool &success, const double aewald, const double felec,
double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr); const double off2_polar, double *charge, const int nlocal, double *boxlo,
double *prd, void **tep_ptr);
// -------------------------- DEVICE DATA ------------------------- // -------------------------- DEVICE DATA -------------------------
@ -271,7 +272,7 @@ class BaseAmoeba {
int _extra_fields; int _extra_fields;
double _max_bytes, _max_an_bytes, _maxspecial, _maxspecial15, _max_nbors; double _max_bytes, _max_an_bytes, _maxspecial, _maxspecial15, _max_nbors;
double _gpu_overhead, _driver_overhead; double _gpu_overhead, _driver_overhead;
bool short_nbor_avail; bool short_nbor_polar_avail;
UCL_D_Vec<int> *_nbor_data; UCL_D_Vec<int> *_nbor_data;
numtyp _aewald,_felec; numtyp _aewald,_felec;