diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index af71decb86..d2f2b1bf79 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -182,13 +182,13 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) { this->time_pair.start(); // Build the short neighbor list if not done yet - if (!this->short_nbor_avail) { + if (!this->short_nbor_polar_avail) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_off2_polar, &ainum, &nbor_pitch, &this->_threads_per_atom); - this->short_nbor_avail = true; + this->short_nbor_polar_avail = true; } this->k_udirect2b.set_size(GX,BX); @@ -222,13 +222,13 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) { this->time_pair.start(); // Build the short neighbor list if not done yet - if (!this->short_nbor_avail) { + if (!this->short_nbor_polar_avail) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_off2_polar, &ainum, &nbor_pitch, &this->_threads_per_atom); - this->short_nbor_avail = true; + this->short_nbor_polar_avail = true; } this->k_umutual2b.set_size(GX,BX); @@ -261,13 +261,13 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { this->time_pair.start(); // Build the short neighbor list if not done yet - if (!this->short_nbor_avail) { + if (!this->short_nbor_polar_avail) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_off2_polar, &ainum, &nbor_pitch, &this->_threads_per_atom); - this->short_nbor_avail = true; + this->short_nbor_polar_avail = true; } this->k_polar.set_size(GX,BX); @@ -283,7 +283,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { // Signal that short nbor list is not avail for the next time step // do it here because polar_real() is the last kernel in a time step at this point - this->short_nbor_avail = false; + this->short_nbor_polar_avail = false; return GX; } diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index f70903c889..e777981912 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -21,7 +21,7 @@ namespace LAMMPS_AL { extern Device global_device; template -BaseAmoebaT::BaseAmoeba() : _compiled(false), _max_bytes(0), short_nbor_avail(false) { +BaseAmoebaT::BaseAmoeba() : _compiled(false), _max_bytes(0), short_nbor_polar_avail(false) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); @@ -241,11 +241,12 @@ inline int BaseAmoebaT::build_nbor_list(const int inum, const int host_inum, } // --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then calculate forces, virials,.. +// Copy nbor list from host if necessary and then calculate forces, virials +// for the polar real-space term // --------------------------------------------------------------------------- template -void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, +void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_full, + const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, int *ilist, int *numj, int **firstneigh, @@ -432,17 +433,20 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall // Reneighbor on GPU if necessary, and then compute polar real-space // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, const double off2_mpole, - double *host_q, double *boxlo, double *prd, void **tep_ptr) { +int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double felec, + const double off2_mpole, double *host_q, + double *boxlo, double *prd, void **tep_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -492,7 +496,8 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, co _aewald = aewald; const int red_blocks=multipole_real(eflag,vflag); - // leave the answers (forces, energies and virial) on the device, only copy them back in the last kernel (polar_real) + // leave the answers (forces, energies and virial) on the device, + // only copy them back in the last kernel (polar_real) //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); //device->add_ans_object(ans); @@ -516,18 +521,21 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, co // of the permanent field // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { +int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -587,18 +595,21 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i // of the induced field // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { +int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_polar, + double *host_q, double *boxlo, double *prd, + void** fieldp_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -657,19 +668,21 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i // Reneighbor on GPU if necessary, and then compute polar real-space // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, - const double off2_polar, double *host_q, double *boxlo, - double *prd, void **tep_ptr) { +int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double felec, + const double off2_polar, double *host_q, + double *boxlo, double *prd, void **tep_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -719,7 +732,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const _aewald = aewald; const int red_blocks=polar_real(eflag,vflag); - // only copy answers (forces, energies and virial) back from the device in the last kernel (which is polar_real here) + // only copy answers (forces, energies and virial) back from the device + // in the last kernel (which is polar_real here) ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); device->add_ans_object(ans); @@ -746,8 +760,7 @@ double BaseAmoebaT::host_memory_usage_atomic() const { template void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole, - double** uind, double** uinp) { - + double** uind, double** uinp) { // signal that we need to transfer extra data from the host atom->extra_data_unavail(); diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index 0b6c09742e..a45316b6f3 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -192,8 +192,8 @@ class BaseAmoeba { const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, - const double aewald, const double felec, const double off2_polar, double *charge, - double *boxlo, double *prd, void **tep_ptr); + const double aewald, const double felec, const double off2_polar, + double *charge, double *boxlo, double *prd, void **tep_ptr); /// Compute polar real-space with host neighboring (not active for now) void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, @@ -202,8 +202,9 @@ class BaseAmoeba { double **host_uinp, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, const double aewald, const double felec, const double off2_polar, - double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr); + const double cpu_time, bool &success, const double aewald, const double felec, + const double off2_polar, double *charge, const int nlocal, double *boxlo, + double *prd, void **tep_ptr); // -------------------------- DEVICE DATA ------------------------- @@ -271,7 +272,7 @@ class BaseAmoeba { int _extra_fields; double _max_bytes, _max_an_bytes, _maxspecial, _maxspecial15, _max_nbors; double _gpu_overhead, _driver_overhead; - bool short_nbor_avail; + bool short_nbor_polar_avail; UCL_D_Vec *_nbor_data; numtyp _aewald,_felec;