diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index b73f6c4ca6..18e1cf22f8 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -134,58 +134,28 @@ int** amoeba_gpu_compute_multipole_real(const int ago, const int inum_full, cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr); } -int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, +void amoeba_gpu_compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr) { - return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); + const double aewald, const double off2, void **fieldp_ptr) { + AMOEBAMF.compute_udirect2b(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, + aewald, off2, fieldp_ptr); } -int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr) { - return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); +void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + const double aewald, const double off2, void **fieldp_ptr) { + AMOEBAMF.compute_umutual2b(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, + aewald, off2, fieldp_ptr); } -int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, - double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, const double off2, - double *host_q, double *boxlo, double *prd, void **tep_ptr) { - return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr); +void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + const double aewald, const double felec, const double off2, + void **tep_ptr) { + AMOEBAMF.compute_polar_real(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, nullptr, + eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr); } double amoeba_gpu_bytes() { diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index c56cb77aa3..5b396a641e 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -487,35 +487,15 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, // of the permanent field // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, +void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { - // reallocate per-atom arrays, transfer data from the host - // and build the neighbor lists if needed - - int** firstneigh = nullptr; + // all the necessary data arrays are already copied from host to device cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval); atom->add_extra_data(); - - // ------------------- Resize _fieldp array ------------------------ - - if (inum_full>_max_fieldp_size) { - _max_fieldp_size=static_cast(static_cast(inum_full)*1.10); - _fieldp.resize(_max_fieldp_size*8); - } + *fieldp_ptr=_fieldp.host.begin(); _off2_polar = off2_polar; @@ -525,8 +505,6 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, // copy field and fieldp from device to host (_fieldp store both arrays, one after another) _fieldp.update_host(_max_fieldp_size*8,false); - - return firstneigh; //nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -534,35 +512,15 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, // of the induced field // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, +void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { - // reallocate per-atom arrays, transfer extra data from the host - // and build the neighbor lists if needed - - int** firstneigh = nullptr; + // all the necessary data arrays are already copied from host to device cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval); atom->add_extra_data(); - // ------------------- Resize _fieldp array ------------------------ - - if (inum_full>_max_fieldp_size) { - _max_fieldp_size=static_cast(static_cast(inum_full)*1.10); - _fieldp.resize(_max_fieldp_size*8); - } *fieldp_ptr=_fieldp.host.begin(); _off2_polar = off2_polar; @@ -572,41 +530,25 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, // copy field and fieldp from device to host (_fieldp store both arrays, one after another) _fieldp.update_host(_max_fieldp_size*8,false); - - return firstneigh; //nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- // Reneighbor on GPU if necessary, and then compute polar real-space // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double felec, - const double off2_polar, double *host_q, - double *boxlo, double *prd, void **tep_ptr) { +void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup, + double **host_rpole, double **host_uind, + double **host_uinp, double *host_pval, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + const double aewald, const double felec, + const double off2_polar, void **tep_ptr) { int** firstneigh = nullptr; cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval); atom->add_extra_data(); - // ------------------- Resize _tep array ------------------------ - - if (inum_full>_max_tep_size) { - _max_tep_size=static_cast(static_cast(inum_full)*1.10); - _tep.resize(_max_tep_size*4); - } *tep_ptr=_tep.host.begin(); _off2_polar = off2_polar; @@ -624,8 +566,6 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, // copy tep from device to host _tep.update_host(_max_tep_size*4,false); - - return firstneigh; // nbor->host_jlist.begin()-host_start; } template diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index dc3467f692..7f9777061c 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -155,45 +155,22 @@ class BaseAmoeba { double *prd, void **tep_ptr); /// Compute the real space part of the permanent field (udirect2b) with device neighboring - virtual int** compute_udirect2b(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, + virtual void compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success, - const double aewald, const double off2_polar, - double *charge, double *boxlo, double *prd, void **fieldp_ptr); + const double aewald, const double off2_polar, void **fieldp_ptr); /// Compute the real space part of the induced field (umutual2b) with device neighboring - virtual int** compute_umutual2b(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, + virtual void compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success, - const double aewald, const double off2_polar, double *charge, - double *boxlo, double *prd, void **fieldp_ptr); + const double aewald, const double off2_polar, void **fieldp_ptr); /// Compute polar real-space with device neighboring - virtual int** compute_polar_real(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, double **host_uind, - double **host_uinp, double *host_pval, double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, + virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success, + const bool eatom, const bool vatom, const double aewald, const double felec, const double off2_polar, - double *charge, double *boxlo, double *prd, void **tep_ptr); + void **tep_ptr); /// Compute polar real-space with host neighboring (not active for now) void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 5a348c9272..f62c46aaec 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -301,19 +301,9 @@ int HippoT::repulsion(const int eflag, const int vflag) { // Reneighbor on GPU if necessary, and then compute dispersion real-space // --------------------------------------------------------------------------- template -int** HippoT::compute_dispersion_real(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double off2_disp, - double *host_q, double *boxlo, double *prd) { +int** HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup, + double **host_rpole, const double aewald, + const double off2_disp) { // cast necessary data arrays from host to device @@ -475,21 +465,9 @@ int HippoT::multipole_real(const int eflag, const int vflag) { // of the permanent field // --------------------------------------------------------------------------- template -int** HippoT::compute_udirect2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double* host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, +void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double* host_pval, const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, void** fieldp_ptr) { // all the necessary data arrays are already copied from host to device @@ -497,12 +475,6 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full, this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, host_pval); this->atom->add_extra_data(); - // ------------------- Resize _fieldp array ------------------------ - - if (inum_full>this->_max_fieldp_size) { - this->_max_fieldp_size=static_cast(static_cast(inum_full)*1.10); - this->_fieldp.resize(this->_max_fieldp_size*8); - } *fieldp_ptr=this->_fieldp.host.begin(); this->_off2_polar = off2_polar; @@ -512,8 +484,6 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full, // copy field and fieldp from device to host (_fieldp store both arrays, one after another) this->_fieldp.update_host(this->_max_fieldp_size*8,false); - - return nullptr; //nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -562,33 +532,16 @@ int HippoT::udirect2b(const int eflag, const int vflag) { // of the induced field // --------------------------------------------------------------------------- template -int** HippoT::compute_umutual2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, - void** fieldp_ptr) { +void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const double aewald, const double off2_polar, + void** fieldp_ptr) { // cast necessary data arrays from host to device this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, nullptr); this->atom->add_extra_data(); - // ------------------- Resize _fieldp array ------------------------ - - if (inum_full>this->_max_fieldp_size) { - this->_max_fieldp_size=static_cast(static_cast(inum_full)*1.10); - this->_fieldp.resize(this->_max_fieldp_size*8); - } *fieldp_ptr=this->_fieldp.host.begin(); this->_off2_polar = off2_polar; @@ -598,8 +551,6 @@ int** HippoT::compute_umutual2b(const int ago, const int inum_full, // copy field and fieldp from device to host (_fieldp store both arrays, one after another) this->_fieldp.update_host(this->_max_fieldp_size*8,false); - - return nullptr; //nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -646,34 +597,17 @@ int HippoT::umutual2b(const int eflag, const int vflag) { // Reneighbor on GPU if necessary, and then compute polar real-space // --------------------------------------------------------------------------- template -int** HippoT::compute_polar_real(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, - double *host_pval, double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double felec, - const double off2_polar, double *host_q, - double *boxlo, double *prd, void **tep_ptr) { - +void HippoT::compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + const double aewald, const double felec, + const double off2_polar, void **tep_ptr) { // cast necessary data arrays from host to device - //this->cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval); this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, nullptr); this->atom->add_extra_data(); - // ------------------- Resize _tep array ------------------------ - - if (inum_full>this->_max_tep_size) { - this->_max_tep_size=static_cast(static_cast(inum_full)*1.10); - this->_tep.resize(this->_max_tep_size*4); - } *tep_ptr=this->_tep.host.begin(); this->_off2_polar = off2_polar; @@ -691,8 +625,6 @@ int** HippoT::compute_polar_real(const int ago, const int inum_full, // copy tep from device to host this->_tep.update_host(this->_max_tep_size*4,false); - - return nullptr; } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h index 9941460bff..492712eb85 100644 --- a/lib/gpu/lal_hippo.h +++ b/lib/gpu/lal_hippo.h @@ -72,16 +72,9 @@ class Hippo : public BaseAmoeba { double c3, double c4, double c5,void** tep_ptr); /// Compute dispersion real-space with device neighboring - int** compute_dispersion_real(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success, - const double aewald, const double off2_disp, double *charge, - double *boxlo, double *prd); + int** compute_dispersion_real(int *host_amtype, int *host_amgroup, + double **host_rpole, const double aewald, + const double off2_disp); /// Compute multipole real-space with device neighboring virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall, @@ -96,51 +89,23 @@ class Hippo : public BaseAmoeba { double *boxlo, double *prd, void **tep_ptr); /// Compute the real space part of the permanent field (udirect2b) with device neighboring - virtual int** compute_udirect2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, + virtual void compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double* host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, - void** fieldp_ptr); + const double aewald, const double off2_polar, void** fieldp_ptr); /// Compute the real space part of the induced field (umutual2b) with device neighboring - virtual int** compute_umutual2b(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double off2_polar, - double *host_q, double *boxlo, double *prd, - void** fieldp_ptr); + virtual void compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const double aewald, const double off2_polar, + void** fieldp_ptr); /// Compute polar real-space with device neighboring - virtual int** compute_polar_real(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, double **host_uind, - double **host_uinp, double *host_pval, double *sublo, double *subhi, - tagint *tag, int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success, + virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, const double aewald, const double felec, const double off2_polar, - double *charge, double *boxlo, double *prd, void **tep_ptr); + void **tep_ptr); /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 4152833320..9d3d845ad0 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -140,21 +140,11 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, cut2, c0, c1, c2, c3, c4, c5, tep_ptr); } -int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, - double *host_q, double *boxlo, double *prd) { - return HIPPOMF.compute_dispersion_real(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, sublo, subhi, - tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd); +void hippo_gpu_compute_dispersion_real(int *host_amtype, int *host_amgroup, + double **host_rpole, const double aewald, + const double off2) { + HIPPOMF.compute_dispersion_real(host_amtype, host_amgroup, host_rpole, + aewald, off2); } int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full, @@ -174,58 +164,29 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full, cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr); } -int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, +void hippo_gpu_compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr) { - return HIPPOMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); + const double aewald, const double off2, void **fieldp_ptr) { + HIPPOMF.compute_udirect2b(host_amtype, host_amgroup, host_rpole, + host_uind, host_uinp, host_pval, + aewald, off2, fieldp_ptr); } -int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, - double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr) { - return HIPPOMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr); +void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const double aewald, const double off2, void **fieldp_ptr) { + HIPPOMF.compute_umutual2b(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, + aewald, off2, fieldp_ptr); } -int** hippo_gpu_compute_polar_real(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, - double **host_rpole, double **host_uind, double **host_uinp, - double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, const double off2, - double *host_q, double *boxlo, double *prd, void **tep_ptr) { - return HIPPOMF.compute_polar_real(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, - sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr); +void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + const double aewald, const double felec, const double off2, + void **tep_ptr) { + HIPPOMF.compute_polar_real(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, + eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr); } double hippo_gpu_bytes() { diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 65a4af7d64..ea7c40793c 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -74,35 +74,19 @@ int ** amoeba_gpu_compute_multipole_real(const int ago, const int inum, const in bool &success, const double aewald, const double felec, const double off2, double *host_q, double *boxlo, double *prd, void **tq_ptr); -int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void amoeba_gpu_compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr); + const double aewald, const double off2, void **fieldp_ptr); -int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr); + const double aewald, const double off2, void **fieldp_ptr); -int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, const double off2, - double *host_q, double *boxlo, double *prd, void **tq_ptr); + const double aewald, const double felec, const double off2, + void **tq_ptr); double amoeba_gpu_bytes(); @@ -345,14 +329,7 @@ void PairAmoebaGPU::induce() } } } -/* - printf("GPU: cutghost = %f\n", comm->cutghost[0]); - for (i = 0; i < 10; i++) { - printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n", - i, udir[i][0], udir[i][1], udir[i][2], - udirp[i][0], udirp[i][1], udirp[i][2]); - } -*/ + // get induced dipoles via the OPT extrapolation method // NOTE: any way to rewrite these loops to avoid allocating // uopt,uoptp with a optorder+1 dimension, just optorder ?? @@ -731,17 +708,8 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp) if (use_ewald) choose(POLAR_LONG); else choose(POLAR); - firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - uind, uinp, sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, off2, atom->q, - domain->boxlo, domain->prd, &fieldp_pinned); - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + amoeba_gpu_compute_udirect2b(amtype, amgroup, rpole, uind, uinp, + aewald, off2, &fieldp_pinned); // rebuild dipole-dipole pair list and store pairwise dipole matrices // done one atom at a time in real-space double loop over atoms & neighs @@ -933,10 +901,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) int eflag=1, vflag=1; int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; + int inum; double sublo[3],subhi[3]; if (domain->triclinic == 0) { @@ -956,17 +921,8 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) if (use_ewald) choose(POLAR_LONG); else choose(POLAR); - firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - uind, uinp, sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success,aewald, off2, atom->q, - domain->boxlo, domain->prd, &fieldp_pinned); - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + amoeba_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, aewald, + off2, &fieldp_pinned); // accumulate the field and fieldp values from the GPU lib // field and fieldp may already have some nonzero values from kspace (umutual1) @@ -1005,10 +961,7 @@ void PairAmoebaGPU::polar_real() int eflag=1, vflag=1; int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; + int inum; double sublo[3],subhi[3]; if (domain->triclinic == 0) { @@ -1032,18 +985,9 @@ void PairAmoebaGPU::polar_real() double felec = 0.5 * electric / am_dielectric; - firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, - rpole, uind, uinp, sublo, subhi, - atom->tag, atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, felec, off2, atom->q, - domain->boxlo, domain->prd, &tq_pinned); - - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + amoeba_gpu_compute_polar_real(amtype, amgroup, rpole, uind, uinp, + eflag, vflag, eflag_atom, vflag_atom, + aewald, felec, off2, &tq_pinned); // reference to the tep array from GPU lib diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index dcdac836bd..0d77c67e10 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -80,16 +80,8 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, double cut2, double c0, double c1, double c2, double c3, double c4, double c5, void **tep_ptr); -int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - int *host_amtype, int *host_amgroup, double **host_rpole, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, - double *host_q, double *boxlo, double *prd); +void hippo_gpu_compute_dispersion_real(int *host_amtype, int *host_amgroup, double **host_rpole, + const double aewald, const double off2); int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, @@ -100,35 +92,19 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int bool &success, const double aewald, const double felec, const double off2, double *host_q, double *boxlo, double *prd, void **tq_ptr); -int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void hippo_gpu_compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr); + double *host_pval, const double aewald, const double off2, void **fieldp_ptr); -int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double off2, double *host_q, - double *boxlo, double *prd, void **fieldp_ptr); + const double aewald, const double off2, void **fieldp_ptr); -int ** hippo_gpu_compute_polar_real(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, +void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, const double aewald, const double felec, const double off2, - double *host_q, double *boxlo, double *prd, void **tq_ptr); + const double aewald, const double felec, const double off2, + void **tq_ptr); double hippo_gpu_bytes(); @@ -301,7 +277,6 @@ void PairHippoGPU::dispersion_real() int nall = atom->nlocal + atom->nghost; int inum, host_start; - bool success = true; int *ilist, *numneigh, **firstneigh; double sublo[3],subhi[3]; @@ -322,18 +297,7 @@ void PairHippoGPU::dispersion_real() if (use_dewald) choose(DISP_LONG); else choose(DISP); - firstneigh = hippo_gpu_compute_dispersion_real(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, off2, atom->q, - domain->boxlo, domain->prd); - - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + hippo_gpu_compute_dispersion_real(amtype, amgroup, rpole, aewald, off2); } /* ---------------------------------------------------------------------- @@ -377,15 +341,15 @@ void PairHippoGPU::multipole_real() double felec = electric / am_dielectric; - firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, pval, - sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, felec, off2, atom->q, - domain->boxlo, domain->prd, &tq_pinned); + hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, + atom->type, amtype, amgroup, rpole, pval, + sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, aewald, felec, off2, atom->q, + domain->boxlo, domain->prd, &tq_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator"); @@ -854,9 +818,6 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp) int nall = atom->nlocal + atom->nghost; int inum, host_start; - bool success = true; - int *ilist, *numneigh, **firstneigh; - double sublo[3],subhi[3]; if (domain->triclinic == 0) { sublo[0] = domain->sublo[0]; @@ -875,17 +836,8 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp) if (use_ewald) choose(POLAR_LONG); else choose(POLAR); - firstneigh = hippo_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - uind, uinp, pval, sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, off2, atom->q, - domain->boxlo, domain->prd, &fieldp_pinned); - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + hippo_gpu_compute_udirect2b(amtype, amgroup, rpole, uind, uinp, pval, + aewald, off2, &fieldp_pinned); // rebuild dipole-dipole pair list and store pairwise dipole matrices // done one atom at a time in real-space double loop over atoms & neighs @@ -1078,10 +1030,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) int eflag=1, vflag=1; int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; + int inum; double sublo[3],subhi[3]; if (domain->triclinic == 0) { @@ -1101,17 +1050,9 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) if (use_ewald) choose(POLAR_LONG); else choose(POLAR); - firstneigh = hippo_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - uind, uinp, pval, sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success,aewald, off2, atom->q, - domain->boxlo, domain->prd, &fieldp_pinned); - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval, + aewald, off2, &fieldp_pinned); + // accumulate the field and fieldp values from the GPU lib // field and fieldp may already have some nonzero values from kspace (umutual1) @@ -1150,10 +1091,7 @@ void PairHippoGPU::polar_real() int eflag=1, vflag=1; int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; + int inum; double sublo[3],subhi[3]; if (domain->triclinic == 0) { @@ -1177,18 +1115,9 @@ void PairHippoGPU::polar_real() double felec = 0.5 * electric / am_dielectric; - firstneigh = hippo_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, - rpole, uind, uinp, pval, sublo, subhi, - atom->tag, atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, felec, off2, atom->q, - domain->boxlo, domain->prd, &tq_pinned); - - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + hippo_gpu_compute_polar_real(amtype, amgroup, rpole, uind, uinp, pval, + eflag, vflag, eflag_atom, vflag_atom, + aewald, felec, off2, &tq_pinned); // reference to the tep array from GPU lib