diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index 7be4a6f59c..e3bb4c5ef5 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -162,7 +162,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) { this->time_pair.start(); // Build the short neighbor list for the cutoff off2_mpole, - // at this point mpole is the first kernel in a time step + // at this point mpole is the first kernel in a time step for AMOEBA this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index 47591e75f6..5e4d48a2da 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -117,7 +117,28 @@ void amoeba_gpu_clear() { AMOEBAMF.clear(); } -int** amoeba_gpu_compute_multipole_real(const int ago, const int inum_full, +int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd) { + return AMOEBAMF.precompute(ago, inum_full, nall, host_x, host_type, + host_amtype, host_amgroup, host_rpole, + nullptr, nullptr, nullptr, sublo, subhi, tag, + nspecial, special, nspecial15, special15, + eflag_in, vflag_in, eatom, vatom, + host_start, ilist, jnum, cpu_time, + success, host_q, boxlo, prd); +} + + +void amoeba_gpu_compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ 
-127,7 +148,7 @@ int** amoeba_gpu_compute_multipole_real(const int ago, const int inum_full, int **ilist, int **jnum, const double cpu_time, bool &success, const double aewald, const double felec, const double off2, double *host_q, double *boxlo, double *prd, void **tep_ptr) { - return AMOEBAMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type, + AMOEBAMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, nullptr, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index 5496236632..16335fa17e 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -226,12 +226,12 @@ int * BaseAmoebaT::reset_nbors(const int nall, const int inum, int *ilist, // --------------------------------------------------------------------------- template inline int BaseAmoebaT::build_nbor_list(const int inum, const int host_inum, - const int nall, double **host_x, - int *host_type, double *sublo, - double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - bool &success) { + const int nall, double **host_x, + int *host_type, double *sublo, + double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + bool &success) { success=true; resize_atom(inum,nall,success); resize_local(inum,host_inum,nbor->max_nbors(),success); @@ -450,7 +450,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall // Reneighbor on GPU if necessary, and then compute multipole real-space // --------------------------------------------------------------------------- template -int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, +void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int 
*host_amtype, int *host_amgroup, double **host_rpole, double *host_pval, @@ -469,7 +469,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, // NOTE: // Once all the kernels are ready, precompute() is needed only once // in the first kernel in a time step. - +/* int** firstneigh = nullptr; firstneigh = precompute(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, @@ -478,7 +478,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, eflag_in, vflag_in, eatom, vatom, host_start, ilist, jnum, cpu_time, success, host_q, boxlo, prd); - +*/ // ------------------- Resize _tep array ------------------------ if (inum_full>_max_tep_size) { @@ -503,7 +503,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full, _tep.update_host(_max_tep_size*4,false); - return firstneigh; // nbor->host_jlist.begin()-host_start; +// return firstneigh; // nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -782,7 +782,6 @@ int BaseAmoebaT::fphi_mpole() { // Compute the block size and grid size to keep all cores busy const int BX=block_size(); - //printf("BX = %d; pppm block size = %d\n", BX, PPPM_BLOCK_1D); int GX=static_cast(ceil(static_cast(this->ans->inum())/BX)); time_pair.start(); diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index f9a715808e..d00833cae7 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -152,7 +152,7 @@ class BaseAmoeba { double *charge, double *boxlo, double *prd); /// Compute multipole real-space with device neighboring - virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall, + virtual void compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *host_pval, double *sublo, double *subhi, tagint *tag, 
diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 3de6dc544c..dc2b6f2c7a 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -172,7 +172,7 @@ double HippoT::host_memory_usage() const { // Reneighbor on GPU if necessary, and then compute repulsion // --------------------------------------------------------------------------- template -int** HippoT::compute_repulsion(const int ago, const int inum_full, +void HippoT::compute_repulsion(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, @@ -213,7 +213,7 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full, // We only need to cast the necessary from host to device here // if the neighbor lists are rebuilt and other per-atom arrays // (x, type, amtype, amgroup, rpole) are ready on the device. - +/* int** firstneigh = nullptr; firstneigh = this->precompute(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, @@ -222,7 +222,7 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full, eflag_in, vflag_in, eatom, vatom, host_start, ilist, jnum, cpu_time, success, host_q, boxlo, prd); - +*/ // ------------------- Resize _tep array ------------------------ if (inum_full>this->_max_tep_size) { @@ -253,7 +253,7 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full, this->_tep.update_host(this->_max_tep_size*4,false); - return firstneigh; // nbor->host_jlist.begin()-host_start; +// return firstneigh; // nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -275,7 +275,7 @@ int HippoT::repulsion(const int eflag, const int vflag) { this->time_pair.start(); // Build the short neighbor list for the cutoff off2_disp, - // at this point mpole is the first kernel in a time step + // at this point repulsion is the first kernel in a time step for HIPPO
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, @@ -302,7 +302,7 @@ int HippoT::repulsion(const int eflag, const int vflag) { // Reneighbor on GPU if necessary, and then compute dispersion real-space // --------------------------------------------------------------------------- template -int** HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup, +void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup, double **host_rpole, const double aewald, const double off2_disp) { @@ -324,7 +324,7 @@ int** HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup, this->hd_balancer.stop_timer(); - return nullptr; // nbor->host_jlist.begin()-host_start; + // return nullptr; // nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- @@ -372,7 +372,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) { // Reneighbor on GPU if necessary, and then compute multipole real-space // --------------------------------------------------------------------------- template -int** HippoT::compute_multipole_real(const int ago, const int inum_full, +void HippoT::compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, @@ -417,7 +417,7 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full, this->_tep.update_host(this->_max_tep_size*4,false); - return nullptr; // nbor->host_jlist.begin()-host_start; + //return nullptr; // nbor->host_jlist.begin()-host_start; } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h index 492712eb85..671c9964ff 100644 --- a/lib/gpu/lal_hippo.h +++ b/lib/gpu/lal_hippo.h @@ -55,7 +55,7 @@ class Hippo : public BaseAmoeba { const double polar_dscale, const double polar_uscale); /// Compute repulsion 
with device neighboring - int** compute_repulsion(const int ago, const int inum_full, + virtual void compute_repulsion(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, @@ -72,12 +72,12 @@ class Hippo : public BaseAmoeba { double c3, double c4, double c5,void** tep_ptr); /// Compute dispersion real-space with device neighboring - int** compute_dispersion_real(int *host_amtype, int *host_amgroup, + virtual void compute_dispersion_real(int *host_amtype, int *host_amgroup, double **host_rpole, const double aewald, const double off2_disp); /// Compute multipole real-space with device neighboring - virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall, + virtual void compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index a75080bfca..9644f5aca4 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -120,7 +120,27 @@ void hippo_gpu_clear() { HIPPOMF.clear(); } -int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, +int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd) { + return HIPPOMF.precompute(ago, inum_full, nall, host_x, host_type, 
+ host_amtype, host_amgroup, host_rpole, + nullptr, nullptr, nullptr, sublo, subhi, tag, + nspecial, special, nspecial15, special15, + eflag_in, vflag_in, eatom, vatom, + host_start, ilist, jnum, cpu_time, + success, host_q, boxlo, prd); +} + +void hippo_gpu_compute_repulsion(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ -132,7 +152,7 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, double *host_q, double *boxlo, double *prd, double cut2, double c0, double c1, double c2, double c3, double c4, double c5, void **tep_ptr) { - return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type, + HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, @@ -147,7 +167,7 @@ void hippo_gpu_compute_dispersion_real(int *host_amtype, int *host_amgroup, aewald, off2); } -int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full, +void hippo_gpu_compute_multipole_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ -157,7 +177,7 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full, int **ilist, int **jnum, const double cpu_time, bool &success, const double aewald, const double felec, const double off2, double *host_q, double *boxlo, double *prd, void **tep_ptr) { - return HIPPOMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type, + HIPPOMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, host_pval, sublo, subhi, tag, nspecial, special, nspecial15, 
special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index d0018bf588..8e021f5ce8 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -69,7 +69,19 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int max_amclas const double polar_dscale, const double polar_uscale, int& tq_size); void amoeba_gpu_clear(); -int ** amoeba_gpu_compute_multipole_real(const int ago, const int inum, const int nall, +int** amoeba_gpu_precompute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd); + +void amoeba_gpu_compute_multipole_real(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, @@ -240,6 +252,18 @@ void PairAmoebaGPU::multipole_real() } inum = atom->nlocal; + firstneigh = amoeba_gpu_precompute(neighbor->ago, inum, nall, atom->x, + atom->type, amtype, amgroup, rpole, + nullptr, nullptr, nullptr, + sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, domain->boxlo, domain->prd); + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + // select the correct cutoff for the term if (use_ewald) choose(MPOLE_LONG); @@ -249,18 
+273,17 @@ void PairAmoebaGPU::multipole_real() double felec = electric / am_dielectric; - firstneigh = amoeba_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, felec, off2, atom->q, - domain->boxlo, domain->prd, &tq_pinned); + amoeba_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, + atom->type, amtype, amgroup, rpole, + sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, aewald, felec, off2, atom->q, + domain->boxlo, domain->prd, &tq_pinned); - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + // reference to the tep array from GPU lib diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 4dbc998ee3..7658ddb011 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -70,7 +70,19 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass const double polar_dscale, const double polar_uscale, int& tq_size); void hippo_gpu_clear(); -int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, +int** hippo_gpu_precompute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, double *host_pval, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd); + +void 
hippo_gpu_compute_repulsion(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, @@ -86,7 +98,7 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, void hippo_gpu_compute_dispersion_real(int *host_amtype, int *host_amgroup, double **host_rpole, const double aewald, const double off2); -int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int nall, +void hippo_gpu_compute_multipole_real(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, @@ -258,22 +270,30 @@ void PairHippoGPU::repulsion() } inum = atom->nlocal; + firstneigh = hippo_gpu_precompute(neighbor->ago, inum, nall, atom->x, + atom->type, amtype, amgroup, rpole, + nullptr, nullptr, nullptr, + sublo, subhi, atom->tag, + atom->nspecial, atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, domain->boxlo, domain->prd); + // select the correct cutoff for the term choose(REPULSE); - // set the energy unit conversion factor for multipolar real-space calculation - - firstneigh = hippo_gpu_compute_repulsion(neighbor->ago, inum, nall, atom->x, - atom->type, amtype, amgroup, rpole, - sublo, subhi, atom->tag, - atom->nspecial, atom->special, - atom->nspecial15, atom->special15, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, aewald, off2, atom->q, - domain->boxlo, domain->prd, cut2, - c0, c1, c2, c3, c4, c5, &tq_pinned); + hippo_gpu_compute_repulsion(neighbor->ago, inum, nall, atom->x, + atom->type, amtype, amgroup, rpole, + sublo, subhi, atom->tag, + atom->nspecial, 
atom->special, + atom->nspecial15, atom->special15, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, aewald, off2, atom->q, + domain->boxlo, domain->prd, cut2, + c0, c1, c2, c3, c4, c5, &tq_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator");