diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 12bf9cfd3c..0f87104832 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -430,8 +430,8 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full, // leave the answers (forces, energies and virial) on the device, // only copy them back in the last kernel (this one, or polar_real once done) - this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); - this->device->add_ans_object(this->ans); + //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + //this->device->add_ans_object(this->ans); this->hd_balancer.stop_timer(); @@ -568,6 +568,94 @@ int HippoT::umutual2b(const int eflag, const int vflag) { return GX; } +// --------------------------------------------------------------------------- +// Reneighbor on GPU if necessary, and then compute polar real-space +// --------------------------------------------------------------------------- +template +int** HippoT::compute_polar_real(const int ago, const int inum_full, + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double **host_uind, double **host_uinp, + double *host_pval, double *sublo, double *subhi, + tagint *tag, int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double felec, + const double off2_polar, double *host_q, + double *boxlo, double *prd, void **tep_ptr) { + this->acc_timers(); + int eflag, vflag; + if (eatom) eflag=2; + else if (eflag_in) eflag=1; + else eflag=0; + if (vatom) vflag=2; + else if (vflag_in) vflag=1; + else vflag=0; + + #ifdef LAL_NO_BLOCK_REDUCE + if (eflag) eflag=2; + if (vflag) vflag=2; + #endif + + this->set_kernel(eflag,vflag); + + // reallocate per-atom arrays, transfer data from the host + // and build the neighbor lists if needed + // NOTE: + // For now we invoke precompute() again here, + // to be able to turn on/off the udirect2b kernel (which comes before this) + // Once all the kernels are ready, precompute() is needed only once + // in the first kernel in a time step. + // We only need to cast uind and uinp from host to device here + // if the neighbor lists are rebuilt and other per-atom arrays + // (x, type, amtype, amgroup, rpole) are ready on the device. + + int** firstneigh = nullptr; + firstneigh = precompute(ago, inum_full, nall, host_x, host_type, + host_amtype, host_amgroup, host_rpole, + host_uind, host_uinp, host_pval, sublo, subhi, tag, + nspecial, special, nspecial15, special15, + eflag_in, vflag_in, eatom, vatom, + host_start, ilist, jnum, cpu_time, + success, host_q, boxlo, prd); + + // ------------------- Resize _tep array ------------------------ + + if (inum_full>this->_max_tep_size) { + this->_max_tep_size=static_cast(static_cast(inum_full)*1.10); + this->_tep.resize(this->_max_tep_size*4); + } + *tep_ptr=this->_tep.host.begin(); + + this->_off2_polar = off2_polar; + this->_felec = felec; + this->_aewald = aewald; + const int red_blocks=polar_real(eflag,vflag); + + // only copy answers (forces, energies and virial) back from the device + // in the last kernel (which is polar_real here) + this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + this->device->add_ans_object(this->ans); + + this->hd_balancer.stop_timer(); + + // copy tep from device to host + + this->_tep.update_host(this->_max_tep_size*4,false); +/* + printf("GPU lib: tep size = %d: max tep size = %d\n", this->_tep.cols(), _max_tep_size); + for (int i = 0; i < 10; i++) { + numtyp4* p = (numtyp4*)(&this->_tep[4*i]); + printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z); + } +*/ + return firstneigh; // nbor->host_jlist.begin()-host_start; +} + // --------------------------------------------------------------------------- // Calculate the polar real-space term, returning tep // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index afc3cf10af..1f9c14d4da 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -1753,7 +1753,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_, numtyp corei = coeff_amclass[itype].z; // pcore[iclass]; numtyp alphai = coeff_amclass[itype].w; // palpha[iclass]; - numtyp vali = polar6[i].x; + numtyp vali = polar6[i].x; for ( ; nbor { const double aewald, const double felec, const double off2_mpole, double *charge, double *boxlo, double *prd, void **tep_ptr); + /// Compute polar real-space with device neighboring + virtual int** compute_polar_real(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, double **host_uind, + double **host_uinp, double *host_pval, double *sublo, double *subhi, + tagint *tag, int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **numj, const double cpu_time, bool &success, + const double aewald, const double felec, const double off2_polar, + double *charge, double *boxlo, double *prd, void **tep_ptr); + /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ void clear(); diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 390f713d98..1851c3aba3 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -194,7 +194,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, + double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int *nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, @@ -202,7 +202,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full, bool &success, const double aewald, const double felec, const double off2, double *host_q, double *boxlo, double *prd, void **tep_ptr) { return HIPPOMF.compute_polar_real(ago, inum_full, nall, host_x, host_type, - host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, + host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr); diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 6ac22e0721..23395e5fe3 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -108,7 +108,7 @@ int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall int ** hippo_gpu_compute_polar_real(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, - double **host_rpole, double **host_uind, double **host_uinp, + double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, const bool eflag, const bool vflag, const bool eatom, const bool vatom, @@ -138,7 +138,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) gpu_multipole_real_ready = true; gpu_udirect2b_ready = false; gpu_umutual2b_ready = false; - gpu_polar_real_ready = false; + gpu_polar_real_ready = true; GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); } @@ -1089,7 +1089,7 @@ void PairHippoGPU::polar_real() firstneigh = hippo_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, - rpole, uind, uinp, sublo, subhi, + rpole, uind, uinp, pval, sublo, subhi, atom->tag, atom->nspecial, atom->special, atom->nspecial15, atom->special15, eflag, vflag, eflag_atom, vflag_atom,