diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index 9a45ea6fc8..80762b55aa 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -173,7 +173,7 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
                   int &host_start, int **ilist, int **jnum,
                   const double cpu_time, bool &success,
                   const double aewald, const double off2_repulse,
-                  double *host_q, double *boxlo, double *prd) {
+                  double *host_q, double *boxlo, double *prd, void **tep_ptr) {
   this->acc_timers();
   int eflag, vflag;
   if (eatom) eflag=2;
@@ -210,6 +210,14 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
                             host_start, ilist, jnum, cpu_time,
                             success, host_q, boxlo, prd);
 
+  // ---- Grow _tep (4 values per atom, 10% headroom) and expose its host side ----
+
+  if (inum_full>this->_max_tep_size) {
+    this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
+    this->_tep.resize(this->_max_tep_size*4);
+  }
+  *tep_ptr=this->_tep.host.begin();
+
   this->_off2_repulse = off2_repulse;
   this->_aewald = aewald;
   const int red_blocks=repulsion(eflag,vflag);
@@ -222,6 +230,10 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
 
   this->hd_balancer.stop_timer();
 
+  // copy tep from device to host so the caller can read it through *tep_ptr
+
+  this->_tep.update_host(this->_max_tep_size*4,false);
+
   return firstneigh; // nbor->host_jlist.begin()-host_start;
 }
 
@@ -257,7 +269,7 @@ int HippoT::repulsion(const int eflag, const int vflag) {
                           &coeff_amtype, &coeff_amclass, &sp_nonpolar,
                           &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                           &this->dev_short_nbor,
-                          &this->ans->force, &this->ans->engv,
+                          &this->ans->force, &this->ans->engv, &this->_tep,
                           &eflag, &vflag, &ainum, &_nall, &nbor_pitch,
                           &this->_threads_per_atom, &this->_aewald,
                           &this->_off2_repulse);
diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h
index 17e3a1b03f..374ca5d836 100644
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@@ -67,7 +67,7 @@ class Hippo : public BaseAmoeba {
                 int &host_start, int **ilist, int **jnum,
                 const double cpu_time, bool &success,
                 const double aewald, const double off2_repulse,
-                double *host_q, double *boxlo, double *prd);
+                double *host_q, double *boxlo, double *prd, void **tep_ptr);
 
   /// Compute dispersion real-space with device neighboring
   int** compute_dispersion_real(const int ago, const int inum_full, const int nall,
diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp
index 982cf894a6..2f1a800589 100644
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@@ -120,6 +120,25 @@ void hippo_gpu_clear() {
   HIPPOMF.clear();
 }
 
+// Forward the repulsion computation to the HIPPOMF instance.
+// compute_repulsion() stores the host address of its _tep buffer in *tep_ptr.
+int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
+                           const int nall, double **host_x, int *host_type,
+                           int *host_amtype, int *host_amgroup, double **host_rpole,
+                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           tagint **special, int *nspecial15, tagint** special15,
+                           const bool eflag, const bool vflag, const bool eatom,
+                           const bool vatom, int &host_start,
+                           int **ilist, int **jnum, const double cpu_time,
+                           bool &success, const double aewald, const double off2,
+                           double *host_q, double *boxlo, double *prd, void **tep_ptr) {
+  return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type,
+                          host_amtype, host_amgroup, host_rpole, sublo, subhi,
+                          tag, nspecial, special, nspecial15, special15,
+                          eflag, vflag, eatom, vatom, host_start, ilist, jnum,
+                          cpu_time, success, aewald, off2, host_q, boxlo, prd, tep_ptr);
+}
+
 int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *host_amtype, int *host_amgroup, double **host_rpole,
diff --git a/src/AMOEBA/pair_amoeba.h b/src/AMOEBA/pair_amoeba.h
index 8a2f09d443..5ba7aae981 100644
--- a/src/AMOEBA/pair_amoeba.h
+++ b/src/AMOEBA/pair_amoeba.h
@@ -343,7 +343,7 @@ class PairAmoeba : public Pair {
 
   void hal();
 
-  void repulsion();
+  virtual void repulsion();
   void damprep(double, double, double, double, double, double, double, double,
                int, double, double, double *);
 
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index fbc1b6b238..4852f75e08 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -66,6 +66,17 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
                    const double polar_dscale, const double polar_uscale, int& tq_size);
 void hippo_gpu_clear();
 
+int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
+                           const int nall, double **host_x, int *host_type,
+                           int *host_amtype, int *host_amgroup, double **host_rpole,
+                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           tagint **special, int *nspecial15, tagint** special15,
+                           const bool eflag, const bool vflag, const bool eatom,
+                           const bool vatom, int &host_start,
+                           int **ilist, int **jnum, const double cpu_time,
+                           bool &success, const double aewald, const double off2,
+                           double *host_q, double *boxlo, double *prd, void **tep_ptr);
+
 int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *host_amtype, int *host_amgroup, double **host_rpole,
@@ -209,6 +220,68 @@ void PairHippoGPU::init_style()
 
 /* ---------------------------------------------------------------------- */
 
+// GPU version of the repulsion term; defers to PairAmoeba::repulsion()
+// when gpu_repulsion_ready is not set
+
+void PairHippoGPU::repulsion()
+{
+  if (!gpu_repulsion_ready) {
+    PairAmoeba::repulsion();
+    return;
+  }
+
+  int eflag=1, vflag=1;
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double sublo[3],subhi[3];
+  if (domain->triclinic == 0) {
+    sublo[0] = domain->sublo[0];
+    sublo[1] = domain->sublo[1];
+    sublo[2] = domain->sublo[2];
+    subhi[0] = domain->subhi[0];
+    subhi[1] = domain->subhi[1];
+    subhi[2] = domain->subhi[2];
+  } else {
+    domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
+  }
+  inum = atom->nlocal;
+
+  // select the correct cutoff for the term
+
+  choose(REPULSE);
+
+  // run the repulsion term in the GPU lib; tq_pinned receives its host tep buffer
+
+  firstneigh = hippo_gpu_compute_repulsion(neighbor->ago, inum, nall, atom->x,
+                                           atom->type, amtype, amgroup, rpole,
+                                           sublo, subhi, atom->tag,
+                                           atom->nspecial, atom->special,
+                                           atom->nspecial15, atom->special15,
+                                           eflag, vflag, eflag_atom, vflag_atom,
+                                           host_start, &ilist, &numneigh, cpu_time,
+                                           success, aewald, off2, atom->q,
+                                           domain->boxlo, domain->prd, &tq_pinned);
+
+  if (!success)
+    error->one(FLERR,"Insufficient memory on accelerator");
+
+  // tq_pinned refers to the tep array in the GPU lib; tq_single selects float vs double
+
+  if (tq_single) {
+    float *tq_ptr = static_cast<float *>(tq_pinned);
+    compute_force_from_torque<float>(tq_ptr, frepulse, virrepulse);
+  } else {
+    double *tq_ptr = static_cast<double *>(tq_pinned);
+    compute_force_from_torque<double>(tq_ptr, frepulse, virrepulse);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void PairHippoGPU::dispersion_real()
 {
   if (!gpu_dispersion_real_ready) {
diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h
index 9e961045eb..c7a4e75ebe 100644
--- a/src/GPU/pair_hippo_gpu.h
+++ b/src/GPU/pair_hippo_gpu.h
@@ -35,6 +35,7 @@ class PairHippoGPU : public PairAmoeba {
 
   virtual void induce();
 
+  virtual void repulsion();
   virtual void dispersion_real();
   virtual void multipole_real();
   virtual void udirect2b(double **, double **);