diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 80762b55aa..6830847e98 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -57,6 +57,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass, const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, + const double *host_sizpr, const double *host_dmppr, const double *host_elepr, const double *host_csix, const double *host_adisp, const double *host_pcore, const double *host_palpha, const int nlocal, const int nall, const int max_nbors, @@ -99,6 +100,16 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass, coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY); ucl_copy(coeff_amtype,host_write,false); + for (int i = 0; i < max_amtype; i++) { + host_write[i].x = host_sizpr[i]; + host_write[i].y = host_dmppr[i]; + host_write[i].z = host_elepr[i]; + host_write[i].w = (numtyp)0; + } + + coeff_rep.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(coeff_rep,host_write,false); + UCL_H_Vec host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY); for (int i = 0; i < max_amclass; i++) { host_write2[i].x = host_csix[i]; @@ -133,7 +144,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass, _polar_uscale = polar_uscale; _allocated=true; - this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes() + this->_max_bytes=coeff_amtype.row_bytes() + coeff_rep.row_bytes() + coeff_amclass.row_bytes() + + sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes(); return 0; } @@ -145,6 +156,7 @@ void HippoT::clear() { _allocated=false; coeff_amtype.clear(); + coeff_rep.clear(); coeff_amclass.clear(); sp_polar.clear(); sp_nonpolar.clear(); @@ -173,7 +185,9 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, const double aewald, const double off2_repulse, - double *host_q, double *boxlo, double *prd, void **tep_ptr) { + double *host_q, double *boxlo, double *prd, + double cut2, double c0, double c1, double c2, + double c3, double c4, double c5, void **tep_ptr) { this->acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -219,7 +233,13 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full, *tep_ptr=this->_tep.host.begin(); this->_off2_repulse = off2_repulse; - this->_aewald = aewald; + _cut2 = cut2; + _c0 = c0; + _c1 = c1; + _c2 = c2; + _c3 = c3; + _c4 = c4; + _c5 = c5; const int red_blocks=repulsion(eflag,vflag); // only copy them back if this is the last kernel @@ -266,13 +286,13 @@ int HippoT::repulsion(const int eflag, const int vflag) { k_repulsion.set_size(GX,BX); k_repulsion.run(&this->atom->x, &this->atom->extra, - &coeff_amtype, &coeff_amclass, &sp_nonpolar, + &coeff_rep, &sp_nonpolar, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &this->_tep, &eflag, &vflag, &ainum, &_nall, &nbor_pitch, &this->_threads_per_atom, &this->_aewald, - &this->_off2_repulse); + &this->_off2_repulse, &_cut2, &_c0, &_c1, &_c2, &_c3, &_c4, &_c5); this->time_pair.stop(); return GX; diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu index 2e62d0703e..1b6344a163 100644 --- a/lib/gpu/lal_hippo.cu +++ b/lib/gpu/lal_hippo.cu @@ -411,7 +411,7 @@ _texture( q_tex,int2); __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, - const __global numtyp4 *restrict coeff, + const __global numtyp4 *restrict coeff_rep, const __global numtyp4 *restrict sp_nonpolar, const __global int *dev_nbor, const __global int *dev_packed, @@ -480,9 +480,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_, numtyp qiyz = pol3i.x; // rpole[i][9]; numtyp qizz = pol3i.y; // rpole[i][12]; int itype = pol3i.z; // amtype[i]; - numtyp sizi = coeff[itype].x; // sizpr[itype]; - numtyp dmpi = coeff[itype].y; // dmppr[itype]; - numtyp vali = coeff[itype].z; // elepr[itype]; + numtyp sizi = coeff_rep[itype].x; // sizpr[itype]; + numtyp dmpi = coeff_rep[itype].y; // dmppr[itype]; + numtyp vali = coeff_rep[itype].z; // elepr[itype]; for ( ; nbor { const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, + const double *host_sizpr, const double *host_dmppr, const double *host_elepr, const double *host_csix, const double *host_adisp, const double *host_pcore, const double *host_palpha, const int nlocal, const int nall, const int max_nbors, @@ -56,18 +57,20 @@ class Hippo : public BaseAmoeba { /// Compute repulsion with device neighboring int** compute_repulsion(const int ago, const int inum_full, - const int nall, double **host_x, - int *host_type, int *host_amtype, - int *host_amgroup, double **host_rpole, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, - int *nspecial15, tagint **special15, - const bool eflag_in, const bool vflag_in, - const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - const double aewald, const double off2_repulse, - double *host_q, double *boxlo, double *prd, void** tep_ptr); + const int nall, double **host_x, + int *host_type, int *host_amtype, + int *host_amgroup, double **host_rpole, + double *sublo, double *subhi, tagint *tag, + int **nspecial, tagint **special, + int *nspecial15, tagint **special15, + const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + const double aewald, const double off2_repulse, + double *host_q, double *boxlo, double *prd, + double cut2, double c0, double c1, double c2, + double c3, double c4, double c5,void** tep_ptr); /// Compute dispersion real-space with device neighboring int** compute_dispersion_real(const int ago, const int inum_full, const int nall, @@ -157,6 +160,8 @@ class Hippo : public BaseAmoeba { UCL_D_Vec coeff_amtype; /// csix = coeff_amclass.x; adisp = coeff_amclass.y; UCL_D_Vec coeff_amclass; + /// sizpr = coeff_rep.x; dmppr = coeff_rep.y; elepr = coeff_rep.z; + UCL_D_Vec coeff_rep; /// Special polar values [0-4]: /// sp_polar.x = special_polar_wscale /// sp_polar.y special_polar_pscale, @@ -175,6 +180,7 @@ class Hippo : public BaseAmoeba { /// Number of atom types int _lj_types; + numtyp _cut2,_c0,_c1,_c2,_c3,_c4,_c5; numtyp _polar_dscale, _polar_uscale; numtyp _qqrd2e; diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 2f1a800589..15cb53cdb1 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -37,6 +37,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, + const double *host_sizpr, const double *host_dmppr, const double *host_elepr, const double *host_csix, const double *host_adisp, const double *host_pcore, const double *host_palpha, const int nlocal, const int nall, const int max_nbors, @@ -74,6 +75,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass host_special_repel, host_special_disp, host_special_mpole, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, + host_sizpr, host_dmppr, host_elepr, host_csix, host_adisp, host_pcore, host_palpha, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, @@ -99,6 +101,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass host_special_repel, host_special_disp, host_special_mpole, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, + host_sizpr, host_dmppr, host_elepr, host_csix, host_adisp, host_pcore, host_palpha, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, @@ -129,12 +132,15 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, const double aewald, const double off2, - double *host_q, double *boxlo, double *prd, void **tep_ptr) { + double *host_q, double *boxlo, double *prd, + double cut2, double c0, double c1, double c2, + double c3, double c4, double c5, void **tep_ptr) { return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, aewald, off2, host_q, boxlo, prd, tep_ptr); + cpu_time, success, aewald, off2, host_q, boxlo, prd, + cut2, c0, c1, c2, c3, c4, c5, tep_ptr); } int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 4852f75e08..d6a16c72fb 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -58,6 +58,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass const double *host_special_polar_wscale, const double *host_special_polar_piscale, const double *host_special_polar_pscale, + const double *host_sizpr, const double *host_dmppr, const double *host_elepr, const double *host_csix, const double *host_adisp, const double *host_pcore, const double *host_palpha, const int nlocal, const int nall, const int max_nbors, @@ -75,7 +76,9 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success, const double aewald, const double off2, - double *host_q, double *boxlo, double *prd, void **tep_ptr); + double *host_q, double *boxlo, double *prd, + double cut2, double c0, double c1, double c2, + double c3, double c4, double c5, void **tep_ptr); int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, @@ -203,7 +206,8 @@ void PairHippoGPU::init_style() pdamp, thole, dirdamp, amtype2class, special_hal, special_repel, special_disp, special_mpole, special_polar_wscale, special_polar_piscale, - special_polar_pscale, csix, adisp, pcore, palpha, + special_polar_pscale, sizpr, dmppr, elepr, + csix, adisp, pcore, palpha, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, maxspecial15, cell_size, gpu_mode, screen, polar_dscale, polar_uscale, tq_size); @@ -261,7 +265,8 @@ void PairHippoGPU::repulsion() eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, success, aewald, off2, atom->q, - domain->boxlo, domain->prd, &tq_pinned); + domain->boxlo, domain->prd, cut2, + c0, c1, c2, c3, c4, c5, &tq_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator");