diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index 5030025981..8adabbe6d5 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -52,8 +52,7 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, const double gpu_split, FILE *_screen, - const double aewald, const double felec, - const double off2, const double polar_dscale, + const double aewald, const double polar_dscale, const double polar_uscale) { int success; success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,maxspecial15, @@ -97,8 +96,6 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda ucl_copy(sp_polar,dview,5,false); _aewald = aewald; - _felec = felec; - _off2 = off2; _polar_dscale = polar_dscale; _polar_uscale = polar_uscale; @@ -145,7 +142,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->dev_short_nbor, &_off2, &ainum, + &this->dev_short_nbor, &this->_off2_polar, &ainum, &nbor_pitch, &this->_threads_per_atom); this->short_nbor_avail = true; } @@ -155,7 +152,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) { &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &nbor_pitch, - &this->_threads_per_atom, &_aewald, &_off2, + &this->_threads_per_atom, &_aewald, &this->_off2_polar, &_polar_dscale, &_polar_uscale); this->time_pair.stop(); @@ -181,19 +178,18 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) { if (!this->short_nbor_avail) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, - &this->_nbor_data->begin(), - &this->dev_short_nbor, &_off2, &ainum, - &nbor_pitch, &this->_threads_per_atom); + &this->_nbor_data->begin(), &this->dev_short_nbor, + &this->_off2_polar, &ainum, &nbor_pitch, + &this->_threads_per_atom); this->short_nbor_avail = true; } this->k_umutual2b.set_size(GX,BX); this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->dev_short_nbor, - &this->_fieldp, &ainum, &_nall, &nbor_pitch, - &this->_threads_per_atom, &_aewald, &_off2, - &_polar_dscale, &_polar_uscale); + &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, + &nbor_pitch, &this->_threads_per_atom, &_aewald, + &this->_off2_polar, &_polar_dscale, &_polar_uscale); this->time_pair.stop(); return GX; @@ -219,7 +215,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->dev_short_nbor, &_off2, &ainum, + &this->dev_short_nbor, &this->_off2_polar, &ainum, &nbor_pitch, &this->_threads_per_atom); this->short_nbor_avail = true; } @@ -230,8 +226,8 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &this->_tep, &eflag, &vflag, &ainum, &_nall, &nbor_pitch, - &this->_threads_per_atom, - &_aewald, &_felec, &_off2, &_polar_dscale, &_polar_uscale); + &this->_threads_per_atom, &_aewald, &this->_felec, + &this->_off2_polar, &_polar_dscale, &_polar_uscale); this->time_pair.stop(); // Signal that short nbor list is not avail for the next time step diff --git a/lib/gpu/lal_amoeba.h b/lib/gpu/lal_amoeba.h index ea4f8b9d1d..ce30b6ab19 100644 --- a/lib/gpu/lal_amoeba.h +++ b/lib/gpu/lal_amoeba.h @@ -45,8 +45,7 @@ class Amoeba : public BaseAmoeba { const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, const double gpu_split, FILE *_screen, - const double aewald, const double felec, - const double off2, const double polar_dscale, + const double aewald, const double polar_dscale, const double polar_uscale); /// Clear all host and device data @@ -75,7 +74,7 @@ class Amoeba : public BaseAmoeba { /// Number of atom types int _lj_types; - numtyp _aewald, _felec, _off2, _polar_dscale, _polar_uscale; + numtyp _aewald, _polar_dscale, _polar_uscale; numtyp _qqrd2e; protected: diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index 5bb4dea25f..bbebaa09da 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -36,8 +36,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double aewald, const double felec, - const double off2, const double polar_dscale, + const double aewald, const double polar_dscale, const double polar_uscale, int& tep_size) { AMOEBAMF.clear(); gpu_mode=AMOEBAMF.device->gpu_mode(); @@ -67,7 +66,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, screen, - aewald, felec, off2, polar_dscale, polar_uscale); + aewald, polar_dscale, polar_uscale); AMOEBAMF.device->world_barrier(); if (message) @@ -87,7 +86,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, host_special_polar_wscale, host_special_polar_piscale, host_special_polar_pscale, nlocal, nall, max_nbors, maxspecial, maxspecial15, cell_size, gpu_split, screen, - aewald, felec, off2, polar_dscale, polar_uscale); + aewald, polar_dscale, polar_uscale); AMOEBAMF.device->gpu_barrier(); if (message) @@ -111,16 +110,16 @@ int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full, double **host_uind, double **host_uinp, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void **fieldp_ptr) { + bool &success, const double off2, double *host_q, + double *boxlo, double *prd, void **fieldp_ptr) { return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, host_q, boxlo, prd, fieldp_ptr); + cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr); } int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full, @@ -132,13 +131,13 @@ int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void **fieldp_ptr) { + bool &success, const double off2, double *host_q, + double *boxlo, double *prd, void **fieldp_ptr) { return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, sublo, subhi, tag, nspecial, special, nspecial15, special15, eflag, vflag, eatom, vatom, host_start, ilist, jnum, - cpu_time, success, host_q, boxlo, prd, fieldp_ptr); + cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr); } int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full, @@ -147,17 +146,16 @@ int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full, double **host_rpole, double **host_uind, double **host_uinp, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int *nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void **tep_ptr) { + bool &success, const double felec, const double off2, + double *host_q, double *boxlo, double *prd, void **tep_ptr) { return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, sublo, subhi, tag, nspecial, special, nspecial15, special15, - eflag, vflag, eatom, - vatom, host_start, ilist, jnum, cpu_time, success, - host_q, boxlo, prd, tep_ptr); + eflag, vflag, eatom, vatom, host_start, ilist, jnum, + cpu_time, success, felec, off2, host_q, boxlo, prd, tep_ptr); } double amoeba_gpu_bytes() { diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index f4036ec110..2fe0e1e4b8 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -250,7 +250,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f const bool eflag_in, const bool vflag_in, const bool eatom, const bool vatom, int &host_start, const double cpu_time, - bool &success, double *host_q, const int nlocal, + bool &success, const double off2_polar, const double felec, + double *host_q, const int nlocal, double *boxlo, double *prd, void **tep_ptr) { acc_timers(); int eflag, vflag; @@ -316,6 +317,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q, boxlo, prd); + _off2_polar = off2_polar; + _felec = felec; const int red_blocks=polar_real(eflag,vflag); ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks); device->add_ans_object(ans); @@ -437,8 +440,8 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i const bool eflag_in, const bool vflag_in, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void** fieldp_ptr) { + bool &success, const double off2_polar, double *host_q, + double *boxlo, double *prd, void** fieldp_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -475,6 +478,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i } *fieldp_ptr=_fieldp.host.begin(); + _off2_polar = off2_polar; const int red_blocks=udirect2b(eflag,vflag); // copy field and fieldp from device to host (_fieldp store both arrays, one after another) @@ -506,8 +510,8 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i const bool eflag_in, const bool vflag_in, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void** fieldp_ptr) { + bool &success, const double off2_polar, double *host_q, + double *boxlo, double *prd, void** fieldp_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -544,6 +548,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i } *fieldp_ptr=_fieldp.host.begin(); + _off2_polar = off2_polar; const int red_blocks=umutual2b(eflag,vflag); // copy field and fieldp from device to host (_fieldp store both arrays, one after another) @@ -574,8 +579,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const const bool eflag_in, const bool vflag_in, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, - double *prd, void **tep_ptr) { + bool &success, const double felec, const double off2_polar, + double *host_q, double *boxlo, double *prd, void **tep_ptr) { acc_timers(); int eflag, vflag; if (eatom) eflag=2; @@ -620,6 +625,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const } *tep_ptr=_tep.host.begin(); + _off2_polar = off2_polar; + _felec = felec; const int red_blocks=polar_real(eflag,vflag); ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); device->add_ans_object(ans); diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index eb8938d7c4..b14a234e7b 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -152,7 +152,7 @@ class BaseAmoeba { const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, - double *charge, double *boxlo, double *prd, void **fieldp_ptr); + const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr); /// Compute the real space part of the induced field (umutual2b) with device neighboring int** compute_umutual2b(const int ago, const int inum_full, const int nall, @@ -165,7 +165,7 @@ class BaseAmoeba { const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, - double *charge, double *boxlo, double *prd, void **fieldp_ptr); + const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr); /// Compute polar real-space with device neighboring int** compute_polar_real(const int ago, const int inum_full, const int nall, @@ -177,7 +177,8 @@ class BaseAmoeba { const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **numj, const double cpu_time, bool &success, - double *charge, double *boxlo, double *prd, void **tep_ptr); + const double felec, const double off2_polar, double *charge, + double *boxlo, double *prd, void **tep_ptr); /// Compute polar real-space with host neighboring (not active for now) void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, @@ -186,8 +187,8 @@ class BaseAmoeba { double **host_uinp, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success, double *charge, - const int nlocal, double *boxlo, double *prd, void **tep_ptr); + const double cpu_time, bool &success, const double felec, const double off2_polar, + double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr); // -------------------------- DEVICE DATA ------------------------- @@ -258,6 +259,8 @@ class BaseAmoeba { bool short_nbor_avail; UCL_D_Vec *_nbor_data; + numtyp _felec,_off2_hal,_off2_repulse,_off2_dispersion,_off2_mpole,_off2_polar; + void compile_kernels(UCL_Device &dev, const void *pair_string, const char *kname_polar, const char *kname_udirect2b, const char *kname_umutual2b, const char *kname_short_nbor); diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 640d94972a..f4ead3c5fa 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -59,8 +59,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const int maxspecial15, const double cell_size, int &gpu_mode, FILE *screen, - const double aewald, const double felec, - const double off2, const double polar_dscale, + const double aewald, const double polar_dscale, const double polar_uscale, int& tep_size); void amoeba_gpu_clear(); @@ -69,33 +68,30 @@ int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nal double **host_rpole, double **host_uind, double **host_uinp, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, double *prd, - void **fieldp_ptr); + const bool eflag, const bool vflag, const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, const double cpu_time, + bool &success, const double off2, double *host_q, + double *boxlo, double *prd, void **fieldp_ptr); int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, double *prd, - void **fieldp_ptr); + const bool eflag, const bool vflag, const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, const double cpu_time, + bool &success, const double off2, double *host_q, + double *boxlo, double *prd, void **fieldp_ptr); int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, double *prd, - void **tep_ptr); + const bool eflag, const bool vflag, const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, const double cpu_time, + bool &success, const double off2, const double felec, double *host_q, + double *boxlo, double *prd, void **tep_ptr); double amoeba_gpu_bytes(); @@ -155,6 +151,15 @@ void PairAmoebaGPU::polar_real() } inum = atom->nlocal; + // select the correct cutoff for the term + + if (use_ewald) choose(POLAR_LONG); + else choose(POLAR); + + // set the energy unit conversion factor for polar real-space calculation + + double felec = 0.5 * electric / am_dielectric; + firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, uind, uinp, sublo, subhi, @@ -162,7 +167,7 @@ void PairAmoebaGPU::polar_real() atom->nspecial15, atom->special15, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, + success, felec, off2, atom->q, domain->boxlo, domain->prd, &tep_pinned); @@ -278,11 +283,11 @@ void PairAmoebaGPU::init_style() // select the squared cutoff (off2) for neighbor list builds (the polar term for now) // NOTE: induce and polar terms are using the same flags here - +/* if (use_ewald) choose(POLAR_LONG); else choose(POLAR); - - double cell_size = sqrt(off2) + neighbor->skin; +*/ + double cell_size = sqrt(maxcut) + neighbor->skin; int maxspecial=0; int maxspecial15=0; @@ -303,8 +308,7 @@ void PairAmoebaGPU::init_style() special_polar_pscale, atom->nlocal, atom->nlocal+atom->nghost, mnf, maxspecial, maxspecial15, cell_size, gpu_mode, screen, - aewald, felec, off2, polar_dscale, polar_uscale, - tep_size); + aewald, polar_dscale, polar_uscale, tep_size); GPU_EXTRA::check_flag(success,error,world); if (gpu_mode == GPU_FORCE) @@ -784,13 +788,18 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp) } inum = atom->nlocal; + // select the correct cutoff (off2) for the term + + if (use_ewald) choose(POLAR_LONG); + else choose(POLAR); + firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, uind, uinp, sublo, subhi, atom->tag, atom->nspecial, atom->special, atom->nspecial15, atom->special15, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, + success, off2, atom->q, domain->boxlo, domain->prd, &fieldp_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator"); @@ -1003,13 +1012,18 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) } inum = atom->nlocal; + // select the correct cutoff (off2) for the term + + if (use_ewald) choose(POLAR_LONG); + else choose(POLAR); + firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x, atom->type, amtype, amgroup, rpole, uind, uinp, sublo, subhi, atom->tag, atom->nspecial, atom->special, atom->nspecial15, atom->special15, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time, - success, atom->q, domain->boxlo, + success, off2, atom->q, domain->boxlo, domain->prd, &fieldp_pinned); if (!success) error->one(FLERR,"Insufficient memory on accelerator");