diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index 18e1cf22f8..63ed683833 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -148,6 +148,10 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double ** aewald, off2, fieldp_ptr); } +void amoeba_gpu_update_fieldp(void **fieldp_ptr) { + AMOEBAMF.update_fieldp(fieldp_ptr); +} + void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, const bool eflag_in, const bool vflag_in, diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index 5b396a641e..781945b77b 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -528,8 +528,8 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double const int red_blocks=umutual2b(_eflag,_vflag); // copy field and fieldp from device to host (_fieldp store both arrays, one after another) - - _fieldp.update_host(_max_fieldp_size*8,false); + // NOTE: move this step to update_fieldp() to delay device-host transfer + //_fieldp.update_host(_max_fieldp_size*8,false); } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index 7f9777061c..f439e2945f 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -183,6 +183,13 @@ class BaseAmoeba { const double off2_polar, double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr); + // copy field and fieldp from device to host after umutual2b + virtual void update_fieldp(void **fieldp_ptr) { + *fieldp_ptr=_fieldp.host.begin(); + // _fieldp store both arrays, one after another + _fieldp.update_host(_max_fieldp_size*8,false); + } + // -------------------------- DEVICE DATA ------------------------- /// Device Properties and Atom and Neighbor storage diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index f62c46aaec..3065bfefd4 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -549,8 +549,8 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos const int red_blocks=umutual2b(this->_eflag,this->_vflag); // copy field and fieldp from device to host (_fieldp store both arrays, one after another) - - this->_fieldp.update_host(this->_max_fieldp_size*8,false); + // NOTE: move this step to update_fieldp() to delay device-host transfer + //this->_fieldp.update_host(this->_max_fieldp_size*8,false); } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 9d3d845ad0..e7deaddbf3 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -179,6 +179,10 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **h aewald, off2, fieldp_ptr); } +void hippo_gpu_update_fieldp(void **fieldp_ptr) { + HIPPOMF.update_fieldp(fieldp_ptr); +} + void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, const bool eflag_in, const bool vflag_in, diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index fd9d99e56c..1376a6bd12 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -82,6 +82,8 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, const double aewald, const double off2, void **fieldp_ptr); +void amoeba_gpu_update_fieldp(void **fieldp_ptr); + void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, const bool eflag, const bool vflag, const bool eatom, const bool vatom, @@ -844,6 +846,72 @@ void PairAmoebaGPU::udirect2b_cpu() } } +/* ---------------------------------------------------------------------- + ufield0c = mutual induction via Ewald sum + ufield0c computes the mutual electrostatic field due to + induced dipole moments via Ewald summation +------------------------------------------------------------------------- */ + +void PairAmoebaGPU::ufield0c(double **field, double **fieldp) +{ + int i,j; + double term; + + // zero field,fieldp for owned and ghost atoms + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + for (i = 0; i < nall; i++) { + for (j = 0; j < 3; j++) { + field[i][j] = 0.0; + fieldp[i][j] = 0.0; + } + } + + // get the real space portion of the mutual field first + + if (polar_rspace_flag) umutual2b(field,fieldp); + + // get the reciprocal space part of the mutual field + + if (polar_kspace_flag) umutual1(field,fieldp); + + // add the self-energy portion of the mutual field + + term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + field[i][j] += term*uind[i][j]; + fieldp[i][j] += term*uinp[i][j]; + } + } + + // accumulate the field and fieldp values from the real space portion from umutual2b() on the GPU + // field and fieldp may already have some nonzero values from kspace (umutual1 and self) + + amoeba_gpu_update_fieldp(&fieldp_pinned); + + int inum = atom->nlocal; + double *field_ptr = (double *)fieldp_pinned; + + for (int i = 0; i < nlocal; i++) { + int idx = 4*i; + field[i][0] += field_ptr[idx]; + field[i][1] += field_ptr[idx+1]; + field[i][2] += field_ptr[idx+2]; + } + + double* fieldp_ptr = (double *)fieldp_pinned; + fieldp_ptr += 4*inum; + for (int i = 0; i < nlocal; i++) { + int idx = 4*i; + fieldp[i][0] += fieldp_ptr[idx]; + fieldp[i][1] += fieldp_ptr[idx+1]; + fieldp[i][2] += fieldp_ptr[idx+2]; + } +} + /* ---------------------------------------------------------------------- umutual2b = Ewald real mutual field via list umutual2b computes the real space contribution of the induced @@ -881,7 +949,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) amoeba_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, aewald, off2, &fieldp_pinned); - +/* // accumulate the field and fieldp values from the GPU lib // field and fieldp may already have some nonzero values from kspace (umutual1) @@ -903,6 +971,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp) fieldp[i][1] += fieldp_ptr[idx+1]; fieldp[i][2] += fieldp_ptr[idx+2]; } +*/ } /* ---------------------------------------------------------------------- diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h index e0210faa68..e419ccd1a1 100644 --- a/src/GPU/pair_amoeba_gpu.h +++ b/src/GPU/pair_amoeba_gpu.h @@ -39,6 +39,7 @@ class PairAmoebaGPU : public PairAmoeba { virtual void multipole_real(); virtual void udirect2b(double **, double **); virtual void umutual2b(double **, double **); + virtual void ufield0c(double **, double **); virtual void polar_real(); private: diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 535be0c160..41c1355fbb 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -100,6 +100,8 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, const double aewald, const double off2, void **fieldp_ptr); +void hippo_gpu_update_fieldp(void **fieldp_ptr); + void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole, double **host_uind, double **host_uinp, double *host_pval, const bool eflag, const bool vflag, const bool eatom, const bool vatom, @@ -983,6 +985,72 @@ void PairHippoGPU::udirect2b_cpu() } } +/* ---------------------------------------------------------------------- + ufield0c = mutual induction via Ewald sum + ufield0c computes the mutual electrostatic field due to + induced dipole moments via Ewald summation +------------------------------------------------------------------------- */ + +void PairHippoGPU::ufield0c(double **field, double **fieldp) +{ + int i,j; + double term; + + // zero field,fieldp for owned and ghost atoms + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + for (i = 0; i < nall; i++) { + for (j = 0; j < 3; j++) { + field[i][j] = 0.0; + fieldp[i][j] = 0.0; + } + } + + // get the real space portion of the mutual field first + + if (polar_rspace_flag) umutual2b(field,fieldp); + + // get the reciprocal space part of the mutual field + + if (polar_kspace_flag) umutual1(field,fieldp); + + // add the self-energy portion of the mutual field + + term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + field[i][j] += term*uind[i][j]; + fieldp[i][j] += term*uinp[i][j]; + } + } + + // accumulate the field and fieldp values from real-space portion from umutual2b() on the GPU + // field and fieldp may already have some nonzero values from kspace (umutual1 and self) + + hippo_gpu_update_fieldp(&fieldp_pinned); + + int inum = atom->nlocal; + double *field_ptr = (double *)fieldp_pinned; + + for (int i = 0; i < nlocal; i++) { + int idx = 4*i; + field[i][0] += field_ptr[idx]; + field[i][1] += field_ptr[idx+1]; + field[i][2] += field_ptr[idx+2]; + } + + double* fieldp_ptr = (double *)fieldp_pinned; + fieldp_ptr += 4*inum; + for (int i = 0; i < nlocal; i++) { + int idx = 4*i; + fieldp[i][0] += fieldp_ptr[idx]; + fieldp[i][1] += fieldp_ptr[idx+1]; + fieldp[i][2] += fieldp_ptr[idx+2]; + } +} + /* ---------------------------------------------------------------------- umutual2b = Ewald real mutual field via list umutual2b computes the real space contribution of the induced @@ -1021,7 +1089,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) double *pval = atom->dvector[index_pval]; hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval, aewald, off2, &fieldp_pinned); - +/* // accumulate the field and fieldp values from the GPU lib // field and fieldp may already have some nonzero values from kspace (umutual1) @@ -1043,6 +1111,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp) fieldp[i][1] += fieldp_ptr[idx+1]; fieldp[i][2] += fieldp_ptr[idx+2]; } +*/ } /* ---------------------------------------------------------------------- diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h index c7a4e75ebe..1ed1c3299d 100644 --- a/src/GPU/pair_hippo_gpu.h +++ b/src/GPU/pair_hippo_gpu.h @@ -40,6 +40,7 @@ class PairHippoGPU : public PairAmoeba { virtual void multipole_real(); virtual void udirect2b(double **, double **); virtual void umutual2b(double **, double **); + virtual void ufield0c(double **, double **); virtual void polar_real(); private: