Rearranged the order of real-space and kspace part of ufield0c(), delayed device-host transfer from umutual2b() to overlap with kspace part
This commit is contained in:
@ -148,6 +148,10 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **
|
|||||||
aewald, off2, fieldp_ptr);
|
aewald, off2, fieldp_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void amoeba_gpu_update_fieldp(void **fieldp_ptr) {
|
||||||
|
AMOEBAMF.update_fieldp(fieldp_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp,
|
double **host_uind, double **host_uinp,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
|||||||
@ -528,8 +528,8 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
|
|||||||
const int red_blocks=umutual2b(_eflag,_vflag);
|
const int red_blocks=umutual2b(_eflag,_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
||||||
_fieldp.update_host(_max_fieldp_size*8,false);
|
//_fieldp.update_host(_max_fieldp_size*8,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -183,6 +183,13 @@ class BaseAmoeba {
|
|||||||
const double off2_polar, double *charge, const int nlocal, double *boxlo,
|
const double off2_polar, double *charge, const int nlocal, double *boxlo,
|
||||||
double *prd, void **tep_ptr);
|
double *prd, void **tep_ptr);
|
||||||
|
|
||||||
|
// copy field and fieldp from device to host after umutual2b
|
||||||
|
virtual void update_fieldp(void **fieldp_ptr) {
|
||||||
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
|
// _fieldp store both arrays, one after another
|
||||||
|
_fieldp.update_host(_max_fieldp_size*8,false);
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------- DEVICE DATA -------------------------
|
// -------------------------- DEVICE DATA -------------------------
|
||||||
|
|
||||||
/// Device Properties and Atom and Neighbor storage
|
/// Device Properties and Atom and Neighbor storage
|
||||||
|
|||||||
@ -549,8 +549,8 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
const int red_blocks=umutual2b(this->_eflag,this->_vflag);
|
const int red_blocks=umutual2b(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
// NOTE: move this step to update_fieldp() to delay device-host transfer
|
||||||
this->_fieldp.update_host(this->_max_fieldp_size*8,false);
|
//this->_fieldp.update_host(this->_max_fieldp_size*8,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -179,6 +179,10 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **h
|
|||||||
aewald, off2, fieldp_ptr);
|
aewald, off2, fieldp_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void hippo_gpu_update_fieldp(void **fieldp_ptr) {
|
||||||
|
HIPPOMF.update_fieldp(fieldp_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp, double *host_pval,
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
|||||||
@ -82,6 +82,8 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup,
|
|||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
const double aewald, const double off2, void **fieldp_ptr);
|
const double aewald, const double off2, void **fieldp_ptr);
|
||||||
|
|
||||||
|
void amoeba_gpu_update_fieldp(void **fieldp_ptr);
|
||||||
|
|
||||||
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
@ -844,6 +846,72 @@ void PairAmoebaGPU::udirect2b_cpu()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
ufield0c = mutual induction via Ewald sum
|
||||||
|
ufield0c computes the mutual electrostatic field due to
|
||||||
|
induced dipole moments via Ewald summation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
double term;
|
||||||
|
|
||||||
|
// zero field,fieldp for owned and ghost atoms
|
||||||
|
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
int nall = nlocal + atom->nghost;
|
||||||
|
|
||||||
|
for (i = 0; i < nall; i++) {
|
||||||
|
for (j = 0; j < 3; j++) {
|
||||||
|
field[i][j] = 0.0;
|
||||||
|
fieldp[i][j] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the real space portion of the mutual field first
|
||||||
|
|
||||||
|
if (polar_rspace_flag) umutual2b(field,fieldp);
|
||||||
|
|
||||||
|
// get the reciprocal space part of the mutual field
|
||||||
|
|
||||||
|
if (polar_kspace_flag) umutual1(field,fieldp);
|
||||||
|
|
||||||
|
// add the self-energy portion of the mutual field
|
||||||
|
|
||||||
|
term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS;
|
||||||
|
for (i = 0; i < nlocal; i++) {
|
||||||
|
for (j = 0; j < 3; j++) {
|
||||||
|
field[i][j] += term*uind[i][j];
|
||||||
|
fieldp[i][j] += term*uinp[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// accumulate the field and fieldp values from the real space portion from umutual2b() on the GPU
|
||||||
|
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
||||||
|
|
||||||
|
amoeba_gpu_update_fieldp(&fieldp_pinned);
|
||||||
|
|
||||||
|
int inum = atom->nlocal;
|
||||||
|
double *field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
int idx = 4*i;
|
||||||
|
field[i][0] += field_ptr[idx];
|
||||||
|
field[i][1] += field_ptr[idx+1];
|
||||||
|
field[i][2] += field_ptr[idx+2];
|
||||||
|
}
|
||||||
|
|
||||||
|
double* fieldp_ptr = (double *)fieldp_pinned;
|
||||||
|
fieldp_ptr += 4*inum;
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
int idx = 4*i;
|
||||||
|
fieldp[i][0] += fieldp_ptr[idx];
|
||||||
|
fieldp[i][1] += fieldp_ptr[idx+1];
|
||||||
|
fieldp[i][2] += fieldp_ptr[idx+2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
umutual2b = Ewald real mutual field via list
|
umutual2b = Ewald real mutual field via list
|
||||||
umutual2b computes the real space contribution of the induced
|
umutual2b computes the real space contribution of the induced
|
||||||
@ -881,7 +949,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
|
|||||||
|
|
||||||
amoeba_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp,
|
amoeba_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp,
|
||||||
aewald, off2, &fieldp_pinned);
|
aewald, off2, &fieldp_pinned);
|
||||||
|
/*
|
||||||
// accumulate the field and fieldp values from the GPU lib
|
// accumulate the field and fieldp values from the GPU lib
|
||||||
// field and fieldp may already have some nonzero values from kspace (umutual1)
|
// field and fieldp may already have some nonzero values from kspace (umutual1)
|
||||||
|
|
||||||
@ -903,6 +971,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
|
|||||||
fieldp[i][1] += fieldp_ptr[idx+1];
|
fieldp[i][1] += fieldp_ptr[idx+1];
|
||||||
fieldp[i][2] += fieldp_ptr[idx+2];
|
fieldp[i][2] += fieldp_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
|
|||||||
@ -39,6 +39,7 @@ class PairAmoebaGPU : public PairAmoeba {
|
|||||||
virtual void multipole_real();
|
virtual void multipole_real();
|
||||||
virtual void udirect2b(double **, double **);
|
virtual void udirect2b(double **, double **);
|
||||||
virtual void umutual2b(double **, double **);
|
virtual void umutual2b(double **, double **);
|
||||||
|
virtual void ufield0c(double **, double **);
|
||||||
virtual void polar_real();
|
virtual void polar_real();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -100,6 +100,8 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup,
|
|||||||
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
||||||
const double aewald, const double off2, void **fieldp_ptr);
|
const double aewald, const double off2, void **fieldp_ptr);
|
||||||
|
|
||||||
|
void hippo_gpu_update_fieldp(void **fieldp_ptr);
|
||||||
|
|
||||||
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
||||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
@ -983,6 +985,72 @@ void PairHippoGPU::udirect2b_cpu()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
ufield0c = mutual induction via Ewald sum
|
||||||
|
ufield0c computes the mutual electrostatic field due to
|
||||||
|
induced dipole moments via Ewald summation
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void PairHippoGPU::ufield0c(double **field, double **fieldp)
|
||||||
|
{
|
||||||
|
int i,j;
|
||||||
|
double term;
|
||||||
|
|
||||||
|
// zero field,fieldp for owned and ghost atoms
|
||||||
|
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
int nall = nlocal + atom->nghost;
|
||||||
|
|
||||||
|
for (i = 0; i < nall; i++) {
|
||||||
|
for (j = 0; j < 3; j++) {
|
||||||
|
field[i][j] = 0.0;
|
||||||
|
fieldp[i][j] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the real space portion of the mutual field first
|
||||||
|
|
||||||
|
if (polar_rspace_flag) umutual2b(field,fieldp);
|
||||||
|
|
||||||
|
// get the reciprocal space part of the mutual field
|
||||||
|
|
||||||
|
if (polar_kspace_flag) umutual1(field,fieldp);
|
||||||
|
|
||||||
|
// add the self-energy portion of the mutual field
|
||||||
|
|
||||||
|
term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS;
|
||||||
|
for (i = 0; i < nlocal; i++) {
|
||||||
|
for (j = 0; j < 3; j++) {
|
||||||
|
field[i][j] += term*uind[i][j];
|
||||||
|
fieldp[i][j] += term*uinp[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// accumulate the field and fieldp values from real-space portion from umutual2b() on the GPU
|
||||||
|
// field and fieldp may already have some nonzero values from kspace (umutual1 and self)
|
||||||
|
|
||||||
|
hippo_gpu_update_fieldp(&fieldp_pinned);
|
||||||
|
|
||||||
|
int inum = atom->nlocal;
|
||||||
|
double *field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
int idx = 4*i;
|
||||||
|
field[i][0] += field_ptr[idx];
|
||||||
|
field[i][1] += field_ptr[idx+1];
|
||||||
|
field[i][2] += field_ptr[idx+2];
|
||||||
|
}
|
||||||
|
|
||||||
|
double* fieldp_ptr = (double *)fieldp_pinned;
|
||||||
|
fieldp_ptr += 4*inum;
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
int idx = 4*i;
|
||||||
|
fieldp[i][0] += fieldp_ptr[idx];
|
||||||
|
fieldp[i][1] += fieldp_ptr[idx+1];
|
||||||
|
fieldp[i][2] += fieldp_ptr[idx+2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
umutual2b = Ewald real mutual field via list
|
umutual2b = Ewald real mutual field via list
|
||||||
umutual2b computes the real space contribution of the induced
|
umutual2b computes the real space contribution of the induced
|
||||||
@ -1021,7 +1089,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
|
|||||||
double *pval = atom->dvector[index_pval];
|
double *pval = atom->dvector[index_pval];
|
||||||
hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval,
|
hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval,
|
||||||
aewald, off2, &fieldp_pinned);
|
aewald, off2, &fieldp_pinned);
|
||||||
|
/*
|
||||||
// accumulate the field and fieldp values from the GPU lib
|
// accumulate the field and fieldp values from the GPU lib
|
||||||
// field and fieldp may already have some nonzero values from kspace (umutual1)
|
// field and fieldp may already have some nonzero values from kspace (umutual1)
|
||||||
|
|
||||||
@ -1043,6 +1111,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
|
|||||||
fieldp[i][1] += fieldp_ptr[idx+1];
|
fieldp[i][1] += fieldp_ptr[idx+1];
|
||||||
fieldp[i][2] += fieldp_ptr[idx+2];
|
fieldp[i][2] += fieldp_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
|
|||||||
@ -40,6 +40,7 @@ class PairHippoGPU : public PairAmoeba {
|
|||||||
virtual void multipole_real();
|
virtual void multipole_real();
|
||||||
virtual void udirect2b(double **, double **);
|
virtual void udirect2b(double **, double **);
|
||||||
virtual void umutual2b(double **, double **);
|
virtual void umutual2b(double **, double **);
|
||||||
|
virtual void ufield0c(double **, double **);
|
||||||
virtual void polar_real();
|
virtual void polar_real();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
Reference in New Issue
Block a user