diff --git a/examples/amoeba/in.ubiquitin b/examples/amoeba/in.ubiquitin index 2491493c45..f017d8f122 100644 --- a/examples/amoeba/in.ubiquitin +++ b/examples/amoeba/in.ubiquitin @@ -4,7 +4,7 @@ units real boundary p p p atom_style amoeba -#atom_modify sort 1000 7.0 +atom_modify sort 1000 7.0 bond_style class2 angle_style amoeba dihedral_style none diff --git a/src/AMOEBA/pair_amoeba.h b/src/AMOEBA/pair_amoeba.h index 9d23fccdd8..0ec601de47 100644 --- a/src/AMOEBA/pair_amoeba.h +++ b/src/AMOEBA/pair_amoeba.h @@ -361,11 +361,11 @@ class PairAmoeba : public Pair { void polar_kspace(); void damppole(double, int, double, double, double *, double *, double *); - void induce(); + virtual void induce(); void ulspred(); void ufield0c(double **, double **); void uscale0b(int, double **, double **, double **, double **); - void dfield0c(double **, double **); + virtual void dfield0c(double **, double **); void umutual1(double **, double **); void umutual2b(double **, double **); void udirect1(double **); diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index a5cc86e39d..f2ba3acceb 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ -18,13 +18,16 @@ #include "pair_amoeba_gpu.h" +#include "amoeba_convolution.h" #include "atom.h" #include "comm.h" #include "domain.h" #include "error.h" +#include "fix_store.h" #include "force.h" #include "gpu_extra.h" #include "math_const.h" +#include "memory.h" #include "my_page.h" #include "neigh_list.h" #include "neigh_request.h" @@ -35,7 +38,15 @@ using namespace LAMMPS_NS; using namespace MathConst; +enum{INDUCE,RSD,SETUP_AMOEBA,SETUP_HIPPO,KMPOLE,AMGROUP}; // forward comm +enum{FIELD,ZRSD,TORQUE,UFLD}; // reverse comm +enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG}; enum{MUTUAL,OPT,TCG,DIRECT}; +enum{GEAR,ASPC,LSQR}; +enum{BUILD,APPLY}; +enum{GORDON1,GORDON2}; + +#define DEBYE 4.80321 // conversion factor from q-Angs (real units) to Debye // External functions from cuda library for atom decomposition @@ -54,30 +65,28 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype, void amoeba_gpu_clear(); int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, - double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, double *prd, - void **fieldp_ptr); + double **host_x, int *host_type, int *host_amtype, int *host_amgroup, + double **host_rpole, double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, int* nspecial15, tagint** special15, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd, + void **fieldp_ptr); int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *host_amtype, int *host_amgroup, - double **host_rpole, double **host_uind, double **host_uinp, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, int* nspecial15, tagint** special15, - const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double *boxlo, double *prd, - void **tep_ptr); + double **host_x, int *host_type, int *host_amtype, int *host_amgroup, + double **host_rpole, double **host_uind, double **host_uinp, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, int* nspecial15, tagint** special15, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd, + void **tep_ptr); double amoeba_gpu_bytes(); -enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG}; - /* ---------------------------------------------------------------------- */ PairAmoebaGPU::PairAmoebaGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE) @@ -290,6 +299,486 @@ void PairAmoebaGPU::init_style() tep_single = true; } +/* ---------------------------------------------------------------------- + induce = induced dipole moments via pre-conditioned CG solver + adapted from Tinker induce0a() routine +------------------------------------------------------------------------- */ + +void PairAmoebaGPU::induce() +{ + bool done; + int i,j,m,ii,itype; + int iter,maxiter; + double polmin; + double eps,epsold; + double epsd,epsp; + double udsum,upsum; + double a,ap,b,bp; + double sum,sump,term; + double reduce[4],allreduce[4]; + + double *poli; + double **conj,**conjp; + double **vec,**vecp; + double **udir,**usum,**usump; + + int debug = 1; + + // set cutoffs, taper coeffs, and PME params + // create qfac here, free at end of polar() + + if (use_ewald) { + choose(POLAR_LONG); + int nmine = p_kspace->nfft_owned; + memory->create(qfac,nmine,"ameoba/induce:qfac"); + } else choose(POLAR); + + // owned atoms + + double **x = atom->x; + double **f = atom->f; + int nlocal = atom->nlocal; + + // zero out the induced dipoles at each site + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uind[i][j] = 0.0; + uinp[i][j] = 0.0; + } + } + + // allocation of arrays + // NOTE: not all are used by all methods + // NOTE: could be re-allocated dynamically + + memory->create(poli,nlocal,"ameoba/induce:poli"); + memory->create(conj,nlocal,3,"ameoba/induce:conj"); + memory->create(conjp,nlocal,3,"ameoba/induce:conjp"); + memory->create(vec,nlocal,3,"ameoba/induce:vec"); + memory->create(vecp,nlocal,3,"ameoba/induce:vecp"); + memory->create(udir,nlocal,3,"ameoba/induce:udir"); + memory->create(usum,nlocal,3,"ameoba/induce:usum"); + memory->create(usump,nlocal,3,"ameoba/induce:usump"); + + // get the electrostatic field due to permanent multipoles + + dfield0c(field,fieldp); + + // reverse comm to sum field,fieldp from ghost atoms to owned atoms + + crstyle = FIELD; + comm->reverse_comm_pair(this); + + // DEBUG statements + + /* + for (i = 0; i < nlocal; i++) + if (atom->tag[i] == 1) + printf("AAA FIELD atom %d: field %g %g %g: fieldp %g %g %g\n", + atom->tag[i], + field[i][0],field[i][1],field[i][2], + fieldp[i][0],fieldp[i][1],fieldp[i][2]); + */ + + // set induced dipoles to polarizability times direct field + + for (i = 0; i < nlocal; i++) { + itype = amtype[i]; + for (j = 0; j < 3; j++) { + udir[i][j] = polarity[itype] * field[i][j]; + udirp[i][j] = polarity[itype] * fieldp[i][j]; + if (pcgguess) { + uind[i][j] = udir[i][j]; + uinp[i][j] = udirp[i][j]; + } + } + } + + // get induced dipoles via the OPT extrapolation method + // NOTE: any way to rewrite these loops to avoid allocating + // uopt,uoptp with a optorder+1 dimension, just optorder ?? + // since no need to store optorder+1 values after these loops + + if (poltyp == OPT) { + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uopt[i][0][j] = udir[i][j]; + uoptp[i][0][j] = udirp[i][j]; + } + } + + for (m = 1; m <= optorder; m++) { + optlevel = m - 1; // used in umutual1() for fopt,foptp + + cfstyle = INDUCE; + comm->forward_comm_pair(this); + + ufield0c(field,fieldp); + + crstyle = FIELD; + comm->reverse_comm_pair(this); + + for (i = 0; i < nlocal; i++) { + itype = amtype[i]; + for (j = 0; j < 3; j++) { + uopt[i][m][j] = polarity[itype] * field[i][j]; + uoptp[i][m][j] = polarity[itype] * fieldp[i][j]; + uind[i][j] = uopt[i][m][j]; + uinp[i][j] = uoptp[i][m][j]; + } + } + } + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uind[i][j] = 0.0; + uinp[i][j] = 0.0; + usum[i][j] = 0.0; + usump[i][j] = 0.0; + for (m = 0; m <= optorder; m++) { + usum[i][j] += uopt[i][m][j]; + usump[i][j] += uoptp[i][m][j]; + uind[i][j] += copt[m]*usum[i][j]; + uinp[i][j] += copt[m]*usump[i][j]; + } + } + } + } + + // set tolerances for computation of mutual induced dipoles + + if (poltyp == MUTUAL) { + done = false; + maxiter = 100; + iter = 0; + polmin = 0.00000001; + eps = 100.0; + + // estimate induced dipoles using a polynomial predictor + + if (use_pred && nualt == maxualt) { + ulspred(); + + double ***udalt = fixudalt->tstore; + double ***upalt = fixupalt->tstore; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + udsum = 0.0; + upsum = 0.0; + for (m = 0; m < nualt; m++) { + udsum += bpred[m]*udalt[i][m][j]; + upsum += bpredp[m]*upalt[i][m][j]; + } + uind[i][j] = udsum; + uinp[i][j] = upsum; + } + } + } + + // estimate induced dipoles via inertial extended Lagrangian + // not supported for now + // requires uaux,upaux to persist with each atom + // also requires a velocity vector(s) to persist + // also requires updating uaux,upaux in the Verlet integration + + /* + if (use_ielscf) { + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uind[i][j] = uaux[i][j]; + uinp[i][j] = upaux[i][j]; + } + } + } + */ + + // get the electrostatic field due to induced dipoles + + cfstyle = INDUCE; + comm->forward_comm_pair(this); + + ufield0c(field,fieldp); + + crstyle = FIELD; + comm->reverse_comm_pair(this); + + + // set initial conjugate gradient residual and conjugate vector + + for (i = 0; i < nlocal; i++) { + itype = amtype[i]; + + poli[i] = MAX(polmin,polarity[itype]); + for (j = 0; j < 3; j++) { + if (pcgguess) { + rsd[i][j] = (udir[i][j]-uind[i][j])/poli[i] + field[i][j]; + rsdp[i][j] = (udirp[i][j]-uinp[i][j])/poli[i] + fieldp[i][j]; + } else { + rsd[i][j] = udir[i][j] / poli[i]; + rsdp[i][j] = udirp[i][j] / poli[i]; + } + zrsd[i][j] = rsd[i][j]; + zrsdp[i][j] = rsdp[i][j]; + } + } + + if (pcgprec) { + cfstyle = RSD; + comm->forward_comm_pair(this); + uscale0b(BUILD,rsd,rsdp,zrsd,zrsdp); + uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); + crstyle = ZRSD; + comm->reverse_comm_pair(this); + } + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + conj[i][j] = zrsd[i][j]; + conjp[i][j] = zrsdp[i][j]; + } + } + + // conjugate gradient iteration of the mutual induced dipoles + + while (!done) { + iter++; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + vec[i][j] = uind[i][j]; + vecp[i][j] = uinp[i][j]; + uind[i][j] = conj[i][j]; + uinp[i][j] = conjp[i][j]; + } + } + + cfstyle = INDUCE; + comm->forward_comm_pair(this); + + ufield0c(field,fieldp); + + //error->all(FLERR,"STOP"); + + crstyle = FIELD; + comm->reverse_comm_pair(this); + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uind[i][j] = vec[i][j]; + uinp[i][j] = vecp[i][j]; + vec[i][j] = conj[i][j]/poli[i] - field[i][j]; + vecp[i][j] = conjp[i][j]/poli[i] - fieldp[i][j]; + } + } + + a = 0.0; + ap = 0.0; + sum = 0.0; + sump = 0.0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + a += conj[i][j]*vec[i][j]; + ap += conjp[i][j]*vecp[i][j]; + sum += rsd[i][j]*zrsd[i][j]; + sump += rsdp[i][j]*zrsdp[i][j]; + } + } + + reduce[0] = a; + reduce[1] = ap; + reduce[2] = sum; + reduce[3] = sump; + MPI_Allreduce(reduce,allreduce,4,MPI_DOUBLE,MPI_SUM,world); + a = allreduce[0]; + ap = allreduce[1]; + sum = allreduce[2]; + sump = allreduce[3]; + + if (a != 0.0) a = sum / a; + if (ap != 0.0) ap = sump / ap; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + uind[i][j] = uind[i][j] + a*conj[i][j]; + uinp[i][j] = uinp[i][j] + ap*conjp[i][j]; + rsd[i][j] = rsd[i][j] - a*vec[i][j]; + rsdp[i][j] = rsdp[i][j] - ap*vecp[i][j]; + zrsd[i][j] = rsd[i][j]; + zrsdp[i][j] = rsdp[i][j]; + } + } + + if (pcgprec) { + cfstyle = RSD; + comm->forward_comm_pair(this); + uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp); + crstyle = ZRSD; + comm->reverse_comm_pair(this); + } + + b = 0.0; + bp = 0.0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + b += rsd[i][j]*zrsd[i][j]; + bp += rsdp[i][j]*zrsdp[i][j]; + } + } + + // NOTE: comp of b,bp and allreduce only needed if pcgprec ? + + reduce[0] = b; + reduce[1] = bp; + MPI_Allreduce(reduce,allreduce,4,MPI_DOUBLE,MPI_SUM,world); + b = allreduce[0]; + bp = allreduce[1]; + + if (sum != 0.0) b /= sum; + if (sump != 0.0) bp /= sump; + + epsd = 0.0; + epsp = 0.0; + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + conj[i][j] = zrsd[i][j] + b*conj[i][j]; + conjp[i][j] = zrsdp[i][j] + bp*conjp[i][j]; + epsd += rsd[i][j]*rsd[i][j]; + epsp += rsdp[i][j]*rsdp[i][j]; + } + } + + reduce[0] = epsd; + reduce[1] = epsp; + MPI_Allreduce(reduce,allreduce,4,MPI_DOUBLE,MPI_SUM,world); + epsd = allreduce[0]; + epsp = allreduce[1]; + + // check the convergence of the mutual induced dipoles + + epsold = eps; + eps = MAX(epsd,epsp); + eps = DEBYE * sqrt(eps/atom->natoms); + + if (eps < poleps) done = true; + if (eps > epsold) done = true; + if (iter >= politer) done = true; + + // apply a "peek" iteration to the mutual induced dipoles + + if (done) { + for (i = 0; i < nlocal; i++) { + term = pcgpeek * poli[i]; + for (j = 0; j < 3; j++) { + uind[i][j] += term*rsd[i][j]; + uinp[i][j] += term*rsdp[i][j]; + } + } + } + + } + + // terminate the calculation if dipoles failed to converge + // NOTE: could make this an error + + if (iter >= maxiter || eps > epsold) + if (me == 0) + error->warning(FLERR,"AMOEBA induced dipoles did not converge"); + } + + // DEBUG output to dump file + + if (uind_flag) + dump6(fp_uind,"id uindx uindy uindz uinpx uinpy uinpz",DEBYE,uind,uinp); + + // deallocation of arrays + + memory->destroy(poli); + memory->destroy(conj); + memory->destroy(conjp); + memory->destroy(vec); + memory->destroy(vecp); + memory->destroy(udir); + memory->destroy(usum); + memory->destroy(usump); + + // update the lists of previous induced dipole values + // shift previous m values up to m+1, add new values at m = 0 + // only when preconditioner is used + + if (use_pred) { + double ***udalt = fixudalt->tstore; + double ***upalt = fixupalt->tstore; + + nualt = MIN(nualt+1,maxualt); + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + for (m = nualt-1; m > 0; m--) { + udalt[i][m][j] = udalt[i][m-1][j]; + upalt[i][m][j] = upalt[i][m-1][j]; + } + udalt[i][0][j] = uind[i][j]; + upalt[i][0][j] = uinp[i][j]; + } + } + } +} + +/* ---------------------------------------------------------------------- + dfield0c = direct induction via Ewald sum + dfield0c computes the mutual electrostatic field due to + permanent multipole moments via Ewald summation +------------------------------------------------------------------------- */ + +void PairAmoebaGPU::dfield0c(double **field, double **fieldp) +{ + int i,j,ii; + double term; + + int inum; + int *ilist; + + // zero out field,fieldp for owned and ghost atoms + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + for (i = 0; i < nall; i++) { + for (j = 0; j < 3; j++) { + field[i][j] = 0.0; + fieldp[i][j] = 0.0; + } + } + + // get the reciprocal space part of the permanent field + + if (kspace_flag) udirect1(field); + + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + fieldp[i][j] = field[i][j]; + } + } + + // get the real space portion of the permanent field + + if (rspace_flag) udirect2b(field,fieldp); + + // get the self-energy portion of the permanent field + + term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS; + for (i = 0; i < nlocal; i++) { + for (j = 0; j < 3; j++) { + field[i][j] += term*rpole[i][j+1]; + fieldp[i][j] += term*rpole[i][j+1]; + } + } +} + /* ---------------------------------------------------------------------- udirect2b = Ewald real direct field via list udirect2b computes the real space contribution of the permanent @@ -298,7 +787,7 @@ void PairAmoebaGPU::init_style() void PairAmoebaGPU::udirect2b(double **field, double **fieldp) { - bool gpu_udirect2b_ready = true; + bool gpu_udirect2b_ready = false; if (!gpu_udirect2b_ready) { PairAmoeba::udirect2b(field, fieldp); return; diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h index e5d4aab176..9f538ca903 100644 --- a/src/GPU/pair_amoeba_gpu.h +++ b/src/GPU/pair_amoeba_gpu.h @@ -34,6 +34,8 @@ class PairAmoebaGPU : public PairAmoeba { enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; virtual void polar_real(); + virtual void induce(); + virtual void dfield0c(double **, double **); virtual void udirect2b(double **, double **); private: