From 56b0856e2feb55185cc7b1d8a870979d3e2fa318 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 27 Sep 2016 21:16:33 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15661 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/REPLICA/fix_neb.cpp | 352 +++++++++++++++++++++++++++++++++++----- src/REPLICA/fix_neb.h | 23 ++- src/REPLICA/neb.cpp | 7 +- 3 files changed, 336 insertions(+), 46 deletions(-) diff --git a/src/REPLICA/fix_neb.cpp b/src/REPLICA/fix_neb.cpp index c66fa2dd04..5b426465e3 100644 --- a/src/REPLICA/fix_neb.cpp +++ b/src/REPLICA/fix_neb.cpp @@ -18,10 +18,12 @@ #include "fix_neb.h" #include "universe.h" #include "update.h" +#include "atom.h" #include "domain.h" +#include "comm.h" #include "modify.h" #include "compute.h" -#include "atom.h" +#include "group.h" #include "memory.h" #include "error.h" #include "force.h" @@ -29,6 +31,8 @@ using namespace LAMMPS_NS; using namespace FixConst; +enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC}; + /* ---------------------------------------------------------------------- */ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : @@ -41,8 +45,13 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : // nreplica = number of partitions // ireplica = which world I am in universe + // nprocs_universe = # of procs in all replicase // procprev,procnext = root proc in adjacent replicas + me = comm->me; + nprocs = comm->nprocs; + + nprocs_universe = universe->nprocs; nreplica = universe->nworlds; ireplica = universe->iworld; @@ -67,7 +76,17 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : modify->add_compute(3,newarg); delete [] newarg; + // initialize local storage + + maxlocal = 0; + ntotal = 0; + xprev = xnext = tangent = NULL; + xsend = xrecv = NULL; + tagsend = tagrecv = NULL; + xsendall = xrecvall = NULL; + tagsendall = tagrecvall = NULL; + counts = displacements = NULL; } /* ---------------------------------------------------------------------- */ @@ -80,6 +99,19 @@ FixNEB::~FixNEB() memory->destroy(xprev); memory->destroy(xnext); memory->destroy(tangent); + + memory->destroy(xsend); + memory->destroy(xrecv); + memory->destroy(tagsend); + memory->destroy(tagrecv); + + memory->destroy(xsendall); + memory->destroy(xrecvall); + memory->destroy(tagsendall); + memory->destroy(tagrecvall); + + memory->destroy(counts); + memory->destroy(displacements); } /* ---------------------------------------------------------------------- */ @@ -104,15 +136,35 @@ void FixNEB::init() rclimber = -1; - // setup xprev and xnext arrays + // nebatoms = # of atoms in fix group = atoms with inter-replica forces - memory->destroy(xprev); - memory->destroy(xnext); - memory->destroy(tangent); - nebatoms = atom->nlocal; - memory->create(xprev,nebatoms,3,"neb:xprev"); - memory->create(xnext,nebatoms,3,"neb:xnext"); - memory->create(tangent,nebatoms,3,"neb:tangent"); + bigint count = group->count(igroup); + if (count > MAXSMALLINT) error->all(FLERR,"Too many active NEB atoms"); + nebatoms = count; + + // comm style for inter-replica exchange of coords + + if (nreplica == nprocs_universe && + nebatoms == atom->natoms && atom->sortfreq == 0) + cmode = SINGLE_PROC_DIRECT; + else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP; + else cmode = MULTI_PROC; + + // ntotal = total # of atoms in system, NEB atoms or not + + if (atom->natoms > MAXSMALLINT) error->all(FLERR,"Too many atoms for NEB"); + ntotal = atom->natoms; + + if (atom->nlocal > maxlocal) reallocate(); + + if (MULTI_PROC && counts == NULL) { + memory->create(xsendall,ntotal,3,"neb:xsendall"); + memory->create(xrecvall,ntotal,3,"neb:xrecvall"); + memory->create(tagsendall,ntotal,"neb:tagsendall"); + memory->create(tagrecvall,ntotal,"neb:tagrecvall"); + memory->create(counts,nprocs,"neb:counts"); + memory->create(displacements,nprocs,"neb:displacements"); + } } /* ---------------------------------------------------------------------- */ @@ -133,40 +185,31 @@ void FixNEB::min_post_force(int vflag) double vprev,vnext,vmax,vmin; double delx,dely,delz; double delta1[3],delta2[3]; - MPI_Request request; // veng = PE of this replica // vprev,vnext = PEs of adjacent replicas + // only proc 0 in each replica communicates vprev = vnext = veng = pe->compute_scalar(); - if (ireplica < nreplica-1) MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld); - if (ireplica > 0) MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE); + if (ireplica < nreplica-1 && me == 0) + MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld); + if (ireplica > 0 && me == 0) + MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE); - if (ireplica > 0) MPI_Send(&veng,1,MPI_DOUBLE,procprev,0,uworld); - if (ireplica < nreplica-1) + if (ireplica > 0 && me == 0) + MPI_Send(&veng,1,MPI_DOUBLE,procprev,0,uworld); + if (ireplica < nreplica-1 && me == 0) MPI_Recv(&vnext,1,MPI_DOUBLE,procnext,0,uworld,MPI_STATUS_IGNORE); - // xprev,xnext = atom coords of adjacent replicas - // assume order of atoms in all replicas is the same - // check that number of atoms hasn't changed + if (cmode == MULTI_PROC) { + MPI_Bcast(&vprev,1,MPI_DOUBLE,0,world); + MPI_Bcast(&vnext,1,MPI_DOUBLE,0,world); + } - double **x = atom->x; - int *mask = atom->mask; - int nlocal = atom->nlocal; - if (nlocal != nebatoms) error->one(FLERR,"Atom count changed in fix neb"); + // communicate atoms to/from adjacent replicas to fill xprev,xnext - if (ireplica > 0) - MPI_Irecv(xprev[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld,&request); - if (ireplica < nreplica-1) - MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld); - if (ireplica > 0) MPI_Wait(&request,MPI_STATUS_IGNORE); - - if (ireplica < nreplica-1) - MPI_Irecv(xnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request); - if (ireplica > 0) - MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld); - if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE); + inter_replica_comm(); // trigger potential energy computation on next timestep @@ -175,11 +218,13 @@ void FixNEB::min_post_force(int vflag) // compute norm of GradV for log output double **f = atom->f; + int nlocal = atom->nlocal; + double fsq = 0.0; for (int i = 0; i < nlocal; i++) fsq += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2]; - MPI_Allreduce(&fsq,&gradvnorm,1,MPI_DOUBLE,MPI_MAX,world); + MPI_Allreduce(&fsq,&gradvnorm,1,MPI_DOUBLE,MPI_SUM,world); gradvnorm = sqrt(gradvnorm); // first or last replica has no change to forces, just return @@ -195,6 +240,9 @@ void FixNEB::min_post_force(int vflag) // depending on relative PEs of 3 replicas // see Henkelman & Jonsson 2000 paper, eqs 8-11 + double **x = atom->x; + int *mask = atom->mask; + if (vnext > veng && veng > vprev) { for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -260,9 +308,15 @@ void FixNEB::min_post_force(int vflag) nlen += delx*delx + dely*dely + delz*delz; } - tlen = sqrt(tlen); - plen = sqrt(plen); - nlen = sqrt(nlen); + double lenall; + MPI_Allreduce(&tlen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); + tlen = sqrt(lenall); + + MPI_Allreduce(&plen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); + plen = sqrt(lenall); + + MPI_Allreduce(&nlen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); + nlen = sqrt(lenall); // normalize tangent vector @@ -295,9 +349,12 @@ void FixNEB::min_post_force(int vflag) f[i][2]*tangent[i][2]; } + double dotall; + MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); + double prefactor; - if (ireplica == rclimber) prefactor = -2.0*dot; - else prefactor = -dot + kspring*(nlen-plen); + if (ireplica == rclimber) prefactor = -2.0*dotall; + else prefactor = -dotall + kspring*(nlen-plen); for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { @@ -306,3 +363,222 @@ void FixNEB::min_post_force(int vflag) f[i][2] += prefactor*tangent[i][2]; } } + +/* ---------------------------------------------------------------------- + send/recv NEB atoms to/from adjacent replicas + received atoms matching my local atoms are stored in xprev,xnext + replicas 0 and N-1 send but do not receive any atoms +------------------------------------------------------------------------- */ + +void FixNEB::inter_replica_comm() +{ + int i,m; + MPI_Request request; + MPI_Request requests[2]; + MPI_Status statuses[2]; + + // reallocate memory if necessary + + if (atom->nlocal > maxlocal) reallocate(); + + double **x = atom->x; + tagint *tag = atom->tag; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + // ----------------------------------------------------- + // 3 cases: two for single proc per replica + // one for multiple procs per replica + // ----------------------------------------------------- + + // single proc per replica + // all atoms are NEB atoms and no atom sorting is enabled + // direct comm of x -> xprev and x -> xnext + + if (cmode == SINGLE_PROC_DIRECT) { + if (ireplica > 0) + MPI_Irecv(xprev[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld,&request); + if (ireplica < nreplica-1) + MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld); + if (ireplica > 0) MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (ireplica < nreplica-1) + MPI_Irecv(xnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request); + if (ireplica > 0) + MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld); + if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE); + + return; + } + + // single proc per replica + // but only some atoms are NEB atoms or atom sorting is enabled + // send atom IDs and coords of only NEB atoms to prev/next proc + // recv proc uses atom->map() to match received coords to owned atoms + + if (cmode == SINGLE_PROC_MAP) { + m = 0; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + tagsend[m] = tag[i]; + xsend[m][0] = x[i][0]; + xsend[m][1] = x[i][1]; + xsend[m][2] = x[i][2]; + m++; + } + + if (ireplica > 0) { + MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]); + MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld,&requests[1]); + } + if (ireplica < nreplica-1) { + MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld); + MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld); + } + + if (ireplica > 0) { + MPI_Waitall(2,requests,statuses); + for (i = 0; i < nebatoms; i++) { + m = atom->map(tagrecv[i]); + xprev[m][0] = xrecv[i][0]; + xprev[m][1] = xrecv[i][1]; + xprev[m][2] = xrecv[i][2]; + } + } + + if (ireplica < nreplica-1) { + MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); + MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,&requests[1]); + } + if (ireplica > 0) { + MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); + MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld); + } + + if (ireplica < nreplica-1) { + MPI_Waitall(2,requests,statuses); + for (i = 0; i < nebatoms; i++) { + m = atom->map(tagrecv[i]); + xnext[m][0] = xrecv[i][0]; + xnext[m][1] = xrecv[i][1]; + xnext[m][2] = xrecv[i][2]; + } + } + + return; + } + + // multiple procs per replica + // MPI_Gather all coords and atom IDs to root proc of each replica + // send to root of adjacent replicas + // bcast within each replica + // each proc extracts info for atoms it owns via atom->map() + + m = 0; + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + tagsend[m] = tag[i]; + xsend[m][0] = x[i][0]; + xsend[m][1] = x[i][1]; + xsend[m][2] = x[i][2]; + m++; + } + + MPI_Gather(&m,1,MPI_INT,counts,1,MPI_INT,0,world); + displacements[0] = 0; + for (i = 0; i < nprocs-1; i++) + displacements[i+1] = displacements[i] + counts[i]; + MPI_Gatherv(tagsend,m,MPI_LMP_TAGINT, + tagsendall,counts,displacements,MPI_LMP_TAGINT,0,world); + for (i = 0; i < nprocs; i++) counts[i] *= 3; + for (i = 0; i < nprocs-1; i++) + displacements[i+1] = displacements[i] + counts[i]; + if (xsend) + MPI_Gatherv(xsend[0],3*m,MPI_DOUBLE, + xsendall[0],counts,displacements,MPI_DOUBLE,0,world); + else + MPI_Gatherv(NULL,3*m,MPI_DOUBLE, + xsendall[0],counts,displacements,MPI_DOUBLE,0,world); + + if (ireplica > 0 && me == 0) { + MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]); + MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld, + &requests[1]); + } + if (ireplica < nreplica-1 && me == 0) { + MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld); + MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld); + } + + if (ireplica > 0) { + if (me == 0) MPI_Waitall(2,requests,statuses); + + MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world); + MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world); + + for (i = 0; i < nebatoms; i++) { + m = atom->map(tagrecvall[i]); + if (m < 0 || m >= nlocal) continue; + xprev[m][0] = xrecvall[i][0]; + xprev[m][1] = xrecvall[i][1]; + xprev[m][2] = xrecvall[i][2]; + } + } + + if (ireplica < nreplica-1 && me == 0) { + MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); + MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld, + &requests[1]); + } + if (ireplica > 0 && me == 0) { + MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); + MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld); + } + + if (ireplica < nreplica-1) { + if (me == 0) MPI_Waitall(2,requests,statuses); + + MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world); + MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world); + + for (i = 0; i < nebatoms; i++) { + m = atom->map(tagrecvall[i]); + if (m < 0 || m >= nlocal) continue; + xnext[m][0] = xrecvall[i][0]; + xnext[m][1] = xrecvall[i][1]; + xnext[m][2] = xrecvall[i][2]; + } + } +} + +/* ---------------------------------------------------------------------- + reallocate xprev,xnext,tangent arrays if necessary + reallocate communication arrays if necessary +------------------------------------------------------------------------- */ + +void FixNEB::reallocate() +{ + memory->destroy(xprev); + memory->destroy(xnext); + memory->destroy(tangent); + + if (cmode != SINGLE_PROC_DIRECT) { + memory->destroy(xsend); + memory->destroy(xrecv); + memory->destroy(tagsend); + memory->destroy(tagrecv); + } + + maxlocal = atom->nmax; + + memory->create(xprev,maxlocal,3,"neb:xprev"); + memory->create(xnext,maxlocal,3,"neb:xnext"); + memory->create(tangent,maxlocal,3,"neb:tangent"); + + if (cmode != SINGLE_PROC_DIRECT) { + memory->create(xsend,maxlocal,3,"neb:xsend"); + memory->create(xrecv,maxlocal,3,"neb:xrecv"); + memory->create(tagsend,maxlocal,"neb:tagsend"); + memory->create(tagrecv,maxlocal,"neb:tagrecv"); + } +} diff --git a/src/REPLICA/fix_neb.h b/src/REPLICA/fix_neb.h index 39bb5dc81f..4026f84f15 100644 --- a/src/REPLICA/fix_neb.h +++ b/src/REPLICA/fix_neb.h @@ -38,17 +38,34 @@ class FixNEB : public Fix { void min_post_force(int); private: + int me,nprocs,nprocs_universe; double kspring; int ireplica,nreplica; int procnext,procprev; + int cmode; MPI_Comm uworld; char *id_pe; class Compute *pe; - int nebatoms; - double **xprev,**xnext; - double **tangent; + int nebatoms; // # of active NEB atoms + int ntotal; // total # of atoms, NEB or not + int maxlocal; // size of xprev,xnext,tangent arrays + + double **xprev,**xnext; // coords of my owned atoms in neighbor replicas + double **tangent; // work vector for inter-replica forces + + double **xsend,**xrecv; // coords to send/recv to/from other replica + tagint *tagsend,*tagrecv; // ditto for atom IDs + + // info gathered from all procs in my replica + double **xsendall,**xrecvall; // coords to send/recv to/from other replica + tagint *tagsendall,*tagrecvall; // ditto for atom IDs + + int *counts,*displacements; // used for MPI_Gather + + void inter_replica_comm(); + void reallocate(); }; } diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 3677c834a6..03bd0b61e1 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -137,10 +137,6 @@ void NEB::command(int narg, char **arg) // error checks if (nreplica == 1) error->all(FLERR,"Cannot use NEB with a single replica"); - if (nreplica != universe->nprocs) - error->all(FLERR,"Can only use NEB with 1-processor replicas"); - if (atom->sortfreq > 0) - error->all(FLERR,"Cannot use NEB with atom_modify sort enabled"); if (atom->map_style == 0) error->all(FLERR,"Cannot use NEB unless atom map exists"); @@ -531,7 +527,7 @@ void NEB::open(char *file) /* ---------------------------------------------------------------------- query fix NEB for info on each replica - proc 0 prints current NEB status + universe proc 0 prints current NEB status ------------------------------------------------------------------------- */ void NEB::print_status() @@ -552,6 +548,7 @@ void NEB::print_status() if (output->thermo->normflag) one[0] /= atom->natoms; if (me == 0) MPI_Allgather(one,nall,MPI_DOUBLE,&all[0][0],nall,MPI_DOUBLE,roots); + MPI_Bcast(&all[0][0],nall*nreplica,MPI_DOUBLE,0,world); rdist[0] = 0.0; for (int i = 1; i < nreplica; i++)