diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp index f9ad6901a1..1fca4e721b 100644 --- a/src/comm_tiled.cpp +++ b/src/comm_tiled.cpp @@ -13,10 +13,21 @@ #include "lmptype.h" #include "comm_tiled.h" +#include "atom.h" +#include "atom_vec.h" +#include "domain.h" +#include "pair.h" +#include "modify.h" +#include "fix.h" +#include "compute.h" +#include "dump.h" +#include "memory.h" #include "error.h" using namespace LAMMPS_NS; +#define BUFFACTOR 1.5 + enum{SINGLE,MULTI}; // same as in Comm /* ---------------------------------------------------------------------- */ @@ -39,6 +50,28 @@ CommTiled::~CommTiled() void CommTiled::init() { + triclinic = domain->triclinic; + map_style = atom->map_style; + + // comm_only = 1 if only x,f are exchanged in forward/reverse comm + // comm_x_only = 0 if ghost_velocity since velocities are added + + comm_x_only = atom->avec->comm_x_only; + comm_f_only = atom->avec->comm_f_only; + if (ghost_velocity) comm_x_only = 0; + + // set per-atom sizes for forward/reverse/border comm + // augment by velocity and fix quantities if needed + + size_forward = atom->avec->size_forward; + size_reverse = atom->avec->size_reverse; + size_border = atom->avec->size_border; + + if (ghost_velocity) size_forward += atom->avec->size_velocity; + if (ghost_velocity) size_border += atom->avec->size_velocity; + + for (int i = 0; i < modify->nfix; i++) + size_border += modify->fix[i]->comm_border; } /* ---------------------------------------------------------------------- @@ -50,6 +83,13 @@ void CommTiled::init() void CommTiled::setup() { + // error on triclinic or multi? + // set nswap + // setup neighbor proc info for exchange() + // setup nsendproc and nrecvproc bounts + // setup sendproc and recvproc lists + // setup sendbox + // reallocate requests and statuses } /* ---------------------------------------------------------------------- @@ -59,83 +99,78 @@ void CommTiled::setup() void CommTiled::forward_comm(int dummy) { - /* - - int n; - MPI_Request request; + int i,irecv,n; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; - double *buf; - // exchange data with another proc - // if other proc is self, just copy + // exchange data with another set of procs in each swap + // if first proc in set is self, then is just self across PBC, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) { + if (sendproc[iswap][0] != me) { if (comm_x_only) { - // loop over recvs - if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; - else buf = NULL; - if (size_forward_recv[iswap]) - MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - // loop over sends - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - // wait any or all? 
- if (size_forward_recv[iswap]) MPI_Wait(&request,&status); + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); + } + MPI_Waitall(nrecvproc[iswap],requests,statuses); + } else if (ghost_velocity) { - // loop over recvs - if (size_forward_recv[iswap]) - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - // loop over sends - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - // wait any or all? - if (size_forward_recv[iswap]) MPI_Wait(&request,&status); - // loop over recvs and unpack each - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], + size_forward_recv[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); + } + for (i = 0; i < nrecvproc[iswap]; i++) { + MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); + avec->unpack_comm_vel(recvnum[iswap][i],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } + } else { - // loop over recvs - if (size_forward_recv[iswap]) - MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - // loop over sends - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); - // wait any or all? 
- if (size_forward_recv[iswap]) MPI_Wait(&request,&status); - // loop over recvs and unpack each - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], + size_forward_recv[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); + } + for (i = 0; i < nrecvproc[iswap]; i++) { + MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); + avec->unpack_comm(recvnum[iswap][i],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } } } else { if (comm_x_only) { - // single copy - if (sendnum[iswap]) - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - x[firstrecv[iswap]],pbc_flag[iswap], - pbc[iswap]); + if (sendnum[iswap][0]) + n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0], + x[firstrecv[iswap][0]],pbc_flag[iswap][0], + pbc[iswap][0]); } else if (ghost_velocity) { - // single copy - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); + n = avec->pack_comm_vel(sendnum[iswap][0],sendlist[iswap][0], + buf_send,pbc_flag[iswap][0],pbc[iswap][0]); + avec->unpack_comm_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send); } else { - // single copy - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); - avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); + n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0], + buf_send,pbc_flag[iswap][0],pbc[iswap][0]); + avec->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send); } } } - */ } /* ---------------------------------------------------------------------- @@ -145,63 +180,60 @@ void CommTiled::forward_comm(int dummy) void CommTiled::reverse_comm() { - /* - int n; + int i,irecv,n; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **f = atom->f; - double *buf; - // exchange data with other procs in each swap - // if other proc is self, just copy + // exchange data with another set of procs in each swap + // if first proc in set is self, then is just self across PBC, just copy // if comm_f_only set, exchange or copy directly from f, don't pack for (int iswap = nswap-1; iswap >= 0; iswap--) { - if (!selfflag[iswap]) { + if (sendproc[iswap][0] != me) { if (comm_f_only) { - // loop over recvs - if (size_reverse_recv[iswap]) - MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, - sendproc[iswap],0,world,&request); - // loop over sends - if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; - else buf = NULL; - if (size_reverse_send[iswap]) - MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, - recvproc[iswap],0,world); - // wait any or all? 
- if (size_reverse_recv[iswap]) MPI_Wait(&request,&status); - // loop over recvs and unpack each - avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); + for (i = 0; i < nsendproc[iswap]; i++) + MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], + size_reverse_recv[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world); + for (i = 0; i < nsendproc[iswap]; i++) { + MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); + avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } + } else { - // loop over recvs - if (size_reverse_recv[iswap]) - MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, - sendproc[iswap],0,world,&request); - // loop over sends - n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); - if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); - // wait any or all? - if (size_reverse_recv[iswap]) MPI_Wait(&request,&status); - // loop over recvs and unpack each - avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); + for (i = 0; i < nsendproc[iswap]; i++) + MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], + size_reverse_recv[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nrecvproc[iswap]; i++) { + n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i], + buf_send); + MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world); + } + for (i = 0; i < nsendproc[iswap]; i++) { + MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); + avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } } } else { if (comm_f_only) { - // single copy - if (sendnum[iswap]) - avec->unpack_reverse(sendnum[iswap],sendlist[iswap], - f[firstrecv[iswap]]); + if (sendnum[iswap][0]) + avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0], + f[firstrecv[iswap][0]]); } else { - // single copy - n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); - avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); + n = avec->pack_reverse(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],buf_send); } } } - */ } /* ---------------------------------------------------------------------- @@ -221,32 +253,247 @@ void CommTiled::exchange() /* ---------------------------------------------------------------------- borders: list nearby atoms to send to neighboring procs at every timestep - one list is created for every swap that will be made - as list is made, actually do swaps - this does equivalent of a communicate, so don't need to explicitly - call communicate routine on reneighboring timestep + one list is created per swap/proc that will be made + as list is made, actually do communication + this does equivalent of a forward_comm(), so don't need to explicitly + call forward_comm() on reneighboring timestep this routine is called before every reneighboring for triclinic, atoms must be in lamda coords (0-1) before borders is called ------------------------------------------------------------------------- */ void CommTiled::borders() { + int i,n,irecv,ngroup,nlast,nsend,rmaxswap; + double xlo,xhi,ylo,yhi,zlo,zhi; + double *bbox; + double **x; + MPI_Status status; + AtomVec *avec = atom->avec; + + // smax = max size of single send in a swap/proc + // rmax = max size of recvs 
from all procs for a swap
+
+  int smax = 0;
+  int rmax = 0;
+
+  // loop over all swaps in all dimensions
+
+  for (int iswap = 0; iswap < nswap; iswap++) {
+
+    // find atoms within rectangles using <= and >=
+    // for x-dim swaps, check owned atoms
+    // for yz-dim swaps, check owned and ghost atoms
+    // store sent atom indices in list for use in future timesteps
+    // NOTE: assume SINGLE mode, add back in logic for MULTI mode later
+
+    x = atom->x;
+
+    // iproc = index of proc to send to, i = atom index,
+    //   so the atom loops do not clobber the proc index
+
+    for (int iproc = 0; iproc < nsendproc[iswap]; iproc++) {
+      bbox = sendbox[iswap][iproc];
+      xlo = bbox[0]; xhi = bbox[1];
+      ylo = bbox[2]; yhi = bbox[3];
+      zlo = bbox[4]; zhi = bbox[5];
+
+      ngroup = atom->nfirst;
+      if (iswap < 2) nlast = atom->nlocal;
+      else nlast = atom->nlocal + atom->nghost;
+
+      nsend = 0;
+      for (i = 0; i < ngroup; i++)
+        if (x[i][0] >= xlo && x[i][0] <= xhi &&
+            x[i][1] >= ylo && x[i][1] <= yhi &&
+            x[i][2] >= zlo && x[i][2] <= zhi) {
+          if (nsend == maxsendlist[iswap][iproc]) grow_list(iswap,iproc,nsend);
+          sendlist[iswap][iproc][nsend++] = i;
+        }
+      for (i = atom->nlocal; i < nlast; i++)
+        if (x[i][0] >= xlo && x[i][0] <= xhi &&
+            x[i][1] >= ylo && x[i][1] <= yhi &&
+            x[i][2] >= zlo && x[i][2] <= zhi) {
+          if (nsend == maxsendlist[iswap][iproc]) grow_list(iswap,iproc,nsend);
+          sendlist[iswap][iproc][nsend++] = i;
+        }
+      sendnum[iswap][iproc] = nsend;
+      smax = MAX(smax,nsend);
+    }
+
+    // send sendnum counts to procs who recv from me
+
+    if (sendproc[iswap][0] != me) {
+      for (i = 0; i < nrecvproc[iswap]; i++)
+        MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
+                  recvproc[iswap][i],0,world,&requests[i]);
+      for (i = 0; i < nsendproc[iswap]; i++)
+        MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
+      MPI_Waitall(nrecvproc[iswap],requests,statuses);
+
+    } else recvnum[iswap][0] = sendnum[iswap][0];
+
+    // setup other per swap/proc values from sendnum and recvnum
+
+    rmaxswap = 0;
+    for (i = 0; i < nrecvproc[iswap]; i++) {
+      rmaxswap += recvnum[iswap][i];
+      size_forward_recv[iswap][i] = recvnum[iswap][i]*size_forward;
+      size_reverse_send[iswap][i] = recvnum[iswap][i]*size_reverse;
+      size_reverse_recv[iswap][i] = sendnum[iswap][i]*size_reverse;
+      if (i == 0) {
+        firstrecv[iswap][0] = atom->nlocal + atom->nghost;
+        forward_recv_offset[iswap][0] = 0;
+      } else {
+        firstrecv[iswap][i] = firstrecv[iswap][i-1] + recvnum[iswap][i-1];
+        forward_recv_offset[iswap][i] =
+          forward_recv_offset[iswap][i-1] + recvnum[iswap][i-1];
+      }
+    }
+    rmax = MAX(rmax,rmaxswap);
+
+    // insure send/recv buffers are large enough for border comm
+
+    if (smax*size_border > maxsend) grow_send(smax*size_border,0);
+    if (rmax*size_border > maxrecv) grow_recv(rmax*size_border);
+
+    // swap atoms with other procs using pack_border(), unpack_border()
+
+    if (sendproc[iswap][0] != me) {
+      if (ghost_velocity) {
+        for (i = 0; i < nrecvproc[iswap]; i++)
+          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                    recvnum[iswap][i]*size_border,
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsendproc[iswap]; i++) {
+          n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
+                                    buf_send,pbc_flag[iswap][i],
+                                    pbc[iswap][i]);
+          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+                   sendproc[iswap][i],0,world);
+        }
+        for (i = 0; i < nrecvproc[iswap]; i++) {
+          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
+          avec->unpack_border_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                                  &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
+
+      } else {
+        for (i = 0; i < nrecvproc[iswap]; i++)
+          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                    recvnum[iswap][i]*size_border,
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsendproc[iswap]; i++) {
+          n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
+                                buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+                   sendproc[iswap][i],0,world);
+        }
+        for (i = 0; i < nrecvproc[iswap]; i++) {
+          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
+          avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                              &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
+      }
+
+    } else {
+      if (ghost_velocity) {
+        n = avec->pack_border_vel(sendnum[iswap][0],sendlist[iswap][0],
+                                  buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
+        avec->unpack_border_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+      } else {
+        n = avec->pack_border(sendnum[iswap][0],sendlist[iswap][0],
+                              buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
+        avec->unpack_border(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+      }
+    }
+
+    // increment ghost atoms
+
+    n = nrecvproc[iswap];
+    atom->nghost += forward_recv_offset[iswap][n-1] + recvnum[iswap][n-1];
+  }
+
+  // insure send/recv buffers are long enough for all forward & reverse comm
+
+  int max = MAX(maxforward*smax,maxreverse*rmax);
+  if (max > maxsend) grow_send(max,0);
+  max = MAX(maxforward*rmax,maxreverse*smax);
+  if (max > maxrecv) grow_recv(max);
+
+  // reset global->local map
+
+  if (map_style) atom->map_set();
 }

 /* ----------------------------------------------------------------------
    forward communication invoked by a Pair
+   n = constant number of datums per atom
 ------------------------------------------------------------------------- */

 void CommTiled::forward_comm_pair(Pair *pair)
 {
+  int i,irecv,n;
+  MPI_Status status;
+
+  for (int iswap = 0; iswap < nswap; iswap++) {
+    if (sendproc[iswap][0] != me) {
+      for (i = 0; i < nrecvproc[iswap]; i++)
+        MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                  size_forward_recv[iswap][i],
+                  MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+      for (i = 0; i < nsendproc[iswap]; i++) {
+        n = pair->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
+                            buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+        MPI_Send(buf_send,n*sendnum[iswap][i],MPI_DOUBLE,
+                 sendproc[iswap][i],0,world);
+      }
+      for (i = 0; i < nrecvproc[iswap]; i++) {
+        MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
+        pair->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                          &buf_recv[forward_recv_offset[iswap][irecv]]);
+      }
+
+    } else {
+      n = pair->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
+                          buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
+      pair->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+    }
+  }
 }

 /* ----------------------------------------------------------------------
    reverse communication invoked by a Pair
+   n = constant number of datums per atom
 ------------------------------------------------------------------------- */

 void CommTiled::reverse_comm_pair(Pair *pair)
 {
+  int i,irecv,n;
+  MPI_Status status;
+
+  for (int iswap = nswap-1; iswap >= 0; iswap--) {
+    if (sendproc[iswap][0] != me) {
+      for (i = 0; i < nsendproc[iswap]; i++)
+        MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+                  size_reverse_recv[iswap][i],MPI_DOUBLE,
+                  sendproc[iswap][i],0,world,&requests[i]);
+      for (i = 0; i < nrecvproc[iswap]; i++) {
+        n = pair->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
+                                    buf_send);
+        MPI_Send(buf_send,n*recvnum[iswap][i],MPI_DOUBLE,
+                 recvproc[iswap][i],0,world);
+      }
+      for (i = 0; i < nsendproc[iswap]; i++) {
+        MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
+ pair->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } + + } else { + n = pair->pack_reverse_comm(recvnum[iswap][0],firstrecv[iswap][0], + buf_send); + pair->unpack_reverse_comm(sendnum[iswap][0],sendlist[iswap][0],buf_send); + } + } } /* ---------------------------------------------------------------------- @@ -256,6 +503,33 @@ void CommTiled::reverse_comm_pair(Pair *pair) void CommTiled::forward_comm_fix(Fix *fix) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap][0] != me) { + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], + size_forward_recv[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = fix->pack_comm(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n*sendnum[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world); + } + for (i = 0; i < nrecvproc[iswap]; i++) { + MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); + fix->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } + + } else { + n = fix->pack_comm(sendnum[iswap][0],sendlist[iswap][0], + buf_send,pbc_flag[iswap][0],pbc[iswap][0]); + fix->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + } + } } /* ---------------------------------------------------------------------- @@ -265,11 +539,39 @@ void CommTiled::forward_comm_fix(Fix *fix) void CommTiled::reverse_comm_fix(Fix *fix) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = nswap-1; iswap >= 0; iswap--) { + if (sendproc[iswap][0] != me) { + for (i = 0; i < nsendproc[iswap]; i++) + MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], + size_reverse_recv[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nrecvproc[iswap]; i++) { + n = fix->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i], + buf_send); + MPI_Send(buf_send,n*recvnum[iswap][i],MPI_DOUBLE, + recvproc[iswap][i],0,world); + } + for (i = 0; i < nsendproc[iswap]; i++) { + MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); + fix->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } + + } else { + n = fix->pack_reverse_comm(recvnum[iswap][0],firstrecv[iswap][0], + buf_send); + fix->unpack_reverse_comm(sendnum[iswap][0],sendlist[iswap][0],buf_send); + } + } } /* ---------------------------------------------------------------------- forward communication invoked by a Fix n = total datums for all atoms, allows for variable number/atom + NOTE: complicated b/c don't know # to recv a priori ------------------------------------------------------------------------- */ void CommTiled::forward_comm_variable_fix(Fix *fix) @@ -287,34 +589,148 @@ void CommTiled::reverse_comm_variable_fix(Fix *fix) /* ---------------------------------------------------------------------- forward communication invoked by a Compute + n = constant number of datums per atom ------------------------------------------------------------------------- */ void CommTiled::forward_comm_compute(Compute *compute) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap][0] != me) { + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], + size_forward_recv[iswap][i], + 
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = compute->pack_comm(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n*sendnum[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world); + } + for (i = 0; i < nrecvproc[iswap]; i++) { + MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); + compute->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } + + } else { + n = compute->pack_comm(sendnum[iswap][0],sendlist[iswap][0], + buf_send,pbc_flag[iswap][0],pbc[iswap][0]); + compute->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + } + } } /* ---------------------------------------------------------------------- reverse communication invoked by a Compute + n = constant number of datums per atom ------------------------------------------------------------------------- */ void CommTiled::reverse_comm_compute(Compute *compute) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = nswap-1; iswap >= 0; iswap--) { + if (sendproc[iswap][0] != me) { + for (i = 0; i < nsendproc[iswap]; i++) + MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], + size_reverse_recv[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nrecvproc[iswap]; i++) { + n = compute->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i], + buf_send); + MPI_Send(buf_send,n*recvnum[iswap][i],MPI_DOUBLE, + recvproc[iswap][i],0,world); + } + for (i = 0; i < nsendproc[iswap]; i++) { + MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); + compute-> + unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } + + } else { + n = compute->pack_reverse_comm(recvnum[iswap][0],firstrecv[iswap][0], + buf_send); + compute->unpack_reverse_comm(sendnum[iswap][0],sendlist[iswap][0], + buf_send); + } + } } /* ---------------------------------------------------------------------- forward communication invoked by a Dump + n = constant number of datums per atom ------------------------------------------------------------------------- */ void CommTiled::forward_comm_dump(Dump *dump) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap][0] != me) { + for (i = 0; i < nrecvproc[iswap]; i++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], + size_forward_recv[iswap][i], + MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nsendproc[iswap]; i++) { + n = dump->pack_comm(sendnum[iswap][i],sendlist[iswap][i], + buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + MPI_Send(buf_send,n*sendnum[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world); + } + for (i = 0; i < nrecvproc[iswap]; i++) { + MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); + dump->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } + + } else { + n = dump->pack_comm(sendnum[iswap][0],sendlist[iswap][0], + buf_send,pbc_flag[iswap][0],pbc[iswap][0]); + dump->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + } + } } /* ---------------------------------------------------------------------- reverse communication invoked by a Dump + n = constant number of datums per atom ------------------------------------------------------------------------- */ void CommTiled::reverse_comm_dump(Dump *dump) { + int i,irecv,n; + MPI_Status status; + + for (int iswap = nswap-1; iswap >= 0; 
iswap--) {
+    if (sendproc[iswap][0] != me) {
+      for (i = 0; i < nsendproc[iswap]; i++)
+        MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+                  size_reverse_recv[iswap][i],MPI_DOUBLE,
+                  sendproc[iswap][i],0,world,&requests[i]);
+      for (i = 0; i < nrecvproc[iswap]; i++) {
+        n = dump->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
+                                    buf_send);
+        MPI_Send(buf_send,n*recvnum[iswap][i],MPI_DOUBLE,
+                 recvproc[iswap][i],0,world);
+      }
+      for (i = 0; i < nsendproc[iswap]; i++) {
+        MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
+        dump->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv],
+                                  &buf_recv[reverse_recv_offset[iswap][irecv]]);
+      }
+
+    } else {
+      n = dump->pack_reverse_comm(recvnum[iswap][0],firstrecv[iswap][0],
+                                  buf_send);
+      dump->unpack_reverse_comm(sendnum[iswap][0],sendlist[iswap][0],buf_send);
+    }
+  }
 }

 /* ----------------------------------------------------------------------
@@ -335,6 +751,45 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
   return nrecv;
 }

+/* ----------------------------------------------------------------------
+   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
+   if flag = 1, realloc
+   if flag = 0, don't need to realloc with copy, just free/malloc
+------------------------------------------------------------------------- */
+
+void CommTiled::grow_send(int n, int flag)
+{
+  maxsend = static_cast<int> (BUFFACTOR * n);
+  if (flag)
+    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
+  else {
+    memory->destroy(buf_send);
+    memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   free/malloc the size of the recv buffer as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommTiled::grow_recv(int n)
+{
+  maxrecv = static_cast<int> (BUFFACTOR * n);
+  memory->destroy(buf_recv);
+  memory->create(buf_recv,maxrecv,"comm:buf_recv");
+}
+
+/* ----------------------------------------------------------------------
+   realloc the size of the iswap sendlist as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommTiled::grow_list(int iswap, int iwhich, int n)
+{
+  maxsendlist[iswap][iwhich] = static_cast<int> (BUFFACTOR * n);
+  memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich],
+               "comm:sendlist[iswap]");
+}
+
 /* ----------------------------------------------------------------------
    return # of bytes of allocated memory
 ------------------------------------------------------------------------- */
diff --git a/src/comm_tiled.h b/src/comm_tiled.h
index 139e829313..2c7a6e62df 100644
--- a/src/comm_tiled.h
+++ b/src/comm_tiled.h
@@ -41,9 +41,52 @@ class CommTiled : public Comm {
   void forward_comm_dump(class Dump *);    // forward comm from a Dump
   void reverse_comm_dump(class Dump *);    // reverse comm from a Dump
-  void forward_comm_array(int, double **);          // forward comm of array
+  void forward_comm_array(int, double **);           // forward comm of array
   int exchange_variable(int, double *, double *&);  // exchange on neigh stencil
   bigint memory_usage();
+
+ private:
+  int nswap;                    // # of swaps to perform = 2*dim
+  int *nsendproc,*nrecvproc;    // # of procs to send/recv to/from in each swap
+
+  int triclinic;                // 0 if domain is orthog, 1 if triclinic
+  int map_style;                // non-0 if global->local mapping is done
+  int size_forward;             // # of per-atom datums in forward comm
+  int size_reverse;             // # of datums in reverse comm
+  int
size_border; // # of datums in forward border comm + + int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc + int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc + int **size_forward_recv; // # of values to recv in each forward swap/proc + int **firstrecv; // where to put 1st recv atom per swap/proc + int **size_reverse_send; // # to send in each reverse comm per swap/proc + int **size_reverse_recv; // # to recv in each reverse comm per swap/proc + + int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc + int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc + + int ***sendlist; // list of atoms to send per swap/proc + int **maxsendlist; // max size of send list per swap/proc + int **pbc_flag; // general flag for sending atoms thru PBC + int ***pbc; // dimension flags for PBC adjustments + + double ***sendbox; // bounding box of atoms to send per swap/proc + + double *buf_send; // send buffer for all comm + double *buf_recv; // recv buffer for all comm + int maxsend,maxrecv; // current size of send/recv buffer + int maxforward,maxreverse; // max # of datums in forward/reverse comm + + int bufextra; // extra space beyond maxsend in send buffer + + MPI_Request *requests; + MPI_Status *statuses; + + int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm + + void grow_send(int, int); // reallocate send buffer + void grow_recv(int); // free/allocate recv buffer + void grow_list(int, int, int); // reallocate sendlist for one swap/proc }; } diff --git a/src/dump.cpp b/src/dump.cpp index 878304addf..c3ac1b11dd 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -752,13 +752,6 @@ void Dump::modify_params(int narg, char **arg) output->every_dump[idump] = n; iarg += 2; - } else if (strcmp(arg[iarg],"first") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); - if (strcmp(arg[iarg+1],"yes") == 0) first_flag = 1; - else if (strcmp(arg[iarg+1],"no") == 0) first_flag = 0; - else error->all(FLERR,"Illegal dump_modify command"); - iarg += 2; - } else if (strcmp(arg[iarg],"fileper") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); if (!multiproc) @@ -787,6 +780,13 @@ void Dump::modify_params(int narg, char **arg) *ptr = '%'; iarg += 2; + } else if (strcmp(arg[iarg],"first") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); + if (strcmp(arg[iarg+1],"yes") == 0) first_flag = 1; + else if (strcmp(arg[iarg+1],"no") == 0) first_flag = 0; + else error->all(FLERR,"Illegal dump_modify command"); + iarg += 2; + } else if (strcmp(arg[iarg],"flush") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal dump_modify command"); if (strcmp(arg[iarg+1],"yes") == 0) flush_flag = 1; diff --git a/src/dump.h b/src/dump.h index 41d8d5c38a..b14a858d56 100644 --- a/src/dump.h +++ b/src/dump.h @@ -56,8 +56,7 @@ class Dump : protected Pointers { int compressed; // 1 if dump file is written compressed, 0 no int binary; // 1 if dump file is written binary, 0 no int multifile; // 0 = one big file, 1 = one file per timestep - - int multiproc; // 0 = proc 0 writes for all + int multiproc; // 0 = proc 0 writes for all, // else # of procs writing files int nclusterprocs; // # of procs in my cluster that write to one file int filewriter; // 1 if this proc writes a file, else 0 @@ -99,7 +98,6 @@ class Dump : protected Pointers { int maxbuf; // size of buf double *buf; // memory for atom quantities - int maxsbuf; // size of sbuf char *sbuf; // memory for atom quantities in 
string format diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index 5d0044556e..32317ffe02 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -24,6 +24,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) // default ID = 0 id = 0; + unprocessed = 1; // default is pair request @@ -96,6 +97,7 @@ int NeighRequest::identical(NeighRequest *other) { int same = 1; + if (unprocessed) same = 0; if (requestor != other->requestor) same = 0; if (id != other->id) same = 0; diff --git a/src/neigh_request.h b/src/neigh_request.h index 4facd9f405..65a8c6f41e 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -23,6 +23,8 @@ class NeighRequest : protected Pointers { void *requestor; // class that made request int id; // ID of request // used to track multiple requests from one class + int unprocessed; // 1 when first requested + // 0 after processed by Neighbor class // which class is requesting the list, one flag is 1, others are 0 diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 30ff8b3b0a..0d1601a86e 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -490,6 +490,7 @@ void Neighbor::init() // wait to allocate initial pages until copy lists are detected for (i = 0; i < nrequest; i++) { + requests[i]->unprocessed = 0; if (requests[i]->kokkos_host || requests[i]->kokkos_device) continue; lists[i] = new NeighList(lmp); lists[i]->index = i; diff --git a/src/write_restart.cpp b/src/write_restart.cpp index 5c5bd066ed..27ebac3c7a 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -127,12 +127,13 @@ void WriteRestart::command(int narg, char **arg) comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); + // write single restart file + write(file); delete [] file; } -/* ---------------------------------------------------------------------- -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ void WriteRestart::multiproc_options(int multiproc_caller, int mpiioflag_caller, int narg, char **arg) @@ -579,8 +580,7 @@ void WriteRestart::file_layout(int send_size) // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- -/* ---------------------------------------------------------------------- -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ void WriteRestart::magic_string() { @@ -591,8 +591,7 @@ void WriteRestart::magic_string() delete [] str; } -/* ---------------------------------------------------------------------- -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ void WriteRestart::endian() { @@ -600,8 +599,7 @@ void WriteRestart::endian() fwrite(&endian,sizeof(int),1,fp); } -/* ---------------------------------------------------------------------- -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- */ void WriteRestart::version_numeric() {
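
For context on the communication pattern used throughout the new CommTiled routines in this patch: each swap posts an MPI_Irecv for every partner it receives from, packs and sends a buffer to every partner it sends to, then uses MPI_Waitany to unpack each incoming message in whatever order it completes (or MPI_Waitall when nothing needs unpacking, as in the comm_x_only case). The sketch below is a minimal, self-contained illustration of that Irecv/Send/Waitany pattern only; it is not part of the patch, and the Partner struct, exchange_with_partners() function, and the pack/unpack placeholders are illustrative assumptions rather than LAMMPS API.

#include <mpi.h>
#include <vector>

struct Partner {
  int proc;        // rank to exchange with
  int sendcount;   // # of doubles to send to this rank
  int recvcount;   // # of doubles to receive from this rank
};

void exchange_with_partners(const std::vector<Partner> &partners,
                            std::vector<double> &sendbuf,
                            std::vector<double> &recvbuf,
                            const std::vector<int> &recvoffset,
                            MPI_Comm comm)
{
  int n = (int) partners.size();
  std::vector<MPI_Request> requests(n);

  // post all receives first so no matching send can block
  for (int i = 0; i < n; i++)
    MPI_Irecv(&recvbuf[recvoffset[i]],partners[i].recvcount,MPI_DOUBLE,
              partners[i].proc,0,comm,&requests[i]);

  // pack and send one message per partner, reusing a single send buffer
  for (int i = 0; i < n; i++) {
    // ... pack partners[i].sendcount values into sendbuf here ...
    MPI_Send(sendbuf.data(),partners[i].sendcount,MPI_DOUBLE,
             partners[i].proc,0,comm);
  }

  // unpack messages in completion order, not partner order
  MPI_Status status;
  for (int i = 0; i < n; i++) {
    int irecv;
    MPI_Waitany(n,requests.data(),&irecv,&status);
    // ... unpack partners[irecv].recvcount values starting at
    //     &recvbuf[recvoffset[irecv]] ...
  }
}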