git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7734 f3b2605a-c512-4ea7-a41b-209d697bcdaa

Author: sjplimp
Date: 2012-02-09 16:56:02 +00:00
parent 2ab478095e
commit d892afbf67
8 changed files with 1235 additions and 199 deletions


@@ -78,6 +78,8 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
bordergroup = 0;
style = SINGLE;
uniform = 1;
xsplit = ysplit = zsplit = NULL;
multilo = multihi = NULL;
cutghostmulti = NULL;
cutghostuser = 0.0;
@@ -124,6 +126,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
Comm::~Comm()
{
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
delete [] customfile;
delete [] outfile;
@@ -145,7 +151,7 @@ Comm::~Comm()
/* ----------------------------------------------------------------------
create 3d grid of procs based on Nprocs and box size & shape
map processors to grid
map processors to grid, setup xyz split for a uniform grid
------------------------------------------------------------------------- */
void Comm::set_proc_grid()
@@ -252,14 +258,31 @@ void Comm::set_proc_grid()
if (outfile) pmap->output(outfile,procgrid,grid2proc);
// set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box();
// free ProcMap class
delete pmap;
// set xsplit,ysplit,zsplit for uniform spacings
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
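For reference, a minimal standalone sketch (hypothetical 4-process dimension, not part of this commit) of the uniform split fractions the loops above produce; sub-domain i spans [split[i], split[i+1]) in scaled (lamda) coordinates:

// sketch only, not LAMMPS source: uniform split fractions for one dimension
#include <cstdio>
#include <vector>

int main() {
  const int nprocs = 4;                      // hypothetical procgrid[0]
  std::vector<double> split(nprocs + 1);
  for (int i = 0; i < nprocs; i++) split[i] = i * 1.0 / nprocs;
  split[nprocs] = 1.0;                       // force exact upper boundary
  for (double s : split) printf("%g ", s);   // prints: 0 0.25 0.5 0.75 1
  printf("\n");
  return 0;
}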
// set lamda box params after procs are assigned
// only set once unless load-balancing occurs
if (domain->triclinic) domain->set_lamda_box();
// send my 3d proc grid to another partition if requested
if (send_to_partition >= 0) {
@@ -399,25 +422,112 @@ void Comm::setup()
}
}
// need = # of procs I need atoms from in each dim based on max cutoff
// for 2d, don't communicate in z
need[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
need[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
need[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
if (domain->dimension == 2) need[2] = 0;
// if non-periodic, do not communicate further than procgrid-1 away
// this enables very large cutoffs in non-periodic systems
// recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
// 0 = from left, 1 = from right
// do not cross non-periodic boundaries, need[2] = 0 for 2d
// sendneed[idim][0/1] = # of procs away I send atoms to
// 0 = to left, 1 = to right
// set equal to recvneed[idim][1/0] of neighbor proc
// maxneed[idim] = max procs away any proc recvs atoms in either direction
// uniform = 1 = uniform sized sub-domains:
// maxneed is directly computable from sub-domain size
// limit to procgrid-1 for non-PBC
// recvneed = maxneed except for procs near non-PBC
// sendneed = recvneed of neighbor on each side
// uniform = 0 = non-uniform sized sub-domains:
// compute recvneed via updown() which accounts for non-PBC
// sendneed = recvneed of neighbor on each side
// maxneed via Allreduce() of recvneed
int *periodicity = domain->periodicity;
if (periodicity[0] == 0) need[0] = MIN(need[0],procgrid[0]-1);
if (periodicity[1] == 0) need[1] = MIN(need[1],procgrid[1]-1);
if (periodicity[2] == 0) need[2] = MIN(need[2],procgrid[2]-1);
int left,right;
if (uniform) {
maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
if (domain->dimension == 2) maxneed[2] = 0;
if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
if (!periodicity[0]) {
recvneed[0][0] = MIN(maxneed[0],myloc[0]);
recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
left = myloc[0] - 1;
if (left < 0) left = procgrid[0] - 1;
sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
right = myloc[0] + 1;
if (right == procgrid[0]) right = 0;
sendneed[0][1] = MIN(maxneed[0],right);
} else recvneed[0][0] = recvneed[0][1] =
sendneed[0][0] = sendneed[0][1] = maxneed[0];
if (!periodicity[1]) {
recvneed[1][0] = MIN(maxneed[1],myloc[1]);
recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
left = myloc[1] - 1;
if (left < 0) left = procgrid[1] - 1;
sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
right = myloc[1] + 1;
if (right == procgrid[1]) right = 0;
sendneed[1][1] = MIN(maxneed[1],right);
} else recvneed[1][0] = recvneed[1][1] =
sendneed[1][0] = sendneed[1][1] = maxneed[1];
if (!periodicity[2]) {
recvneed[2][0] = MIN(maxneed[2],myloc[2]);
recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
left = myloc[2] - 1;
if (left < 0) left = procgrid[2] - 1;
sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
right = myloc[2] + 1;
if (right == procgrid[2]) right = 0;
sendneed[2][1] = MIN(maxneed[2],right);
} else recvneed[2][0] = recvneed[2][1] =
sendneed[2][0] = sendneed[2][1] = maxneed[2];
} else {
recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
left = myloc[0] - 1;
if (left < 0) left = procgrid[0] - 1;
sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
right = myloc[0] + 1;
if (right == procgrid[0]) right = 0;
sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
left = myloc[1] - 1;
if (left < 0) left = procgrid[1] - 1;
sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
right = myloc[1] + 1;
if (right == procgrid[1]) right = 0;
sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
if (domain->dimension == 3) {
recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
left = myloc[2] - 1;
if (left < 0) left = procgrid[2] - 1;
sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
right = myloc[2] + 1;
if (right == procgrid[2]) right = 0;
sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
} else recvneed[2][0] = recvneed[2][1] =
sendneed[2][0] = sendneed[2][1] = 0;
int all[6];
MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
maxneed[0] = MAX(all[0],all[1]);
maxneed[1] = MAX(all[2],all[3]);
maxneed[2] = MAX(all[4],all[5]);
}
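As a worked example of the uniform-case formula (hypothetical numbers, not taken from this commit): with cutghost = 12.0, box length prd = 40.0, and 4 procs in that dimension, maxneed = int(12.0*4/40.0) + 1 = 2, i.e. ghost atoms must come from up to two processor layers away.

// sketch only, not LAMMPS source: uniform-case maxneed arithmetic
#include <cstdio>

int main() {
  double cutghost = 12.0, prd = 40.0;   // assumed ghost cutoff and box length
  int procgrid = 4;                     // assumed procs in this dimension
  int maxneed = static_cast<int>(cutghost * procgrid / prd) + 1;
  printf("maxneed = %d\n", maxneed);    // prints: maxneed = 2
  return 0;
}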
// allocate comm memory
nswap = 2 * (need[0]+need[1]+need[2]);
nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
if (nswap > maxswap) grow_swap(nswap);
// setup parameters for each exchange:
@@ -428,13 +538,11 @@ void Comm::setup()
// use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
// if round-off, atoms recvd across PBC can be < or > than subbox boundary
// note that borders() only loops over subset of atoms during each swap
// set slablo > slabhi for swaps across non-periodic boundaries
// this insures no atoms are swapped
// only for procs owning sub-box at non-periodic end of global box
// treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
// for style MULTI:
// multilo/multihi is same as slablo/slabhi, only for each atom type
// multilo/multihi is same, with slablo/slabhi for each atom type
// pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
// pbc = -1/0/1 for PBC factor in each of 3/6 orthog/triclinic dirs
// pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
// for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
// 1st part of if statement is sending to the west/south/down
// 2nd part of if statement is sending to the east/north/up
@@ -443,7 +551,7 @@ void Comm::setup()
int iswap = 0;
for (dim = 0; dim < 3; dim++) {
for (ineed = 0; ineed < 2*need[dim]; ineed++) {
for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
pbc_flag[iswap] = 0;
pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
@@ -463,18 +571,11 @@ void Comm::setup()
}
}
if (myloc[dim] == 0) {
if (periodicity[dim] == 0) {
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
else
for (i = 1; i <= ntypes; i++)
multihi[iswap][i] = multilo[iswap][i] - 1.0;
} else {
pbc_flag[iswap] = 1;
pbc[iswap][dim] = 1;
if (triclinic) {
if (dim == 1) pbc[iswap][5] = 1;
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
}
pbc_flag[iswap] = 1;
pbc[iswap][dim] = 1;
if (triclinic) {
if (dim == 1) pbc[iswap][5] = 1;
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
}
}
@@ -493,18 +594,11 @@ void Comm::setup()
}
}
if (myloc[dim] == procgrid[dim]-1) {
if (periodicity[dim] == 0) {
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
else
for (i = 1; i <= ntypes; i++)
multihi[iswap][i] = multilo[iswap][i] - 1.0;
} else {
pbc_flag[iswap] = 1;
pbc[iswap][dim] = -1;
if (triclinic) {
if (dim == 1) pbc[iswap][5] = -1;
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = -1;
}
pbc_flag[iswap] = 1;
pbc[iswap][dim] = -1;
if (triclinic) {
if (dim == 1) pbc[iswap][5] = -1;
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = -1;
}
}
}
@@ -514,6 +608,55 @@ void Comm::setup()
}
}
/* ----------------------------------------------------------------------
walk up/down the extent of nearby processors in dim and dir
loc = myloc of proc to start at
dir = 0/1 = walk to left/right
do not cross non-periodic boundaries
is not called for z dim in 2d
return how many procs away are needed to encompass cutghost away from loc
------------------------------------------------------------------------- */
int Comm::updown(int dim, int dir, int loc,
double prd, int periodicity, double *split)
{
int index,count;
double frac,delta;
if (dir == 0) {
frac = cutghost[dim]/prd;
index = loc - 1;
delta = 0.0;
count = 0;
while (delta < frac) {
if (index < 0) {
if (!periodicity) break;
index = procgrid[dim] - 1;
}
count++;
delta += split[index+1] - split[index];
index--;
}
} else {
frac = cutghost[dim]/prd;
index = loc + 1;
delta = 0.0;
count = 0;
while (delta < frac) {
if (index >= procgrid[dim]) {
if (!periodicity) break;
index = 0;
}
count++;
delta += split[index+1] - split[index];
index++;
}
}
return count;
}
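A self-contained sketch (hypothetical split values, simplified to the rightward walk only) of the logic above: accumulate sub-domain widths from the split array until the cutghost fraction of the box is covered, stopping early at a non-periodic boundary.

// sketch only, not LAMMPS source: rightward walk over a non-uniform split
#include <cstdio>

int walk_right(int loc, int nprocs, double frac, int periodic,
               const double *split) {
  int index = loc + 1, count = 0;
  double delta = 0.0;
  while (delta < frac) {
    if (index >= nprocs) {
      if (!periodic) break;                 // do not cross non-periodic boundary
      index = 0;                            // wrap around under PBC
    }
    count++;
    delta += split[index+1] - split[index]; // add width of next sub-domain
    index++;
  }
  return count;
}

int main() {
  double split[] = {0.0, 0.1, 0.4, 0.7, 1.0};       // 4 unequal sub-domains
  printf("%d\n", walk_right(0, 4, 0.3, 1, split));  // 1: neighbor width 0.3 covers cutoff
  printf("%d\n", walk_right(3, 4, 0.3, 0, split));  // 0: stops at non-periodic edge
  return 0;
}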
/* ----------------------------------------------------------------------
forward communication of atom coords every timestep
other per-atom attributes may also be sent via pack/unpack routines
@@ -537,27 +680,30 @@ void Comm::forward_comm(int dummy)
if (comm_x_only) {
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
else buf = NULL;
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
if (size_forward_recv[iswap])
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
} else if (ghost_velocity) {
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
if (size_forward_recv[iswap])
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
} else {
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
if (size_forward_recv[iswap])
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
}
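The change above, and the analogous ones in the routines below, all follow one pattern: post the non-blocking receive, the send, and the wait only when the corresponding count is nonzero. A minimal standalone MPI sketch of that pattern (hypothetical buffers and counts, not LAMMPS code):

// sketch only, not LAMMPS source: guarded nonblocking exchange that skips
// Irecv/Send/Wait entirely when the send or receive count is zero
#include <mpi.h>

void guarded_exchange(double *sendbuf, int nsend, int sendproc,
                      double *recvbuf, int nrecv, int recvproc,
                      MPI_Comm world)
{
  MPI_Request request;
  MPI_Status status;
  if (nrecv)
    MPI_Irecv(recvbuf, nrecv, MPI_DOUBLE, recvproc, 0, world, &request);
  if (nsend)
    MPI_Send(sendbuf, nsend, MPI_DOUBLE, sendproc, 0, world);
  if (nrecv)
    MPI_Wait(&request, &status);
}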
@@ -601,19 +747,22 @@ void Comm::reverse_comm()
for (int iswap = nswap-1; iswap >= 0; iswap--) {
if (sendproc[iswap] != me) {
if (comm_f_only) {
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_recv[iswap])
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
else buf = NULL;
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (size_reverse_send[iswap])
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world);
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
} else {
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_recv[iswap])
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
}
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
@@ -752,8 +901,8 @@ void Comm::exchange()
void Comm::borders()
{
int i,n,itype,iswap,dim,ineed,maxneed,smax,rmax;
int nsend,nrecv,nfirst,nlast,ngroup;
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
double lo,hi;
int *type;
double **x;
@@ -774,8 +923,8 @@ void Comm::borders()
for (dim = 0; dim < 3; dim++) {
nlast = 0;
maxneed = 2*need[dim];
for (ineed = 0; ineed < maxneed; ineed++) {
twoneed = 2*maxneed[dim];
for (ineed = 0; ineed < twoneed; ineed++) {
// find atoms within slab boundaries lo/hi using <= and >=
// check atoms between nfirst and nlast
@@ -799,55 +948,64 @@ void Comm::borders()
nsend = 0;
// sendflag = 0 if I do not send on this swap
// sendneed test indicates receiver no longer requires data
// e.g. due to non-PBC or non-uniform sub-domains
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
else sendflag = 1;
// find send atoms according to SINGLE vs MULTI
// all atoms eligible versus atoms in bordergroup
// only need to limit loop to bordergroup for first sends (ineed < 2)
// on these sends, break loop in two: owned (in group) and ghost
if (!bordergroup || ineed >= 2) {
if (style == SINGLE) {
for (i = nfirst; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
for (i = nfirst; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
if (sendflag) {
if (!bordergroup || ineed >= 2) {
if (style == SINGLE) {
for (i = nfirst; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
for (i = nfirst; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
}
} else {
if (style == SINGLE) {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
if (style == SINGLE) {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][dim] >= lo && x[i][dim] <= hi) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
} else {
ngroup = atom->nfirst;
for (i = 0; i < ngroup; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
for (i = atom->nlocal; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
for (i = atom->nlocal; i < nlast; i++) {
itype = type[i];
if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
sendlist[iswap][nsend++] = i;
}
}
}
}
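A short sketch of the sendflag test above (hypothetical sendneed values): swaps alternate direction within a dimension (even ineed = left, odd ineed = right) and ineed/2 counts how many layers away the receiver sits, so a neighbor that only needs one layer from the right causes the second rightward swap to be skipped.

// sketch only, not LAMMPS source: which swaps actually send
#include <cstdio>

int main() {
  int sendneed[2] = {2, 1};   // assumed layers to send left/right in this dim
  for (int ineed = 0; ineed < 4; ineed++) {
    int sendflag = (ineed / 2 >= sendneed[ineed % 2]) ? 0 : 1;
    printf("ineed=%d dir=%s sendflag=%d\n",
           ineed, (ineed % 2) ? "right" : "left", sendflag);
  }
  return 0;   // ineed=3 (2nd swap to the right) gets sendflag=0
}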
@@ -865,18 +1023,18 @@ void Comm::borders()
pbc_flag[iswap],pbc[iswap]);
// swap atoms with other proc
// no MPI calls except SendRecv if nsend/nrecv = 0
// put incoming ghosts at end of my atom arrays
// if swapping with self, simply copy, no messages
if (sendproc[iswap] != me) {
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
if (nrecv*size_border > maxrecv)
grow_recv(nrecv*size_border);
MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
recvproc[iswap],0,world,&request);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
recvproc[iswap],0,world,&request);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (nrecv) MPI_Wait(&request,&status);
buf = buf_recv;
} else {
nrecv = nsend;
@@ -939,10 +1097,12 @@ void Comm::forward_comm_pair(Pair *pair)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -973,10 +1133,12 @@ void Comm::reverse_comm_pair(Pair *pair)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1008,10 +1170,12 @@ void Comm::forward_comm_fix(Fix *fix)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1042,10 +1206,12 @@ void Comm::reverse_comm_fix(Fix *fix)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1077,10 +1243,12 @@ void Comm::forward_comm_compute(Compute *compute)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1111,10 +1279,12 @@ void Comm::reverse_comm_compute(Compute *compute)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1146,10 +1316,12 @@ void Comm::forward_comm_dump(Dump *dump)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1180,10 +1352,12 @@ void Comm::reverse_comm_dump(Dump *dump)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@@ -1373,6 +1547,10 @@ void Comm::set_processors(int narg, char **arg)
if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
error->all(FLERR,"Illegal processors command");
int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
if (p && p != nprocs)
error->all(FLERR,"Specified processors != physical processors");
int iarg = 3;
while (iarg < narg) {
if (strcmp(arg[iarg],"grid") == 0) {