git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12207 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2014-07-28 19:58:51 +00:00
parent 3cc23720e9
commit 4455b66003
23 changed files with 2418 additions and 700 deletions


@@ -11,15 +11,20 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "string.h"
#include "lmptype.h"
#include "comm_tiled.h"
#include "comm_brick.h"
#include "atom.h"
#include "atom_vec.h"
#include "domain.h"
#include "force.h"
#include "pair.h"
#include "neighbor.h"
#include "modify.h"
#include "fix.h"
#include "compute.h"
#include "output.h"
#include "dump.h"
#include "memory.h"
#include "error.h"
@@ -27,32 +32,93 @@
using namespace LAMMPS_NS;
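// buffer sizing constants, same conventions as CommBrick:
// BUFFACTOR = overallocation factor when growing a comm buffer
// BUFMIN = minimum buffer size (in doubles)
// BUFEXTRA = extra slack beyond maxsend so one more atom always fits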
#define BUFFACTOR 1.5
#define BUFMIN 1000
#define BUFEXTRA 1000
enum{SINGLE,MULTI}; // same as in Comm
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
{
style = 1;
layout = LAYOUT_UNIFORM;
init_buffers();
}
/* ---------------------------------------------------------------------- */
CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
{
style = 1;
layout = oldcomm->layout;
copy_arrays(oldcomm);
init_buffers();
}
/* ---------------------------------------------------------------------- */
CommTiled::~CommTiled()
{
free_swap();
if (sendlist) for (int i = 0; i < nswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist);
memory->destroy(maxsendlist);
memory->destroy(buf_send);
memory->destroy(buf_recv);
}
/* ----------------------------------------------------------------------
initialize comm buffers and other data structs local to CommTiled
NOTE: if this is identical to CommBrick, put it into Comm ??
------------------------------------------------------------------------- */
void CommTiled::init_buffers()
{
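// maxexchange = max # of datums for one exchanged atom (atom + fixes)
// bufextra = that many datums plus BUFEXTRA slack, kept past maxsend
// so buf_send can always hold one more atom without an immediate regrow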
maxexchange = maxexchange_atom + maxexchange_fix;
bufextra = maxexchange + BUFEXTRA;
maxsend = BUFMIN;
memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
maxrecv = BUFMIN;
memory->create(buf_recv,maxrecv,"comm:buf_recv");
nswap = 2 * domain->dimension;
allocate_swap(nswap);
//sendlist = (int **) memory->smalloc(nswap*sizeof(int *),"comm:sendlist");
//memory->create(maxsendlist,nswap,"comm:maxsendlist");
//for (int i = 0; i < nswap; i++) {
// maxsendlist[i] = BUFMIN;
// memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
//}
}
/* ----------------------------------------------------------------------
NOTE: if this is nearly identical to CommBrick, put it into Comm ??
------------------------------------------------------------------------- */
void CommTiled::init()
{
triclinic = domain->triclinic;
map_style = atom->map_style;
// temporary restrictions
if (triclinic)
error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box");
if (domain->xperiodic || domain->yperiodic ||
(domain->dimension == 2 && domain->zperiodic))
error->all(FLERR,"Cannot yet use comm_style tiled with periodic box");
if (mode == MULTI)
error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm");
// comm_only = 1 if only x,f are exchanged in forward/reverse comm
// comm_x_only = 0 if ghost_velocity since velocities are added
@@ -72,51 +138,210 @@ void CommTiled::init()
for (int i = 0; i < modify->nfix; i++)
size_border += modify->fix[i]->comm_border;
// maxexchange = max # of datums/atom in exchange communication
// maxforward = # of datums in largest forward communication
// maxreverse = # of datums in largest reverse communication
// query pair,fix,compute,dump for their requirements
// pair style can force reverse comm even if newton off
maxexchange = BUFMIN + maxexchange_fix;
maxforward = MAX(size_forward,size_border);
maxreverse = size_reverse;
if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
for (int i = 0; i < modify->nfix; i++) {
maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
}
for (int i = 0; i < modify->ncompute; i++) {
maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
}
for (int i = 0; i < output->ndump; i++) {
maxforward = MAX(maxforward,output->dump[i]->comm_forward);
maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
}
if (force->newton == 0) maxreverse = 0;
if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
}
/* ----------------------------------------------------------------------
setup spatial-decomposition communication patterns
function of neighbor cutoff(s) & cutghostuser & current box size
single mode sets slab boundaries (slablo,slabhi) based on max cutoff
multi mode sets type-dependent slab boundaries (multilo,multihi)
function of neighbor cutoff(s) & cutghostuser & current box size and tiling
------------------------------------------------------------------------- */
void CommTiled::setup()
{
// error on triclinic or multi?
// set nswap = 2*dim
// setup neighbor proc info for exchange()
// setup nsendproc and nrecvproc counts
// setup sendproc and recvproc lists
// setup sendboxes
// reallocate requests and statuses
int i;
// check that cutoff is <= 1/2 of periodic box len?
int dimension;
int *periodicity;
double *prd,*sublo,*subhi,*boxlo,*boxhi;
// loop over dims
// left:
// construct ghost boxes
// different in x,y,z
// account for ghost borders in y,z
// account for PBC by shifting
// split into multiple boxes if straddles PBC
// drop boxes down RCB tree
// count unique procs they cover
// what about self if crosses PBC
// for each proc they cover:
// compute box I send it to left
// is a message I will recv from right (don't care about box)
// for ghost-extended boxes
// do not count procs that do not overlap my owned box at all
// only touching edge of my owned box does not count
// in this case list I send to and recv from may be different?
// same thing to right
double cut = MAX(neighbor->cutneighmax,cutghostuser);
dimension = domain->dimension;
periodicity = domain->periodicity;
prd = domain->prd;
sublo = domain->sublo;
subhi = domain->subhi;
boxlo = domain->boxlo;
boxhi = domain->boxhi;
cutghost[0] = cutghost[1] = cutghost[2] = cut;
if ((periodicity[0] && cut > prd[0]) ||
(periodicity[1] && cut > prd[1]) ||
(dimension == 3 && periodicity[2] && cut > prd[2]))
error->all(FLERR,"Communication cutoff for comm_style tiled "
"cannot exceed periodic box length");
// allocate overlap
int *overlap;
int noverlap,noverlap1,indexme;
double lo1[3],hi1[3],lo2[3],hi2[3];
int one,two;
nswap = 0;
for (int idim = 0; idim < dimension; idim++) {
// ghost box in lower direction
one = 1;
lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2];
lo1[idim] = sublo[idim] - cut;
hi1[idim] = sublo[idim];
two = 0;
if (periodicity[idim] && lo1[idim] < boxlo[idim]) {
two = 1;
lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2];
hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2];
lo2[idim] = lo1[idim] + prd[idim];
hi2[idim] = hi1[idim] + prd[idim];
if (sublo[idim] == boxlo[idim]) {
one = 0;
hi2[idim] = boxhi[idim];
}
}
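// worked example with illustrative values: idim = 0, boxlo = 0, prd = 10,
// sublo = 0, cut = 2 -> lo1 = -2 extends below boxlo, so the wrapped box
// is lo2 = 8, hi2 = 10; and since sublo == boxlo the unwrapped box is
// dropped entirely (one = 0) and only the wrapped box is used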
indexme = -1;
noverlap = 0;
if (one) {
if (layout == LAYOUT_UNIFORM)
box_drop_uniform(idim,lo1,hi1,noverlap,overlap,indexme);
else if (layout == LAYOUT_NONUNIFORM)
box_drop_nonuniform(idim,lo1,hi1,noverlap,overlap,indexme);
else
box_drop_tiled(lo1,hi1,0,nprocs-1,noverlap,overlap,indexme);
}
noverlap1 = noverlap;
if (two) {
if (layout == LAYOUT_UNIFORM)
box_drop_uniform(idim,lo2,hi2,noverlap,overlap,indexme);
else if (layout == LAYOUT_NONUNIFORM)
box_drop_nonuniform(idim,lo2,hi2,noverlap,overlap,indexme);
else
box_drop_tiled(lo2,hi2,0,nprocs-1,noverlap,overlap,indexme);
}
// if this (self) proc is in overlap list, move it to end of list
if (indexme >= 0) {
int tmp = overlap[noverlap-1];
overlap[noverlap-1] = overlap[indexme];
overlap[indexme] = tmp;
}
// overlap now has list of noverlap procs
// includes PBC effects
if (overlap[noverlap-1] == me) sendself[nswap] = 1;
else sendself[nswap] = 0;
if (noverlap-sendself[nswap]) sendother[nswap] = 1;
else sendother[nswap] = 0;
nsendproc[nswap] = noverlap;
for (i = 0; i < noverlap; i++) sendproc[nswap][i] = overlap[i];
nrecvproc[nswap+1] = noverlap;
for (i = 0; i < noverlap; i++) recvproc[nswap+1][i] = overlap[i];
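// by symmetry, the procs overlapped by my lower ghost box (swap nswap)
// are exactly the procs whose upper-direction sends reach me (swap
// nswap+1), so the same overlap list fills sendproc and recvproc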
// compute sendbox for each of my sends
// ibox = intersection of ghostbox with other proc's sub-domain
// sendbox = ibox displaced by cutoff in dim
// NOTE: need to extend send box in lower dims by cutoff
// NOTE: this logic for overlapping boxes is not correct for sending
double oboxlo[3],oboxhi[3],sbox[6];
for (i = 0; i < noverlap; i++) {
pbc_flag[nswap][i] = 0;
pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] =
pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0;
if (layout == LAYOUT_UNIFORM)
box_other_uniform(overlap[i],oboxlo,oboxhi);
else if (layout == LAYOUT_NONUNIFORM)
box_other_nonuniform(overlap[i],oboxlo,oboxhi);
else
box_other_tiled(overlap[i],oboxlo,oboxhi);
if (i < noverlap1) {
sbox[0] = MAX(oboxlo[0],lo1[0]);
sbox[1] = MAX(oboxlo[1],lo1[1]);
sbox[2] = MAX(oboxlo[2],lo1[2]);
sbox[3] = MIN(oboxhi[0],hi1[0]);
sbox[4] = MIN(oboxhi[1],hi1[1]);
sbox[5] = MIN(oboxhi[2],hi1[2]);
sbox[idim] += cut;
sbox[3+idim] += cut;
if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
} else {
pbc_flag[nswap][i] = 1;
pbc[nswap][i][idim] = 1;
sbox[0] = MAX(oboxlo[0],lo2[0]);
sbox[1] = MAX(oboxlo[1],lo2[1]);
sbox[2] = MAX(oboxlo[2],lo2[2]);
sbox[3] = MIN(oboxhi[0],hi2[0]);
sbox[4] = MIN(oboxhi[1],hi2[1]);
sbox[5] = MIN(oboxhi[2],hi2[2]);
sbox[idim] -= prd[idim] - cut;
sbox[3+idim] -= prd[idim] - cut;
if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
}
if (idim >= 1) {
if (sbox[0] == sublo[0]) sbox[0] -= cut;
if (sbox[3] == subhi[0]) sbox[3] += cut;
}
if (idim == 2) {
if (sbox[1] == sublo[1]) sbox[1] -= cut;
if (sbox[4] == subhi[1]) sbox[4] += cut;
}
memcpy(sendbox[nswap][i],sbox,6*sizeof(double));
}
// ghost box in upper direction
nswap += 2;
}
// reallocate requests and statuses to max of any swap
// what is needed from the decomposition (RCB):
// dropbox: return list of procs with overlap and overlapping boxes
// return n, proclist, boxlist
// otherbox: bbox of another proc
// dropatom: return what proc owns the atom coord
}
/* ----------------------------------------------------------------------
@@ -126,46 +351,73 @@ void CommTiled::setup()
void CommTiled::forward_comm(int dummy)
{
int i,irecv,n,nsend,nrecv;
MPI_Status status;
AtomVec *avec = atom->avec;
double **x = atom->x;
// exchange data with another set of procs in each swap
// post recvs from all procs except self
// send data to all procs except self
// copy data to self if sendself is set
// wait on all procs except self and unpack received data
// if comm_x_only set, exchange or copy directly to x, don't unpack
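// nsend/nrecv below exclude the self copy: setup() moved me to the end
// of the overlap list, so when sendself is set the last sendproc/recvproc
// entry is me and that data is packed/unpacked directly without MPI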
for (int iswap = 0; iswap < nswap; iswap++) {
nsend = nsendproc[iswap] - sendself[iswap];
nrecv = nrecvproc[iswap] - sendself[iswap];
if (comm_x_only) {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend],
pbc[iswap][nsend]);
}
if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
} else if (ghost_velocity) {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
buf_send);
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
}
}
} else {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
@@ -174,27 +426,21 @@ void CommTiled::forward_comm(int dummy)
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
buf_send);
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
}
}
}
}
@@ -207,57 +453,70 @@ void CommTiled::forward_comm(int dummy)
void CommTiled::reverse_comm()
{
int i,irecv,n,nsend,nrecv;
MPI_Request request;
MPI_Status status;
AtomVec *avec = atom->avec;
double **f = atom->f;
// exchange data with another set of procs in each swap
// post recvs from all procs except self
// send data to all procs except self
// copy data to self if sendself is set
// wait on all procs except self and unpack received data
// if comm_f_only set, exchange or copy directly from f, don't pack
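// as in forward_comm(), nsend/nrecv exclude the self copy, but the roles
// invert: recvs are posted to the procs this proc sent border atoms to,
// and force data flows back along the forward paths in reverse swap order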
for (int iswap = nswap-1; iswap >= 0; iswap--) {
nsend = nsendproc[iswap] - sendself[iswap];
nrecv = nrecvproc[iswap] - sendself[iswap];
if (comm_f_only) {
if (sendother[iswap]) {
for (i = 0; i < nsend; i++)
MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
size_reverse_recv[iswap][i],MPI_DOUBLE,
sendproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nrecv; i++)
MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world);
}
if (sendself[iswap]) {
avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
f[firstrecv[iswap][nrecv]]);
}
if (sendother[iswap]) {
for (i = 0; i < nsend; i++) {
MPI_Waitany(nsend,requests,&irecv,&status);
avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
&buf_recv[reverse_recv_offset[iswap][irecv]]);
}
}
} else {
if (sendother[iswap]) {
for (i = 0; i < nsend; i++)
MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
size_reverse_recv[iswap][i],MPI_DOUBLE,
sendproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nrecv; i++) {
n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
buf_send);
MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
buf_send);
avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
buf_send);
}
if (sendother[iswap]) {
for (i = 0; i < nsend; i++) {
MPI_Waitany(nsend,requests,&irecv,&status);
avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
&buf_recv[reverse_recv_offset[iswap][irecv]]);
}
}
}
}
@@ -298,7 +557,7 @@ void CommTiled::exchange()
void CommTiled::borders()
{
int i,n,irecv,ngroup,nlast,nsend,nrecv,ncount,rmaxswap;
double xlo,xhi,ylo,yhi,zlo,zhi;
double *bbox;
double **x;
@@ -333,36 +592,40 @@ void CommTiled::borders()
if (iswap < 2) nlast = atom->nlocal;
else nlast = atom->nlocal + atom->nghost;
ncount = 0;
for (i = 0; i < ngroup; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
sendlist[iswap][i][ncount++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
sendlist[iswap][i][ncount++] = i;
}
sendnum[iswap][i] = ncount;
smax = MAX(smax,ncount);
}
// send sendnum counts to procs who recv from me except self
// copy data to self if sendself is set
nsend = nsendproc[iswap] - sendself[iswap];
nrecv = nrecvproc[iswap] - sendself[iswap];
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++)
MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
}
if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
// setup other per swap/proc values from sendnum and recvnum
@@ -390,54 +653,64 @@ void CommTiled::borders()
// swap atoms with other procs using pack_border(), unpack_border()
if (ghost_velocity) {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
recvnum[iswap][i]*size_border,
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
sendproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
n = avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
buf_send,pbc_flag[iswap][nsend],
pbc[iswap][nsend]);
avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
buf_send);
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_border_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
}
}
} else {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
recvnum[iswap][i]*size_border,
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
sendproc[iswap][i],0,world);
}
}
if (sendself[iswap]) {
n = avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend],
buf_send,pbc_flag[iswap][nsend],
pbc[iswap][nsend]);
avec->unpack_border(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
buf_send);
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
}
}
}
@@ -786,6 +1059,64 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
return nrecv;
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
box is owned by me and extends in dim
------------------------------------------------------------------------- */
void CommTiled::box_drop_uniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
{
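// empty in this commit; a plausible sketch (an assumption, not the actual
// implementation): map lo/hi in each dim onto processor-grid index ranges
// via the uniform split of prd by procgrid, clip the ranges to the grid,
// then append the proc ID of every covered grid cell to overlap[],
// recording indexme when that proc is me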
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
------------------------------------------------------------------------- */
void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
{
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
recursive routine for traversing an RCB tree of cuts
------------------------------------------------------------------------- */
void CommTiled::box_drop_tiled(double *lo, double *hi,
int proclower, int procupper,
int &noverlap, int *overlap, int &indexme)
{
// end recursion when partition is a single proc
// add proc to overlap list
if (proclower == procupper) {
if (proclower == me) indexme = noverlap;
overlap[noverlap++] = proclower;
return;
}
// drop box on each side of cut it extends beyond
// use > and < criteria to not include procs it only touches
// procmid = 1st processor in upper half of partition
// = location in tree that stores this cut
// dim = 0,1,2 dimension of cut
// cut = position of cut
int procmid = proclower + (procupper - proclower) / 2 + 1;
double cut = tree[procmid].cut;
int dim = tree[procmid].dim;
if (lo[dim] < cut)
box_drop_tiled(lo,hi,proclower,procmid-1,noverlap,overlap,indexme);
if (hi[dim] > cut)
box_drop_tiled(lo,hi,procmid,procupper,noverlap,overlap,indexme);
}
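// worked example with illustrative values: 4 procs, root cut at x = 0.5
// (procmid = 2, dim = 0), then one y cut inside each half; a box with
// lo[0] = 0.4, hi[0] = 0.6 straddles the root cut, so both halves are
// recursed into and every leaf proc whose sub-domain strictly overlaps
// the box ends up in overlap[]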
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
if flag = 1, realloc
@@ -825,6 +1156,42 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
"comm:sendlist[iswap]");
}
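// a minimal sketch of the grow_send() pattern described above, assuming
// it mirrors CommBrick::grow_send (its body falls outside this hunk):
//
// maxsend = static_cast<int> (BUFFACTOR * n);
// if (flag) memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
// else {
//   memory->destroy(buf_send);
//   memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
// }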
/* ----------------------------------------------------------------------
allocation of swap info
------------------------------------------------------------------------- */
void CommTiled::allocate_swap(int n)
{
memory->create(sendnum,n,"comm:sendnum");
memory->create(recvnum,n,"comm:recvnum");
memory->create(sendproc,n,"comm:sendproc");
memory->create(recvproc,n,"comm:recvproc");
memory->create(size_forward_recv,n,"comm:size");
memory->create(size_reverse_send,n,"comm:size");
memory->create(size_reverse_recv,n,"comm:size");
memory->create(firstrecv,n,"comm:firstrecv");
memory->create(pbc_flag,n,"comm:pbc_flag");
memory->create(pbc,n,6,"comm:pbc");
}
/* ----------------------------------------------------------------------
free memory for swaps
------------------------------------------------------------------------- */
void CommTiled::free_swap()
{
memory->destroy(sendnum);
memory->destroy(recvnum);
memory->destroy(sendproc);
memory->destroy(recvproc);
memory->destroy(size_forward_recv);
memory->destroy(size_reverse_send);
memory->destroy(size_reverse_recv);
memory->destroy(firstrecv);
memory->destroy(pbc_flag);
memory->destroy(pbc);
}
/* ----------------------------------------------------------------------
return # of bytes of allocated memory
------------------------------------------------------------------------- */