diff --git a/src/balance.cpp b/src/balance.cpp index 89f0ccb5cc..8675d8568e 100644 --- a/src/balance.cpp +++ b/src/balance.cpp @@ -279,14 +279,14 @@ void Balance::command(int narg, char **arg) if (style == XYZ) { if (comm->layout == LAYOUT_UNIFORM) { if (xflag == USER || yflag == USER || zflag == USER) - comm->layout == LAYOUT_NONUNIFORM; + comm->layout = LAYOUT_NONUNIFORM; } else if (comm->style == LAYOUT_NONUNIFORM) { if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) - comm->layout == LAYOUT_UNIFORM; + comm->layout = LAYOUT_UNIFORM; } else if (comm->style == LAYOUT_TILED) { if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) - comm->layout == LAYOUT_UNIFORM; - else comm->layout == LAYOUT_NONUNIFORM; + comm->layout = LAYOUT_UNIFORM; + else comm->layout = LAYOUT_NONUNIFORM; } if (xflag == UNIFORM) { @@ -476,6 +476,7 @@ int *Balance::bisection(int sortflag) if (!rcb) rcb = new RCB(lmp); // NOTE: lo/hi args could be simulation box or particle bounding box + // if particle bbox, then mysplit needs to be reset to sim box // NOTE: triclinic needs to be in lamda coords int dim = domain->dimension; @@ -488,6 +489,10 @@ int *Balance::bisection(int sortflag) // NOTE: this logic is specific to orthogonal boxes, not triclinic + comm->rcbnew = 1; + comm->rcbcut = rcb->cut; + comm->rcbcutdim = rcb->cutdim; + double (*mysplit)[2] = comm->mysplit; mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0]; @@ -502,6 +507,8 @@ int *Balance::bisection(int sortflag) if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0; else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2]; + // return list of procs to send my atoms to + return rcb->sendproc; } diff --git a/src/comm.cpp b/src/comm.cpp index 522fef6f1c..ad4b18786e 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -60,6 +60,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) grid2proc = NULL; xsplit = ysplit = zsplit = NULL; + rcbnew = 0; // use of OpenMP threads // query OpenMP for number of threads/process set by user at run-time diff --git a/src/comm.h b/src/comm.h index 1aceb20c2c..c1376e14b1 100644 --- a/src/comm.h +++ b/src/comm.h @@ -50,7 +50,10 @@ class Comm : protected Pointers { // public settings specific to layout = TILED + int rcbnew; // 1 if just reset by rebalance, else 0 double mysplit[3][2]; // fractional (0-1) bounds of my sub-domain + double rcbcut; // RCB cut by this proc + int rcbcutdim; // dimension of RCB cut // methods diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp index 039adfc200..2bd089e35d 100644 --- a/src/comm_tiled.cpp +++ b/src/comm_tiled.cpp @@ -36,6 +36,10 @@ using namespace LAMMPS_NS; #define BUFMIN 1000 #define BUFEXTRA 1000 +// NOTE: change this to 16 after debugged + +#define DELTA_PROCS 1 + enum{SINGLE,MULTI}; // same as in Comm enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files @@ -66,14 +70,10 @@ CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm) CommTiled::~CommTiled() { - free_swap(); - - if (sendlist) for (int i = 0; i < nswap; i++) memory->destroy(sendlist[i]); - memory->sfree(sendlist); - memory->destroy(maxsendlist); - + memory->destroy(overlap); memory->destroy(buf_send); memory->destroy(buf_recv); + deallocate_swap(nswap); } /* ---------------------------------------------------------------------- @@ -83,6 +83,8 @@ CommTiled::~CommTiled() void CommTiled::init_buffers() { + memory->create(overlap,nprocs,"comm:overlap"); + maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; @@ -93,13 +95,6 @@ void CommTiled::init_buffers() nswap = 2 * domain->dimension; allocate_swap(nswap); - - //sendlist = (int **) memory->smalloc(nswap*sizeof(int *),"comm:sendlist"); - //memory->create(maxsendlist,nswap,"comm:maxsendlist"); - //for (int i = 0; i < nswap; i++) { - // maxsendlist[i] = BUFMIN; - // memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]"); - //} } /* ---------------------------------------------------------------------- @@ -115,9 +110,6 @@ void CommTiled::init() if (triclinic) error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box"); - if (domain->xperiodic || domain->yperiodic || - (domain->dimension == 2 && domain->zperiodic)) - error->all(FLERR,"Cannot yet use comm_style tiled with periodic box"); if (mode == MULTI) error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm"); @@ -176,27 +168,44 @@ void CommTiled::init() /* ---------------------------------------------------------------------- setup spatial-decomposition communication patterns function of neighbor cutoff(s) & cutghostuser & current box size and tiling - sets nsendproc, nrecvproc, sendproc, recvproc - sets sendother, sendself, pbc_flag, pbc, sendbox ------------------------------------------------------------------------- */ void CommTiled::setup() { int i; - int dimension; - int *periodicity; - double *prd,*sublo,*subhi,*boxlo,*boxhi; + int dimension = domain->dimension; + int *periodicity = domain->periodicity; + double *prd = domain->prd; + double *boxlo = domain->boxlo; + double *boxhi = domain->boxhi; + double *sublo = domain->sublo; + double *subhi = domain->subhi; + + // set function pointers + + if (layout != LAYOUT_TILED) { + box_drop = &CommTiled::box_drop_brick; + box_other = &CommTiled::box_other_brick; + } else { + box_drop = &CommTiled::box_drop_tiled; + box_other = &CommTiled::box_other_tiled; + } + + // if RCB decomp has just changed, gather needed global RCB info + + if (rcbnew) { + rcbnew = 0; + memcpy(&rcbinfo[me].mysplit[0][0],&mysplit[0][0],6*sizeof(double)); + rcbinfo[me].cut = rcbcut; + rcbinfo[me].dim = rcbcutdim; + MPI_Allgather(&rcbinfo[me],sizeof(RCBinfo),MPI_CHAR, + rcbinfo,sizeof(RCBinfo),MPI_CHAR,world); + } + + // check that cutoff < any periodic box length double cut = MAX(neighbor->cutneighmax,cutghostuser); - - dimension = domain->dimension; - periodicity = domain->periodicity; - prd = domain->prd; - sublo = domain->sublo; - subhi = domain->subhi; - boxlo = domain->boxlo; - boxhi = domain->boxhi; cutghost[0] = cutghost[1] = cutghost[2] = cut; if ((periodicity[0] && cut > prd[0]) || @@ -205,13 +214,12 @@ void CommTiled::setup() error->all(FLERR,"Communication cutoff for comm_style tiled " "cannot exceed periodic box length"); - // NOTE: allocate overlap (to Nprocs?) - // NOTE: allocate 2nd dim of sendproc, recvproc, sendbox - // NOTE: func pointers for box_drop and box_other - // NOTE: write box_drop and box_other methods - // NOTE: for tiled, must do one-time gather of RCB cuts and proc boxes + // loop over 6 swap directions + // determine which procs I will send to and receive from in each swap + // done by intersecting ghost box with all proc sub-boxes it overlaps + // sets nsendproc, nrecvproc, sendproc, recvproc + // sets sendother, sendself, pbc_flag, pbc, sendbox - int *overlap; int noverlap,noverlap1,indexme; double lo1[3],hi1[3],lo2[3],hi2[3]; int one,two; @@ -220,7 +228,9 @@ void CommTiled::setup() for (int idim = 0; idim < dimension; idim++) { for (int iswap = 0; iswap < 2; iswap++) { - // ghost box in lower direction + // one = first ghost box in same periodic image + // two = second ghost box wrapped across periodic boundary + // either may not exist one = 1; lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2]; @@ -257,35 +267,35 @@ void CommTiled::setup() } } + // noverlap = # of overlaps of box1/2 with procs via box_drop() + // overlap = list of overlapping procs + // if overlap with self, indexme = index of me in list + indexme = -1; noverlap = 0; - if (one) { - if (layout == LAYOUT_UNIFORM) - box_drop_uniform(idim,lo1,hi1,noverlap,overlap,indexme); - else if (layout == LAYOUT_NONUNIFORM) - box_drop_nonuniform(idim,lo1,hi1,noverlap,overlap,indexme); - else - box_drop_tiled(lo1,hi1,0,nprocs-1,noverlap,overlap,indexme); - } - + if (one) (this->*box_drop)(idim,lo1,hi1,noverlap,indexme); noverlap1 = noverlap; - if (two) { - if (layout == LAYOUT_UNIFORM) - box_drop_uniform(idim,lo2,hi2,noverlap,overlap,indexme); - else if (layout == LAYOUT_NONUNIFORM) - box_drop_nonuniform(idim,lo2,hi2,noverlap,overlap,indexme); - else - box_drop_tiled(lo2,hi2,0,nprocs-1,noverlap,overlap,indexme); - } + if (two) (this->*box_drop)(idim,lo2,hi2,noverlap,indexme); + + // if self is in overlap list, move it to end of list - // if this (self) proc is in overlap list, move it to end of list - if (indexme >= 0) { int tmp = overlap[noverlap-1]; overlap[noverlap-1] = overlap[indexme]; overlap[indexme] = tmp; } + // reallocate 2nd dimensions of all send/recv arrays, based on noverlap + // # of sends of this swap = # of recvs of nswap +/- 1 + + if (noverlap > nprocmax[iswap]) { + int oldmax = nprocmax[iswap]; + while (nprocmax[iswap] < noverlap) nprocmax[iswap] += DELTA_PROCS; + grow_swap_send(iswap,nprocmax[iswap],oldmax); + if (iswap == 0) grow_swap_recv(iswap+1,nprocmax[iswap]); + else grow_swap_recv(iswap-1,nprocmax[iswap]); + } + // overlap how has list of noverlap procs // includes PBC effects @@ -320,12 +330,7 @@ void CommTiled::setup() pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] = pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0; - if (layout == LAYOUT_UNIFORM) - box_other_uniform(overlap[i],oboxlo,oboxhi); - else if (layout == LAYOUT_NONUNIFORM) - box_other_nonuniform(overlap[i],oboxlo,oboxhi); - else - box_other_tiled(overlap[i],oboxlo,oboxhi); + (this->*box_other)(idim,iswap,overlap[i],oboxlo,oboxhi); if (i < noverlap1) { sbox[0] = MAX(oboxlo[0],lo1[0]); @@ -368,10 +373,6 @@ void CommTiled::setup() nswap++; } } - - - // reallocate requests and statuses to max of any swap - } /* ---------------------------------------------------------------------- @@ -497,6 +498,9 @@ void CommTiled::reverse_comm() // if comm_f_only set, exchange or copy directly from f, don't pack for (int iswap = nswap-1; iswap >= 0; iswap--) { + nsend = nsendproc[iswap] - sendself[iswap]; + nrecv = nrecvproc[iswap] - sendself[iswap]; + if (comm_f_only) { if (sendother[iswap]) { for (i = 0; i < nsend; i++) @@ -587,7 +591,7 @@ void CommTiled::exchange() void CommTiled::borders() { - int i,n,irecv,ngroup,nlast,nsend,nrecv,ncount,rmaxswap; + int i,m,n,irecv,nlocal,nlast,nsend,nrecv,ncount,rmaxswap; double xlo,xhi,ylo,yhi,zlo,zhi; double *bbox; double **x; @@ -609,35 +613,35 @@ void CommTiled::borders() // for yz-dim swaps, check owned and ghost atoms // store sent atom indices in list for use in future timesteps // NOTE: assume SINGLE mode, add back in logic for MULTI mode later + // and for ngroup when bordergroup is set x = atom->x; - for (i = 0; i < nsendproc[iswap]; i++) { - bbox = sendbox[iswap][i]; - xlo = bbox[0]; xhi = bbox[1]; - ylo = bbox[2]; yhi = bbox[3]; - zlo = bbox[4]; zhi = bbox[5]; + for (m = 0; m < nsendproc[iswap]; m++) { + bbox = sendbox[iswap][m]; + xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2]; + xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5]; - ngroup = atom->nfirst; + nlocal = atom->nlocal; if (iswap < 2) nlast = atom->nlocal; else nlast = atom->nlocal + atom->nghost; ncount = 0; - for (i = 0; i < ngroup; i++) + for (i = 0; i < nlocal; i++) if (x[i][0] >= xlo && x[i][0] <= xhi && x[i][1] >= ylo && x[i][1] <= yhi && x[i][2] >= zlo && x[i][2] <= zhi) { - if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount); - sendlist[iswap][i][ncount++] = i; + if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount); + sendlist[iswap][m][ncount++] = i; } for (i = atom->nlocal; i < nlast; i++) if (x[i][0] >= xlo && x[i][0] <= xhi && x[i][1] >= ylo && x[i][1] <= yhi && x[i][2] >= zlo && x[i][2] <= zhi) { - if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount); - sendlist[iswap][i][ncount++] = i; + if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount); + sendlist[iswap][m][ncount++] = i; } - sendnum[iswap][i] = ncount; + sendnum[iswap][m] = ncount; smax = MAX(smax,ncount); } @@ -648,11 +652,11 @@ void CommTiled::borders() nrecv = nrecvproc[iswap] - sendself[iswap]; if (sendother[iswap]) { - for (i = 0; i < nrecv; i++) - MPI_Irecv(&recvnum[iswap][i],1,MPI_INT, - recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsend; i++) - MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world); + for (m = 0; m < nrecv; m++) + MPI_Irecv(&recvnum[iswap][m],1,MPI_INT, + recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) + MPI_Send(&sendnum[iswap][m],1,MPI_INT,sendproc[iswap][m],0,world); } if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend]; if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses); @@ -660,18 +664,18 @@ void CommTiled::borders() // setup other per swap/proc values from sendnum and recvnum rmaxswap = 0; - for (i = 0; i < nrecvproc[iswap]; i++) { - rmaxswap += recvnum[iswap][i]; - size_forward_recv[iswap][i] = recvnum[iswap][i]*size_forward; - size_reverse_send[iswap][i] = recvnum[iswap][i]*size_reverse; - size_reverse_recv[iswap][i] = sendnum[iswap][i]*size_reverse; - if (i == 0) { + for (m = 0; m < nrecvproc[iswap]; m++) { + rmaxswap += recvnum[iswap][m]; + size_forward_recv[iswap][m] = recvnum[iswap][m]*size_forward; + size_reverse_send[iswap][m] = recvnum[iswap][m]*size_reverse; + size_reverse_recv[iswap][m] = sendnum[iswap][m]*size_reverse; + if (m == 0) { firstrecv[iswap][0] = atom->nlocal + atom->nghost; forward_recv_offset[iswap][0] = 0; } else { - firstrecv[iswap][i] = firstrecv[iswap][i-1] + recvnum[iswap][i-1]; - forward_recv_offset[iswap][i] = - forward_recv_offset[iswap][i-1] + recvnum[iswap][i-1]; + firstrecv[iswap][m] = firstrecv[iswap][m-1] + recvnum[iswap][m-1]; + forward_recv_offset[iswap][m] = + forward_recv_offset[iswap][m-1] + recvnum[iswap][m-1]; } } rmax = MAX(rmax,rmaxswap); @@ -685,15 +689,15 @@ void CommTiled::borders() if (ghost_velocity) { if (sendother[iswap]) { - for (i = 0; i < nrecv; i++) - MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], - recvnum[iswap][i]*size_border, - MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsend; i++) { - n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i], - buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + for (m = 0; m < nrecv; m++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]], + recvnum[iswap][m]*size_border, + MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) { + n = avec->pack_border_vel(sendnum[iswap][m],sendlist[iswap][m], + buf_send,pbc_flag[iswap][m],pbc[iswap][m]); MPI_Send(buf_send,n*size_border,MPI_DOUBLE, - sendproc[iswap][i],0,world); + sendproc[iswap][m],0,world); } } @@ -706,7 +710,7 @@ void CommTiled::borders() } if (sendother[iswap]) { - for (i = 0; i < nrecv; i++) { + for (m = 0; m < nrecv; m++) { MPI_Waitany(nrecv,requests,&irecv,&status); avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv], &buf_recv[forward_recv_offset[iswap][irecv]]); @@ -715,15 +719,15 @@ void CommTiled::borders() } else { if (sendother[iswap]) { - for (i = 0; i < nrecv; i++) - MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], - recvnum[iswap][i]*size_border, - MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsend; i++) { - n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i], - buf_send,pbc_flag[iswap][i],pbc[iswap][i]); + for (m = 0; m < nrecv; m++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]], + recvnum[iswap][m]*size_border, + MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) { + n = avec->pack_border(sendnum[iswap][m],sendlist[iswap][m], + buf_send,pbc_flag[iswap][m],pbc[iswap][m]); MPI_Send(buf_send,n*size_border,MPI_DOUBLE, - sendproc[iswap][i],0,world); + sendproc[iswap][m],0,world); } } @@ -736,7 +740,7 @@ void CommTiled::borders() } if (sendother[iswap]) { - for (i = 0; i < nrecv; i++) { + for (m = 0; m < nrecv; m++) { MPI_Waitany(nrecv,requests,&irecv,&status); avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv], &buf_recv[forward_recv_offset[iswap][irecv]]); @@ -762,6 +766,8 @@ void CommTiled::borders() if (map_style) atom->map_set(); } +// NOTE: remaining forward/reverse methods still need to be updated + /* ---------------------------------------------------------------------- forward communication invoked by a Pair n = constant number of datums per atom @@ -1095,20 +1101,63 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf) box is owned by me and extends in dim ------------------------------------------------------------------------- */ -void CommTiled::box_drop_uniform(int dim, double *lo, double *hi, - int &noverlap, int *overlap, int &indexme) +void CommTiled::box_drop_brick(int idim, double *lo, double *hi, + int &noverlap, int &indexme) { + // NOTE: this is not triclinic compatible -} + double *prd = domain->prd; + double *boxlo = domain->boxlo; + double *boxhi = domain->boxhi; + double *sublo = domain->sublo; + double *subhi = domain->subhi; -/* ---------------------------------------------------------------------- - determine overlap list of Noverlap procs the lo/hi box overlaps - overlap = non-zero area in common between box and proc sub-domain -------------------------------------------------------------------------- */ + int index,dir; + if (hi[idim] == sublo[idim]) { + index = myloc[idim] - 1; + dir = -1; + } else if (lo[idim] == subhi[idim]) { + index = myloc[idim] + 1; + dir = 1; + } else if (hi[idim] == boxhi[idim]) { + index = procgrid[idim] - 1; + dir = -1; + } else if (lo[idim] == boxlo[idim]) { + index = 0; + dir = 1; + } -void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi, - int &noverlap, int *overlap, int &indexme) -{ + int other1,other2,proc; + double lower,upper; + double *split; + + if (idim == 0) { + other1 = myloc[1]; other2 = myloc[2]; + split = xsplit; + } else if (idim == 1) { + other1 = myloc[0]; other2 = myloc[2]; + split = ysplit; + } else { + other1 = myloc[0]; other2 = myloc[1]; + split = zsplit; + } + + while (1) { + lower = boxlo[idim] + prd[idim]*split[index]; + if (index < procgrid[idim]-1) + upper = boxlo[idim] + prd[idim]*split[index+1]; + else upper = boxhi[idim]; + if (lower >= hi[idim] || upper <= lo[idim]) break; + + if (idim == 0) proc = grid2proc[index][other1][other2]; + else if (idim == 1) proc = grid2proc[other1][index][other2]; + else proc = grid2proc[other1][other2][idim]; + + if (proc == me) indexme = noverlap; + overlap[noverlap++] = proc; + index += dir; + if (index < 0 || index >= procgrid[idim]) break; + } } /* ---------------------------------------------------------------------- @@ -1118,9 +1167,15 @@ void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi, no need to split lo/hi box as recurse b/c OK if box extends outside RCB box ------------------------------------------------------------------------- */ -void CommTiled::box_drop_tiled(double *lo, double *hi, - int proclower, int procupper, - int &noverlap, int *overlap, int &indexme) +void CommTiled::box_drop_tiled(int idim, double *lo, double *hi, + int &noverlap, int &indexme) +{ + box_drop_tiled_recurse(lo,hi,0,nprocs-1,noverlap,indexme); +} + +void CommTiled::box_drop_tiled_recurse(double *lo, double *hi, + int proclower, int procupper, + int &noverlap, int &indexme) { // end recursion when partition is a single proc // add proc to overlap list @@ -1139,13 +1194,93 @@ void CommTiled::box_drop_tiled(double *lo, double *hi, // cut = position of cut int procmid = proclower + (procupper - proclower) / 2 + 1; - double cut = tree[procmid].cut; - int dim = tree[procmid].dim; + double cut = rcbinfo[procmid].cut; + int idim = rcbinfo[procmid].dim; - if (lo[dim] < cut) - box_drop_tiled(lo,hi,proclower,procmid-1,noverlap,overlap,indexme); - if (hi[dim] > cut) - box_drop_tiled(lo,hi,procmid,procupper,noverlap,overlap,indexme); + if (lo[idim] < cut) + box_drop_tiled_recurse(lo,hi,proclower,procmid-1,noverlap,indexme); + if (hi[idim] > cut) + box_drop_tiled_recurse(lo,hi,procmid,procupper,noverlap,indexme); +} + +/* ---------------------------------------------------------------------- + return other box owned by proc as lo/hi corner pts +------------------------------------------------------------------------- */ + +void CommTiled::box_other_brick(int idim, int iswap, + int proc, double *lo, double *hi) +{ + double *prd = domain->prd; + double *boxlo = domain->boxlo; + double *boxhi = domain->boxhi; + double *sublo = domain->sublo; + double *subhi = domain->subhi; + + lo[0] = sublo[0]; lo[1] = sublo[1]; lo[2] = sublo[2]; + hi[0] = subhi[0]; hi[1] = subhi[1]; hi[2] = subhi[2]; + + int other1,other2,oproc; + double *split; + + if (idim == 0) { + other1 = myloc[1]; other2 = myloc[2]; + split = xsplit; + } else if (idim == 1) { + other1 = myloc[0]; other2 = myloc[2]; + split = ysplit; + } else { + other1 = myloc[0]; other2 = myloc[1]; + split = zsplit; + } + + int dir = -1; + if (iswap) dir = 1; + int index = myloc[idim]; + int n = procgrid[idim]; + + for (int i = 0; i < n; i++) { + index += dir; + if (index < 0) index = n-1; + else if (index >= n) index = 0; + + if (idim == 0) oproc = grid2proc[index][other1][other2]; + else if (idim == 1) oproc = grid2proc[other1][index][other2]; + else oproc = grid2proc[other1][other2][idim]; + + if (proc == oproc) { + lo[idim] = boxlo[idim] + prd[idim]*split[index]; + if (split[index+1] < 1.0) + hi[idim] = boxlo[idim] + prd[idim]*split[index+1]; + else hi[idim] = boxhi[idim]; + return; + } + } +} + +/* ---------------------------------------------------------------------- + return other box owned by proc as lo/hi corner pts +------------------------------------------------------------------------- */ + +void CommTiled::box_other_tiled(int idim, int iswap, + int proc, double *lo, double *hi) +{ + double *prd = domain->prd; + double *boxlo = domain->boxlo; + double *boxhi = domain->boxhi; + + double (*split)[2] = rcbinfo[proc].mysplit; + + lo[0] = boxlo[0] + prd[0]*split[0][0]; + if (split[0][1] < 1.0) hi[0] = boxlo[0] + prd[0]*split[0][1]; + else hi[0] = boxhi[0]; + + lo[1] = boxlo[1] + prd[1]*split[1][0]; + if (split[1][1] < 1.0) hi[1] = boxlo[1] + prd[1]*split[1][1]; + else hi[1] = boxhi[1]; + + lo[2] = boxlo[2] + prd[2]*split[2][0]; + if (split[2][1] < 1.0) hi[2] = boxlo[2] + prd[2]*split[2][1]; + else hi[2] = boxhi[2]; } /* ---------------------------------------------------------------------- @@ -1184,7 +1319,7 @@ void CommTiled::grow_list(int iswap, int iwhich, int n) { maxsendlist[iswap][iwhich] = static_cast (BUFFACTOR * n); memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich], - "comm:sendlist[iswap]"); + "comm:sendlist[i][j]"); } /* ---------------------------------------------------------------------- @@ -1193,34 +1328,169 @@ void CommTiled::grow_list(int iswap, int iwhich, int n) void CommTiled::allocate_swap(int n) { - memory->create(sendnum,n,"comm:sendnum"); - memory->create(recvnum,n,"comm:recvnum"); - memory->create(sendproc,n,"comm:sendproc"); - memory->create(recvproc,n,"comm:recvproc"); - memory->create(size_forward_recv,n,"comm:size"); - memory->create(size_reverse_send,n,"comm:size"); - memory->create(size_reverse_recv,n,"comm:size"); - memory->create(firstrecv,n,"comm:firstrecv"); - memory->create(pbc_flag,n,"comm:pbc_flag"); - memory->create(pbc,n,6,"comm:pbc"); + nsendproc = new int[n]; + nrecvproc = new int[n]; + sendother = new int[n]; + sendself = new int[n]; + nprocmax = new int[n]; + + sendproc = new int*[n]; + recvproc = new int*[n]; + sendnum = new int*[n]; + recvnum = new int*[n]; + size_forward_recv = new int*[n]; + firstrecv = new int*[n]; + size_reverse_send = new int*[n]; + size_reverse_recv = new int*[n]; + forward_recv_offset = new int*[n]; + reverse_recv_offset = new int*[n]; + + pbc_flag = new int*[n]; + pbc = new int**[n]; + sendbox = new double**[n]; + maxsendlist = new int*[n]; + sendlist = new int**[n]; + + for (int i = 0; i < n; i++) { + sendproc[i] = recvproc[i] = NULL; + sendnum[i] = recvnum[i] = NULL; + size_forward_recv[i] = firstrecv[i] = NULL; + size_reverse_send[i] = size_reverse_recv[i] = NULL; + forward_recv_offset[i] = reverse_recv_offset[i] = NULL; + + pbc_flag[i] = NULL; + pbc[i] = NULL; + sendbox[i] = NULL; + maxsendlist[i] = NULL; + sendlist[i] = NULL; + } + + maxreqstat = 0; + requests = NULL; + statuses = NULL; + + for (int i = 0; i < n; i++) { + nprocmax[i] = DELTA_PROCS; + grow_swap_send(i,DELTA_PROCS,0); + grow_swap_recv(i,DELTA_PROCS); + } } /* ---------------------------------------------------------------------- - free memory for swaps + grow info for swap I, to allow for N procs to communicate with + ditto for complementary recv for swap I+1 or I-1, as invoked by caller ------------------------------------------------------------------------- */ -void CommTiled::free_swap() +void CommTiled::grow_swap_send(int i, int n, int nold) { - memory->destroy(sendnum); - memory->destroy(recvnum); - memory->destroy(sendproc); - memory->destroy(recvproc); - memory->destroy(size_forward_recv); - memory->destroy(size_reverse_send); - memory->destroy(size_reverse_recv); - memory->destroy(firstrecv); - memory->destroy(pbc_flag); - memory->destroy(pbc); + delete [] sendproc[i]; + sendproc[i] = new int[n]; + delete [] sendnum[i]; + sendnum[i] = new int[n]; + + delete [] size_reverse_recv[i]; + size_reverse_recv[i] = new int[n]; + delete [] reverse_recv_offset[i]; + reverse_recv_offset[i] = new int[n]; + + delete [] pbc_flag[i]; + pbc_flag[i] = new int[n]; + memory->destroy(pbc[i]); + memory->create(pbc[i],n,6,"comm:pbc_flag"); + memory->destroy(sendbox[i]); + memory->create(sendbox[i],n,6,"comm:sendbox"); + + delete [] maxsendlist[i]; + maxsendlist[i] = new int[n]; + + for (int j = 0; j < nold; j++) memory->destroy(sendlist[i][j]); + delete [] sendlist[i]; + sendlist[i] = new int*[n]; + for (int j = 0; j < n; j++) { + maxsendlist[i][j] = BUFMIN; + memory->create(sendlist[i][j],BUFMIN,"comm:sendlist[i][j]"); + } +} + +void CommTiled::grow_swap_recv(int i, int n) +{ + delete [] recvproc[i]; + recvproc[i] = new int[n]; + delete [] recvnum[i]; + recvnum[i] = new int[n]; + + delete [] size_forward_recv[i]; + size_forward_recv[i] = new int[n]; + delete [] firstrecv[i]; + firstrecv[i] = new int[n]; + delete [] forward_recv_offset[i]; + forward_recv_offset[i] = new int[n]; + + delete [] size_reverse_send[i]; + size_reverse_send[i] = new int[n]; + + if (n > maxreqstat) { + maxreqstat = n; + delete [] requests; + delete [] statuses; + requests = new MPI_Request[n]; + statuses = new MPI_Status[n]; + } +} + +/* ---------------------------------------------------------------------- + deallocate swap info +------------------------------------------------------------------------- */ + +void CommTiled::deallocate_swap(int n) +{ + delete [] nsendproc; + delete [] nrecvproc; + delete [] sendother; + delete [] sendself; + + for (int i = 0; i < n; i++) { + delete [] sendproc[i]; + delete [] recvproc[i]; + delete [] sendnum[i]; + delete [] recvnum[i]; + delete [] size_forward_recv[i]; + delete [] firstrecv[i]; + delete [] size_reverse_send[i]; + delete [] size_reverse_recv[i]; + delete [] forward_recv_offset[i]; + delete [] reverse_recv_offset[i]; + + delete [] pbc_flag[i]; + memory->destroy(pbc[i]); + memory->destroy(sendbox[i]); + delete [] maxsendlist[i]; + + for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]); + delete [] sendlist[i]; + } + + delete [] sendproc; + delete [] recvproc; + delete [] sendnum; + delete [] recvnum; + delete [] size_forward_recv; + delete [] firstrecv; + delete [] size_reverse_send; + delete [] size_reverse_recv; + delete [] forward_recv_offset; + delete [] reverse_recv_offset; + + delete [] pbc_flag; + delete [] pbc; + delete [] sendbox; + delete [] maxsendlist; + delete [] sendlist; + + delete [] requests; + delete [] statuses; + + delete [] nprocmax; } /* ---------------------------------------------------------------------- diff --git a/src/comm_tiled.h b/src/comm_tiled.h index b992242476..f3f8f65e74 100644 --- a/src/comm_tiled.h +++ b/src/comm_tiled.h @@ -57,13 +57,13 @@ class CommTiled : public Comm { int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap int *sendother; // 1 if send to any other proc in each swap int *sendself; // 1 if send to self in each swap - int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc + int *nprocmax; // current max # of send procs for each swap int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc + int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc int **size_forward_recv; // # of values to recv in each forward swap/proc int **firstrecv; // where to put 1st recv atom per swap/proc int **size_reverse_send; // # to send in each reverse comm per swap/proc int **size_reverse_recv; // # to recv in each reverse comm per swap/proc - int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc @@ -82,40 +82,43 @@ class CommTiled : public Comm { int maxexchange; // max # of datums/atom in exchange comm int bufextra; // extra space beyond maxsend in send buffer + int maxreqstat; // max size of Request and Status vectors MPI_Request *requests; MPI_Status *statuses; int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm - struct Tree { - double cut; - int dim; + struct RCBinfo { + double mysplit[3][2]; // fractional RCB bounding box for one proc + double cut; // position of cut this proc owns + int dim; // dimension = 0/1/2 of cut }; - Tree *tree; - - // info from RCB decomp - - double rcbcut; - int rcbcutdim; - double rcblo[3]; - double rcbhi[3]; + int *overlap; + RCBinfo *rcbinfo; // list of RCB info for all procs void init_buffers(); - void box_drop_uniform(int, double *, double *, int &, int *, int &); - void box_drop_nonuniform(int, double *, double *, int &, int *, int &); - void box_drop_tiled(double *, double *, int, int, int &, int *, int &); + // box drop and other functions - void box_other_uniform(int, double *, double *) {} - void box_other_nonuniform(int, double *, double *) {} - void box_other_tiled(int, double *, double *) {} + typedef void (CommTiled::*BoxDropPtr)(int, double *, double *, int &, int &); + BoxDropPtr box_drop; + void box_drop_brick(int, double *, double *, int &, int &); + void box_drop_tiled(int, double *, double *, int &, int &); + void box_drop_tiled_recurse(double *, double *, int, int, int &, int &); - void grow_send(int, int); // reallocate send buffer - void grow_recv(int); // free/allocate recv buffer - void grow_list(int, int, int); // reallocate sendlist for one swap/proc - void allocate_swap(int); // allocate swap arrays - void free_swap(); // free swap arrays + typedef void (CommTiled::*BoxOtherPtr)(int, int, int, double *, double *); + BoxOtherPtr box_other; + void box_other_brick(int, int, int, double *, double *); + void box_other_tiled(int, int, int, double *, double *); + + void grow_send(int, int); // reallocate send buffer + void grow_recv(int); // free/allocate recv buffer + void grow_list(int, int, int); // reallocate sendlist for one swap/proc + void allocate_swap(int); // allocate swap arrays + void grow_swap_send(int, int, int); // grow swap arrays for send and recv + void grow_swap_recv(int, int); + void deallocate_swap(int); // deallocate swap arrays }; } diff --git a/src/domain.cpp b/src/domain.cpp index 83fafd7261..13e8a190dc 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -301,34 +301,31 @@ void Domain::set_local_box() double *zsplit = comm->zsplit; sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]]; - if (myloc[0] < procgrid[0]-1) - subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1]; + if (myloc[0] < procgrid[0]-1) subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1]; else subhi[0] = boxhi[0]; sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]]; - if (myloc[1] < procgrid[1]-1) - subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1]; + if (myloc[1] < procgrid[1]-1) subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1]; else subhi[1] = boxhi[1]; sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]]; - if (myloc[2] < procgrid[2]-1) - subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1]; + if (myloc[2] < procgrid[2]-1) subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1]; else subhi[2] = boxhi[2]; } else { double (*mysplit)[2] = comm->mysplit; sublo[0] = boxlo[0] + xprd*mysplit[0][0]; - subhi[0] = boxlo[0] + xprd*mysplit[0][1]; - if (mysplit[0][1] == 1.0) subhi[0] = boxhi[0]; + if (mysplit[0][1] < 1.0) subhi[0] = boxlo[0] + xprd*mysplit[0][1]; + else subhi[0] = boxhi[0]; sublo[1] = boxlo[1] + yprd*mysplit[1][0]; - subhi[1] = boxlo[1] + yprd*mysplit[1][1]; - if (mysplit[1][1] == 1.0) subhi[1] = boxhi[1]; + if (mysplit[1][1] < 1.0) subhi[1] = boxlo[1] + yprd*mysplit[1][1]; + else subhi[1] = boxhi[1]; sublo[2] = boxlo[2] + zprd*mysplit[2][0]; - subhi[2] = boxlo[2] + zprd*mysplit[2][1]; - if (mysplit[2][1] == 1.0) subhi[2] = boxhi[2]; + if (mysplit[2][1] < 1.0) subhi[2] = boxlo[2] + zprd*mysplit[2][1]; + else subhi[2] = boxhi[2]; } }