// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "rcb.h" #include "irregular.h" #include "memory.h" #include using namespace LAMMPS_NS; #define MYHUGE 1.0e30 #define TINY 1.0e-6 #define DELTA 16384 // prototypes for non-class functions void box_merge(void *, void *, int *, MPI_Datatype *); void median_merge(void *, void *, int *, MPI_Datatype *); // NOTE: if want to have reuse flag, need to sum Tree across procs /* ---------------------------------------------------------------------- */ RCB::RCB(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); ndot = maxdot = 0; dots = nullptr; nlist = maxlist = 0; dotlist = dotmark = dotmark_select = nullptr; maxbuf = 0; buf = nullptr; maxrecv = maxsend = 0; recvproc = recvindex = sendproc = sendindex = nullptr; tree = nullptr; irregular = nullptr; // create MPI data and function types for box and median AllReduce ops MPI_Type_contiguous(6,MPI_DOUBLE,&box_type); MPI_Type_commit(&box_type); MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type); MPI_Type_commit(&med_type); MPI_Op_create(box_merge,1,&box_op); MPI_Op_create(median_merge,1,&med_op); reuse = 0; } /* ---------------------------------------------------------------------- */ RCB::~RCB() { memory->sfree(dots); memory->destroy(dotlist); memory->destroy(dotmark); memory->destroy(dotmark_select); memory->sfree(buf); memory->destroy(recvproc); memory->destroy(recvindex); memory->destroy(sendproc); memory->destroy(sendindex); memory->sfree(tree); delete irregular; MPI_Type_free(&med_type); MPI_Type_free(&box_type); MPI_Op_free(&box_op); MPI_Op_free(&med_op); } /* ---------------------------------------------------------------------- perform RCB balancing of N particles at coords X in bounding box LO/HI NEW version: each RCB cut is tested in all dimensions dimeension that produces 2 boxes with largest min size is selected this is to prevent very narrow boxes from being produced if wt = nullptr, ignore per-particle weights if wt defined, per-particle weights > 0.0 dimension = 2 or 3 as documented in rcb.h: sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi all proc particles will be inside or on surface of 3-d box defined by final lo/hi // NOTE: worry about re-use of data structs for fix balance? ------------------------------------------------------------------------- */ void RCB::compute(int dimension, int n, double **x, double *wt, double *bboxlo, double *bboxhi) { int i,j,k; int keep,outgoing,incoming,incoming2; int dim,markactive; int indexlo,indexhi; int first_iteration,breakflag; double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax; double targetlo,targethi; double valuemin,valuemax,valuehalf,valuehalf_select,smaller; double tolerance; MPI_Comm comm,comm_half; MPI_Request request,request2; Median med,medme; // create list of my Dots ndot = nkeep = noriginal = n; if (ndot > maxdot) { maxdot = ndot; memory->sfree(dots); dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots"); } for (i = 0; i < ndot; i++) { dots[i].x[0] = x[i][0]; dots[i].x[1] = x[i][1]; dots[i].x[2] = x[i][2]; dots[i].proc = me; dots[i].index = i; } if (wt) for (i = 0; i < ndot; i++) dots[i].wt = wt[i]; else for (i = 0; i < ndot; i++) dots[i].wt = 1.0; // initial bounding box = simulation box // includes periodic or shrink-wrapped boundaries lo = bbox.lo; hi = bbox.hi; lo[0] = bboxlo[0]; lo[1] = bboxlo[1]; lo[2] = bboxlo[2]; hi[0] = bboxhi[0]; hi[1] = bboxhi[1]; hi[2] = bboxhi[2]; cut = 0.0; cutdim = -1; // initialize counters counters[0] = 0; counters[1] = 0; counters[2] = 0; counters[3] = ndot; counters[4] = maxdot; counters[5] = 0; counters[6] = 0; // create communicator for use in recursion MPI_Comm_dup(world,&comm); // recurse until partition is a single proc = me // proclower,procupper = lower,upper procs in partition // procmid = 1st proc in upper half of partition int procpartner,procpartner2; int procmid; int proclower = 0; int procupper = nprocs - 1; while (proclower != procupper) { // if odd # of procs, lower partition gets extra one procmid = proclower + (procupper - proclower) / 2 + 1; // determine communication partner(s) // readnumber = # of proc partners to read from if (me < procmid) procpartner = me + (procmid - proclower); else procpartner = me - (procmid - proclower); int readnumber = 1; if (procpartner > procupper) { readnumber = 0; procpartner--; } if (me == procupper && procpartner != procmid - 1) { readnumber = 2; procpartner2 = procpartner + 1; } // wttot = summed weight of entire partition // search tolerance = largest single weight (plus epsilon) // targetlo = desired weight in lower half of partition // targethi = desired weight in upper half of partition wtmax = wtsum = 0.0; if (wt) { for (i = 0; i < ndot; i++) { wtsum += dots[i].wt; if (dots[i].wt > wtmax) wtmax = dots[i].wt; } } else { for (i = 0; i < ndot; i++) wtsum += dots[i].wt; wtmax = 1.0; } MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm); if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm); else tolerance = 1.0; tolerance *= 1.0 + TINY; targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower); targethi = wttot - targetlo; // attempt a cut in each dimension // each cut produces 2 boxes, each with a reduced box length in that dim // smaller = the smaller of the 2 reduced box lengths in that dimension // choose to cut in dimension which produces largest smaller value // this should induce final proc sub-boxes to be as cube-ish as possible // dim_select = selected cut dimension // valuehalf_select = valuehalf in that dimension // dotmark_select = dot markings in that dimension // initialize largest = -1.0 to insure a cut in some dim is accepted // e.g. if current recursed box is size 0 in all dims int dim_select = -1; double largest = -1.0; for (dim = 0; dim < dimension; dim++) { // create active list and mark array for dots // initialize active list to all dots if (ndot > maxlist) { memory->destroy(dotlist); memory->destroy(dotmark); memory->destroy(dotmark_select); maxlist = maxdot; memory->create(dotlist,maxlist,"RCB:dotlist"); memory->create(dotmark,maxlist,"RCB:dotmark"); memory->create(dotmark_select,maxlist,"RCB:dotmark_select"); } nlist = ndot; for (i = 0; i < nlist; i++) dotlist[i] = i; // median iteration // zoom in on bisector until correct # of dots in each half of partition // as each iteration of median-loop begins, require: // all non-active dots are marked with 0/1 in dotmark // valuemin <= every active dot <= valuemax // wtlo, wthi = total wt of non-active dots // when leave median-loop, require only: // valuehalf = correct cut position // all dots <= valuehalf are marked with 0 in dotmark // all dots >= valuehalf are marked with 1 in dotmark // markactive = which side of cut is active = 0/1 // indexlo,indexhi = indices of dot closest to median wtlo = wthi = 0.0; valuemin = lo[dim]; valuemax = hi[dim]; first_iteration = 1; indexlo = indexhi = 0; while (true) { // choose bisector value // use old value on 1st iteration if old cut dimension is the same // on 2nd option: could push valuehalf towards geometric center // with "1.0-factor" to force overshoot if (first_iteration && reuse && dim == tree[procmid].dim) { counters[5]++; valuehalf = tree[procmid].cut; if (valuehalf < valuemin || valuehalf > valuemax) valuehalf = 0.5 * (valuemin + valuemax); } else if (wt) valuehalf = valuemin + (targetlo - wtlo) / (wttot - wtlo - wthi) * (valuemax - valuemin); else valuehalf = 0.5 * (valuemin + valuemax); first_iteration = 0; // initialize local median data structure medme.totallo = medme.totalhi = 0.0; medme.valuelo = -MYHUGE; medme.valuehi = MYHUGE; medme.wtlo = medme.wthi = 0.0; medme.countlo = medme.counthi = 0; medme.proclo = medme.prochi = me; // mark all active dots on one side or other of bisector // also set all fields in median data struct // save indices of closest dots on either side for (j = 0; j < nlist; j++) { i = dotlist[j]; if (dots[i].x[dim] <= valuehalf) { // in lower part medme.totallo += dots[i].wt; dotmark[i] = 0; if (dots[i].x[dim] > medme.valuelo) { // my closest dot medme.valuelo = dots[i].x[dim]; medme.wtlo = dots[i].wt; medme.countlo = 1; indexlo = i; } else if (dots[i].x[dim] == medme.valuelo) { // tied for closest medme.wtlo += dots[i].wt; medme.countlo++; } } else { // in upper part medme.totalhi += dots[i].wt; dotmark[i] = 1; if (dots[i].x[dim] < medme.valuehi) { // my closest dot medme.valuehi = dots[i].x[dim]; medme.wthi = dots[i].wt; medme.counthi = 1; indexhi = i; } else if (dots[i].x[dim] == medme.valuehi) { // tied for closest medme.wthi += dots[i].wt; medme.counthi++; } } } // combine median data struct across current subset of procs counters[0]++; MPI_Allreduce(&medme,&med,1,med_type,med_op,comm); // test median guess for convergence // move additional dots that are next to cut across it if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL wtlo += med.totallo; valuehalf = med.valuehi; if (med.counthi == 1) { // only one dot to move if (wtlo + med.wthi < targetlo) { // move it, keep iterating if (me == med.prochi) dotmark[indexhi] = 0; } else { // only move if beneficial if (wtlo + med.wthi - targetlo < targetlo - wtlo) if (me == med.prochi) dotmark[indexhi] = 0; break; // all done } } else { // multiple dots to move breakflag = 0; wtok = 0.0; if (medme.valuehi == med.valuehi) wtok = medme.wthi; if (wtlo + med.wthi >= targetlo) { // all done MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); wtmax = targetlo - wtlo; if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); breakflag = 1; } // wtok = most I can move for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { i = dotlist[j]; if (dots[i].x[dim] == med.valuehi) { // only move if better if (wtsum + dots[i].wt - wtok < wtok - wtsum) dotmark[i] = 0; wtsum += dots[i].wt; } } if (breakflag) break; // done if moved enough } wtlo += med.wthi; if (targetlo-wtlo <= tolerance) break; // close enough valuemin = med.valuehi; // iterate again markactive = 1; } else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL wthi += med.totalhi; valuehalf = med.valuelo; if (med.countlo == 1) { // only one dot to move if (wthi + med.wtlo < targethi) { // move it, keep iterating if (me == med.proclo) dotmark[indexlo] = 1; } else { // only move if beneficial if (wthi + med.wtlo - targethi < targethi - wthi) if (me == med.proclo) dotmark[indexlo] = 1; break; // all done } } else { // multiple dots to move breakflag = 0; wtok = 0.0; if (medme.valuelo == med.valuelo) wtok = medme.wtlo; if (wthi + med.wtlo >= targethi) { // all done MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); wtmax = targethi - wthi; if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); breakflag = 1; } // wtok = most I can move for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { i = dotlist[j]; if (dots[i].x[dim] == med.valuelo) { // only move if better if (wtsum + dots[i].wt - wtok < wtok - wtsum) dotmark[i] = 1; wtsum += dots[i].wt; } } if (breakflag) break; // done if moved enough } wthi += med.wtlo; if (targethi-wthi <= tolerance) break; // close enough valuemax = med.valuelo; // iterate again markactive = 0; } else // Goldilocks result: both partitions just right break; // shrink the active list k = 0; for (j = 0; j < nlist; j++) { i = dotlist[j]; if (dotmark[i] == markactive) dotlist[k++] = i; } nlist = k; } // cut produces 2 sub-boxes with reduced size in dim // compare smaller of the 2 sizes to previous dims // keep dim that has the largest smaller smaller = MIN(valuehalf-lo[dim],hi[dim]-valuehalf); if (smaller > largest) { largest = smaller; dim_select = dim; valuehalf_select = valuehalf; if (ndot > 0) memcpy(dotmark_select,dotmark,ndot*sizeof(int)); } } // copy results for best dim cut into dim,valuehalf,dotmark dim = dim_select; valuehalf = valuehalf_select; if (ndot > 0) memcpy(dotmark,dotmark_select,ndot*sizeof(int)); // special case for zero box width // can occur when all dots are on corner vertices of this sub-box // split box on longest dimension // reset dotmark for that cut if (largest == 0.0) { dim = 0; if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1; if (dimension == 3 && hi[2]-lo[2] > hi[dim]-lo[dim]) dim = 2; valuehalf = 0.5* (lo[dim] + hi[dim]); for (j = 0; j < nlist; j++) { i = dotlist[j]; if (dots[i].x[dim] <= valuehalf) dotmark[i] = 0; else dotmark[i] = 1; } } // found median // store cut info only if I am procmid if (me == procmid) { cut = valuehalf; cutdim = dim; } // use cut to shrink my RCB bounding box if (me < procmid) hi[dim] = valuehalf; else lo[dim] = valuehalf; // outgoing = number of dots to ship to partner // nkeep = number of dots that have never migrated markactive = (me < procpartner); for (i = 0, keep = 0, outgoing = 0; i < ndot; i++) if (dotmark[i] == markactive) outgoing++; else if (i < nkeep) keep++; nkeep = keep; // alert partner how many dots I'll send, read how many I'll recv MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world); incoming = 0; if (readnumber) { MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE); if (readnumber == 2) { MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,MPI_STATUS_IGNORE); incoming += incoming2; } } // check if need to alloc more space int ndotnew = ndot - outgoing + incoming; if (ndotnew > maxdot) { while (maxdot < ndotnew) maxdot += DELTA; dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots"); counters[6]++; } counters[1] += outgoing; counters[2] += incoming; if (ndotnew > counters[3]) counters[3] = ndotnew; if (maxdot > counters[4]) counters[4] = maxdot; // malloc comm send buffer if (outgoing > maxbuf) { memory->sfree(buf); maxbuf = outgoing; buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf"); } // fill buffer with dots that are marked for sending // pack down the unmarked ones keep = outgoing = 0; for (i = 0; i < ndot; i++) { if (dotmark[i] == markactive) memcpy(&buf[outgoing++],&dots[i],sizeof(Dot)); else memmove(&dots[keep++],&dots[i],sizeof(Dot)); } // post receives for dots if (readnumber > 0) { MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR, procpartner,1,world,&request); if (readnumber == 2) { keep += incoming - incoming2; MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR, procpartner2,1,world,&request2); } } // handshake before sending dots to insure recvs have been posted if (readnumber > 0) { MPI_Send(nullptr,0,MPI_INT,procpartner,0,world); if (readnumber == 2) MPI_Send(nullptr,0,MPI_INT,procpartner2,0,world); } MPI_Recv(nullptr,0,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE); // send dots to partner MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world); // wait until all dots are received if (readnumber > 0) { MPI_Wait(&request,MPI_STATUS_IGNORE); if (readnumber == 2) MPI_Wait(&request2,MPI_STATUS_IGNORE); } ndot = ndotnew; // cut partition in half, create new communicators of 1/2 size int split; if (me < procmid) { procupper = procmid - 1; split = 0; } else { proclower = procmid; split = 1; } MPI_Comm_split(comm,split,me,&comm_half); MPI_Comm_free(&comm); comm = comm_half; } // clean up MPI_Comm_free(&comm); // set public variables with results of rebalance nfinal = ndot; if (nfinal > maxrecv) { memory->destroy(recvproc); memory->destroy(recvindex); maxrecv = nfinal; memory->create(recvproc,maxrecv,"RCB:recvproc"); memory->create(recvindex,maxrecv,"RCB:recvindex"); } for (i = 0; i < nfinal; i++) { recvproc[i] = dots[i].proc; recvindex[i] = dots[i].index; } } /* ---------------------------------------------------------------------- perform RCB balancing of N particles at coords X in bounding box LO/HI OLD version: each RCB cut is made in longest dimension of sub-box if wt = nullptr, ignore per-particle weights if wt defined, per-particle weights > 0.0 dimension = 2 or 3 as documented in rcb.h: sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi all proc particles will be inside or on surface of 3-d box defined by final lo/hi // NOTE: worry about re-use of data structs for fix balance? ------------------------------------------------------------------------- */ void RCB::compute_old(int dimension, int n, double **x, double *wt, double *bboxlo, double *bboxhi) { int i,j,k; int keep,outgoing,incoming,incoming2; int dim,markactive; int indexlo,indexhi; int first_iteration,breakflag; double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax; double targetlo,targethi; double valuemin,valuemax,valuehalf; double tolerance; MPI_Comm comm,comm_half; MPI_Request request,request2; Median med,medme; // create list of my Dots ndot = nkeep = noriginal = n; if (ndot > maxdot) { maxdot = ndot; memory->sfree(dots); dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots"); } for (i = 0; i < ndot; i++) { dots[i].x[0] = x[i][0]; dots[i].x[1] = x[i][1]; dots[i].x[2] = x[i][2]; dots[i].proc = me; dots[i].index = i; } if (wt) for (i = 0; i < ndot; i++) dots[i].wt = wt[i]; else for (i = 0; i < ndot; i++) dots[i].wt = 1.0; // initial bounding box = simulation box // includes periodic or shrink-wrapped boundaries lo = bbox.lo; hi = bbox.hi; lo[0] = bboxlo[0]; lo[1] = bboxlo[1]; lo[2] = bboxlo[2]; hi[0] = bboxhi[0]; hi[1] = bboxhi[1]; hi[2] = bboxhi[2]; cut = 0.0; cutdim = -1; // initialize counters counters[0] = 0; counters[1] = 0; counters[2] = 0; counters[3] = ndot; counters[4] = maxdot; counters[5] = 0; counters[6] = 0; // create communicator for use in recursion MPI_Comm_dup(world,&comm); // recurse until partition is a single proc = me // proclower,procupper = lower,upper procs in partition // procmid = 1st proc in upper half of partition int procpartner,procpartner2; int procmid; int proclower = 0; int procupper = nprocs - 1; while (proclower != procupper) { // if odd # of procs, lower partition gets extra one procmid = proclower + (procupper - proclower) / 2 + 1; // determine communication partner(s) // readnumber = # of proc partners to read from if (me < procmid) procpartner = me + (procmid - proclower); else procpartner = me - (procmid - proclower); int readnumber = 1; if (procpartner > procupper) { readnumber = 0; procpartner--; } if (me == procupper && procpartner != procmid - 1) { readnumber = 2; procpartner2 = procpartner + 1; } // wttot = summed weight of entire partition // search tolerance = largest single weight (plus epsilon) // targetlo = desired weight in lower half of partition // targethi = desired weight in upper half of partition wtmax = wtsum = 0.0; if (wt) { for (i = 0; i < ndot; i++) { wtsum += dots[i].wt; if (dots[i].wt > wtmax) wtmax = dots[i].wt; } } else { for (i = 0; i < ndot; i++) wtsum += dots[i].wt; wtmax = 1.0; } MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm); if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm); else tolerance = 1.0; tolerance *= 1.0 + TINY; targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower); targethi = wttot - targetlo; // dim = dimension to bisect on // do not allow choice of z dimension for 2d system dim = 0; if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1; if (dimension == 3) { if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2; if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2; } // create active list and mark array for dots // initialize active list to all dots if (ndot > maxlist) { memory->destroy(dotlist); memory->destroy(dotmark); maxlist = maxdot; memory->create(dotlist,maxlist,"RCB:dotlist"); memory->create(dotmark,maxlist,"RCB:dotmark"); } nlist = ndot; for (i = 0; i < nlist; i++) dotlist[i] = i; // median iteration // zoom in on bisector until correct # of dots in each half of partition // as each iteration of median-loop begins, require: // all non-active dots are marked with 0/1 in dotmark // valuemin <= every active dot <= valuemax // wtlo, wthi = total wt of non-active dots // when leave median-loop, require only: // valuehalf = correct cut position // all dots <= valuehalf are marked with 0 in dotmark // all dots >= valuehalf are marked with 1 in dotmark // markactive = which side of cut is active = 0/1 // indexlo,indexhi = indices of dot closest to median wtlo = wthi = 0.0; valuemin = lo[dim]; valuemax = hi[dim]; first_iteration = 1; indexlo = indexhi = 0; while (true) { // choose bisector value // use old value on 1st iteration if old cut dimension is the same // on 2nd option: could push valuehalf towards geometric center // with "1.0-factor" to force overshoot if (first_iteration && reuse && dim == tree[procmid].dim) { counters[5]++; valuehalf = tree[procmid].cut; if (valuehalf < valuemin || valuehalf > valuemax) valuehalf = 0.5 * (valuemin + valuemax); } else if (wt) valuehalf = valuemin + (targetlo - wtlo) / (wttot - wtlo - wthi) * (valuemax - valuemin); else valuehalf = 0.5 * (valuemin + valuemax); first_iteration = 0; // initialize local median data structure medme.totallo = medme.totalhi = 0.0; medme.valuelo = -MYHUGE; medme.valuehi = MYHUGE; medme.wtlo = medme.wthi = 0.0; medme.countlo = medme.counthi = 0; medme.proclo = medme.prochi = me; // mark all active dots on one side or other of bisector // also set all fields in median data struct // save indices of closest dots on either side for (j = 0; j < nlist; j++) { i = dotlist[j]; if (dots[i].x[dim] <= valuehalf) { // in lower part medme.totallo += dots[i].wt; dotmark[i] = 0; if (dots[i].x[dim] > medme.valuelo) { // my closest dot medme.valuelo = dots[i].x[dim]; medme.wtlo = dots[i].wt; medme.countlo = 1; indexlo = i; } else if (dots[i].x[dim] == medme.valuelo) { // tied for closest medme.wtlo += dots[i].wt; medme.countlo++; } } else { // in upper part medme.totalhi += dots[i].wt; dotmark[i] = 1; if (dots[i].x[dim] < medme.valuehi) { // my closest dot medme.valuehi = dots[i].x[dim]; medme.wthi = dots[i].wt; medme.counthi = 1; indexhi = i; } else if (dots[i].x[dim] == medme.valuehi) { // tied for closest medme.wthi += dots[i].wt; medme.counthi++; } } } // combine median data struct across current subset of procs counters[0]++; MPI_Allreduce(&medme,&med,1,med_type,med_op,comm); // test median guess for convergence // move additional dots that are next to cut across it if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL wtlo += med.totallo; valuehalf = med.valuehi; if (med.counthi == 1) { // only one dot to move if (wtlo + med.wthi < targetlo) { // move it, keep iterating if (me == med.prochi) dotmark[indexhi] = 0; } else { // only move if beneficial if (wtlo + med.wthi - targetlo < targetlo - wtlo) if (me == med.prochi) dotmark[indexhi] = 0; break; // all done } } else { // multiple dots to move breakflag = 0; wtok = 0.0; if (medme.valuehi == med.valuehi) wtok = medme.wthi; if (wtlo + med.wthi >= targetlo) { // all done MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); wtmax = targetlo - wtlo; if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); breakflag = 1; } // wtok = most I can move for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { i = dotlist[j]; if (dots[i].x[dim] == med.valuehi) { // only move if better if (wtsum + dots[i].wt - wtok < wtok - wtsum) dotmark[i] = 0; wtsum += dots[i].wt; } } if (breakflag) break; // done if moved enough } wtlo += med.wthi; if (targetlo-wtlo <= tolerance) break; // close enough valuemin = med.valuehi; // iterate again markactive = 1; } else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL wthi += med.totalhi; valuehalf = med.valuelo; if (med.countlo == 1) { // only one dot to move if (wthi + med.wtlo < targethi) { // move it, keep iterating if (me == med.proclo) dotmark[indexlo] = 1; } else { // only move if beneficial if (wthi + med.wtlo - targethi < targethi - wthi) if (me == med.proclo) dotmark[indexlo] = 1; break; // all done } } else { // multiple dots to move breakflag = 0; wtok = 0.0; if (medme.valuelo == med.valuelo) wtok = medme.wtlo; if (wthi + med.wtlo >= targethi) { // all done MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); wtmax = targethi - wthi; if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); breakflag = 1; } // wtok = most I can move for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { i = dotlist[j]; if (dots[i].x[dim] == med.valuelo) { // only move if better if (wtsum + dots[i].wt - wtok < wtok - wtsum) dotmark[i] = 1; wtsum += dots[i].wt; } } if (breakflag) break; // done if moved enough } wthi += med.wtlo; if (targethi-wthi <= tolerance) break; // close enough valuemax = med.valuelo; // iterate again markactive = 0; } else // Goldilocks result: both partitions just right break; // shrink the active list k = 0; for (j = 0; j < nlist; j++) { i = dotlist[j]; if (dotmark[i] == markactive) dotlist[k++] = i; } nlist = k; } // found median // store cut info only if I am procmid if (me == procmid) { cut = valuehalf; cutdim = dim; } // use cut to shrink my RCB bounding box if (me < procmid) hi[dim] = valuehalf; else lo[dim] = valuehalf; // outgoing = number of dots to ship to partner // nkeep = number of dots that have never migrated markactive = (me < procpartner); for (i = 0, keep = 0, outgoing = 0; i < ndot; i++) if (dotmark[i] == markactive) outgoing++; else if (i < nkeep) keep++; nkeep = keep; // alert partner how many dots I'll send, read how many I'll recv MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world); incoming = 0; if (readnumber) { MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE); if (readnumber == 2) { MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,MPI_STATUS_IGNORE); incoming += incoming2; } } // check if need to alloc more space int ndotnew = ndot - outgoing + incoming; if (ndotnew > maxdot) { while (maxdot < ndotnew) maxdot += DELTA; dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots"); counters[6]++; } counters[1] += outgoing; counters[2] += incoming; if (ndotnew > counters[3]) counters[3] = ndotnew; if (maxdot > counters[4]) counters[4] = maxdot; // malloc comm send buffer if (outgoing > maxbuf) { memory->sfree(buf); maxbuf = outgoing; buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf"); } // fill buffer with dots that are marked for sending // pack down the unmarked ones keep = outgoing = 0; for (i = 0; i < ndot; i++) { if (dotmark[i] == markactive) memcpy(&buf[outgoing++],&dots[i],sizeof(Dot)); else memmove(&dots[keep++],&dots[i],sizeof(Dot)); } // post receives for dots if (readnumber > 0) { MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR, procpartner,1,world,&request); if (readnumber == 2) { keep += incoming - incoming2; MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR, procpartner2,1,world,&request2); } } // handshake before sending dots to insure recvs have been posted if (readnumber > 0) { MPI_Send(nullptr,0,MPI_INT,procpartner,0,world); if (readnumber == 2) MPI_Send(nullptr,0,MPI_INT,procpartner2,0,world); } MPI_Recv(nullptr,0,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE); // send dots to partner MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world); // wait until all dots are received if (readnumber > 0) { MPI_Wait(&request,MPI_STATUS_IGNORE); if (readnumber == 2) MPI_Wait(&request2,MPI_STATUS_IGNORE); } ndot = ndotnew; // cut partition in half, create new communicators of 1/2 size int split; if (me < procmid) { procupper = procmid - 1; split = 0; } else { proclower = procmid; split = 1; } MPI_Comm_split(comm,split,me,&comm_half); MPI_Comm_free(&comm); comm = comm_half; } // clean up MPI_Comm_free(&comm); // set public variables with results of rebalance nfinal = ndot; if (nfinal > maxrecv) { memory->destroy(recvproc); memory->destroy(recvindex); maxrecv = nfinal; memory->create(recvproc,maxrecv,"RCB:recvproc"); memory->create(recvindex,maxrecv,"RCB:recvindex"); } for (i = 0; i < nfinal; i++) { recvproc[i] = dots[i].proc; recvindex[i] = dots[i].index; } } /* ---------------------------------------------------------------------- custom MPI reduce operation merge of each component of an RCB bounding box ------------------------------------------------------------------------- */ void box_merge(void *in, void *inout, int * /*len*/, MPI_Datatype * /*dptr*/) { RCB::BBox *box1 = (RCB::BBox *) in; RCB::BBox *box2 = (RCB::BBox *) inout; for (int i = 0; i < 3; i++) { if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i]; if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i]; } } /* ---------------------------------------------------------------------- custom MPI reduce operation merge median data structure on input: in,inout->totallo, totalhi = weight in both partitions on this proc valuelo, valuehi = pos of nearest dot(s) to cut on this proc wtlo, wthi = total wt of dot(s) at that pos on this proc countlo, counthi = # of dot(s) nearest to cut on this proc proclo, prochi = not used on exit: inout-> totallo, totalhi = total # of active dots in both partitions valuelo, valuehi = pos of nearest dot(s) to cut wtlo, wthi = total wt of dot(s) at that position countlo, counthi = total # of dot(s) nearest to cut proclo, prochi = one unique proc who owns a nearest dot all procs must get same proclo,prochi ------------------------------------------------------------------------- */ void median_merge(void *in, void *inout, int * /*len*/, MPI_Datatype * /*dptr*/) { RCB::Median *med1 = (RCB::Median *) in; RCB::Median *med2 = (RCB::Median *) inout; med2->totallo += med1->totallo; if (med1->valuelo > med2->valuelo) { med2->valuelo = med1->valuelo; med2->wtlo = med1->wtlo; med2->countlo = med1->countlo; med2->proclo = med1->proclo; } else if (med1->valuelo == med2->valuelo) { med2->wtlo += med1->wtlo; med2->countlo += med1->countlo; if (med1->proclo < med2->proclo) med2->proclo = med1->proclo; } med2->totalhi += med1->totalhi; if (med1->valuehi < med2->valuehi) { med2->valuehi = med1->valuehi; med2->wthi = med1->wthi; med2->counthi = med1->counthi; med2->prochi = med1->prochi; } else if (med1->valuehi == med2->valuehi) { med2->wthi += med1->wthi; med2->counthi += med1->counthi; if (med1->prochi < med2->prochi) med2->prochi = med1->prochi; } } /* ---------------------------------------------------------------------- invert the RCB rebalance result to convert receive info into send info sortflag = flag for sorting order of received messages by proc ID ------------------------------------------------------------------------- */ void RCB::invert(int sortflag) { // only create Irregular if not previously created // allows Irregular to persist for multiple RCB calls by fix balance if (!irregular) irregular = new Irregular(lmp); // nsend = # of dots to request from other procs int nsend = nfinal-nkeep; int *proclist; memory->create(proclist,nsend,"RCB:proclist"); Invert *sinvert = (Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert"); int m = 0; for (int i = nkeep; i < nfinal; i++) { proclist[m] = recvproc[i]; sinvert[m].rindex = recvindex[i]; sinvert[m].sproc = me; sinvert[m].sindex = i; m++; } // perform inversion via irregular comm // nrecv = # of my dots to send to other procs int nrecv = irregular->create_data(nsend,proclist,sortflag); Invert *rinvert = (Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert"); irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert); irregular->destroy_data(); // set public variables from requests to send my dots if (noriginal > maxsend) { memory->destroy(sendproc); memory->destroy(sendindex); maxsend = noriginal; memory->create(sendproc,maxsend,"RCB:sendproc"); memory->create(sendindex,maxsend,"RCB:sendindex"); } for (int i = 0; i < nkeep; i++) { sendproc[recvindex[i]] = me; sendindex[recvindex[i]] = i; } for (int i = 0; i < nrecv; i++) { m = rinvert[i].rindex; sendproc[m] = rinvert[i].sproc; sendindex[m] = rinvert[i].sindex; } // clean-up memory->destroy(proclist); memory->destroy(sinvert); memory->destroy(rinvert); } /* ---------------------------------------------------------------------- memory use of Irregular ------------------------------------------------------------------------- */ double RCB::memory_usage() { double bytes = 0; if (irregular) bytes += irregular->memory_usage(); return bytes; } // ----------------------------------------------------------------------- // DEBUG methods // ----------------------------------------------------------------------- /* // consistency checks on RCB results void RCB::check() { int i,iflag,total1,total2; double weight,wtmax,wtmin,wtone,tolerance; // check that total # of dots remained the same MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world); if (total1 != total2) { if (me == 0) printf("ERROR: Points before RCB = %d, Points after RCB = %d\n", total1,total2); } // check that result is load-balanced within log2(P)*max-wt weight = wtone = 0.0; for (i = 0; i < ndot; i++) { weight += dots[i].wt; if (dots[i].wt > wtone) wtone = dots[i].wt; } MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world); MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world); // i = smallest power-of-2 >= nprocs // tolerance = largest-single-weight*log2(nprocs) for (i = 0; (nprocs >> i) != 0; i++); tolerance = tolerance * i * (1.0 + TINY); if (wtmax - wtmin > tolerance) { if (me == 0) printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance); MPI_Barrier(world); if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight); if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight); } MPI_Barrier(world); // check that final set of points is inside RCB box of each proc iflag = 0; for (i = 0; i < ndot; i++) { if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] || dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] || dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2]) iflag++; } if (iflag > 0) printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me); } // stats for RCB decomposition void RCB::stats(int flag) { int i,iflag,sum,min,max; double ave,rsum,rmin,rmax; double weight,wttot,wtmin,wtmax; if (me == 0) printf("RCB Statistics:\n"); // distribution info for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt; MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world); MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); if (me == 0) { printf(" Total weight of dots = %g\n",wttot); printf(" Weight on each proc: ave = %g, max = %g, min = %g\n", wttot/nprocs,wtmax,wtmin); } if (flag) { MPI_Barrier(world); printf(" Proc %d has weight = %g\n",me,weight); } for (i = 0, weight = 0.0; i < ndot; i++) if (dots[i].wt > weight) weight = dots[i].wt; MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax); if (flag) { MPI_Barrier(world); printf(" Proc %d max weight = %g\n",me,weight); } // counter info MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d median count = %d\n",me,counters[0]); } MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d send count = %d\n",me,counters[1]); } MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d recv count = %d\n",me,counters[2]); } MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d max dots = %d\n",me,counters[3]); } MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d max memory = %d\n",me,counters[4]); } if (reuse) { MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d # of Reuse = %d\n",me,counters[5]); } } MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world); MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world); ave = ((double) sum)/nprocs; if (me == 0) printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max); if (flag) { MPI_Barrier(world); printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]); } // RCB boxes for each proc if (flag) { if (me == 0) printf(" RCB sub-domain boxes:\n"); for (i = 0; i < 3; i++) { MPI_Barrier(world); if (me == 0) printf(" Dimension %d\n",i+1); MPI_Barrier(world); printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]); } } } */