/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "lmptype.h" #include "mpi.h" #include "math.h" #include "stdlib.h" #include "string.h" #include "balance.h" #include "atom.h" #include "comm.h" #include "irregular.h" #include "domain.h" #include "force.h" #include "update.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; enum{NONE,UNIFORM,USER,DYNAMIC}; enum{X,Y,Z}; //#define BALANCE_DEBUG 1 /* ---------------------------------------------------------------------- */ Balance::Balance(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); memory->create(proccount,nprocs,"balance:proccount"); memory->create(allproccount,nprocs,"balance:allproccount"); user_xsplit = user_ysplit = user_zsplit = NULL; dflag = 0; fp = NULL; firststep = 1; } /* ---------------------------------------------------------------------- */ Balance::~Balance() { memory->destroy(proccount); memory->destroy(allproccount); delete [] user_xsplit; delete [] user_ysplit; delete [] user_zsplit; if (dflag) { delete [] bdim; delete [] count; delete [] sum; delete [] target; delete [] onecount; delete [] lo; delete [] hi; delete [] losum; delete [] hisum; } if (fp) fclose(fp); } /* ---------------------------------------------------------------------- called as balance command in input script ------------------------------------------------------------------------- */ void Balance::command(int narg, char **arg) { if (domain->box_exist == 0) error->all(FLERR,"Balance command before simulation box is defined"); if (comm->me == 0 && screen) fprintf(screen,"Balancing ...\n"); // parse arguments if (narg < 1) error->all(FLERR,"Illegal balance command"); int dimension = domain->dimension; int *procgrid = comm->procgrid; xflag = yflag = zflag = NONE; dflag = 0; outflag = 0; int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"x") == 0) { if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command"); if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); xflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[0]-1 > narg) error->all(FLERR,"Illegal balance command"); xflag = USER; delete [] user_xsplit; user_xsplit = new double[procgrid[0]+1]; user_xsplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[0]; i++) user_xsplit[i] = force->numeric(arg[iarg++]); user_xsplit[procgrid[0]] = 1.0; } } else if (strcmp(arg[iarg],"y") == 0) { if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command"); if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); yflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[1]-1 > narg) error->all(FLERR,"Illegal balance command"); yflag = USER; delete [] user_ysplit; user_ysplit = new double[procgrid[1]+1]; user_ysplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[1]; i++) user_ysplit[i] = force->numeric(arg[iarg++]); user_ysplit[procgrid[1]] = 1.0; } } else if (strcmp(arg[iarg],"z") == 0) { if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command"); if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); zflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[2]-1 > narg) error->all(FLERR,"Illegal balance command"); zflag = USER; delete [] user_zsplit; user_zsplit = new double[procgrid[2]+1]; user_zsplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[2]; i++) user_zsplit[i] = force->numeric(arg[iarg++]); user_zsplit[procgrid[2]] = 1.0; } } else if (strcmp(arg[iarg],"dynamic") == 0) { if (xflag != NONE || yflag != NONE || zflag != NONE) error->all(FLERR,"Illegal balance command"); if (iarg+4 > narg) error->all(FLERR,"Illegal balance command"); dflag = 1; xflag = yflag = DYNAMIC; if (dimension == 3) zflag = DYNAMIC; if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command"); strcpy(bstr,arg[iarg+1]); nitermax = atoi(arg[iarg+2]); if (nitermax <= 0) error->all(FLERR,"Illegal balance command"); thresh = atof(arg[iarg+3]); if (thresh < 1.0) error->all(FLERR,"Illegal balance command"); iarg += 4; } else if (strcmp(arg[iarg],"out") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); if (outflag) error->all(FLERR,"Illegal balance command"); outflag = 1; if (me == 0) { fp = fopen(arg[iarg+1],"w"); if (fp == NULL) error->one(FLERR,"Cannot open balance output file"); } iarg += 2; } else error->all(FLERR,"Illegal balance command"); } // error check if (zflag && dimension == 2) error->all(FLERR,"Cannot balance in z dimension for 2d simulation"); if (xflag == USER) for (int i = 1; i <= procgrid[0]; i++) if (user_xsplit[i-1] >= user_xsplit[i]) error->all(FLERR,"Illegal balance command"); if (yflag == USER) for (int i = 1; i <= procgrid[1]; i++) if (user_ysplit[i-1] >= user_ysplit[i]) error->all(FLERR,"Illegal balance command"); if (zflag == USER) for (int i = 1; i <= procgrid[2]; i++) if (user_zsplit[i-1] >= user_zsplit[i]) error->all(FLERR,"Illegal balance command"); if (dflag) { for (int i = 0; i < strlen(bstr); i++) { if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z') error->all(FLERR,"Balance dynamic string is invalid"); if (bstr[i] == 'z' && dimension == 2) error->all(FLERR,"Balance dynamic string is invalid"); for (int j = i+1; j < strlen(bstr); j++) if (bstr[i] == bstr[j]) error->all(FLERR,"Balance dynamic string is invalid"); } } // insure atoms are in current box & update box via shrink-wrap // no exchange() since doesn't matter if atoms are assigned to correct procs if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); domain->reset_box(); if (domain->triclinic) domain->lamda2x(atom->nlocal); // imbinit = initial imbalance // use current splits instead of nlocal since atoms may not be in sub-box domain->x2lamda(atom->nlocal); int maxinit; double imbinit = imbalance_splits(maxinit); domain->lamda2x(atom->nlocal); // debug output of initial state #ifdef BALANCE_DEBUG if (me == 0 && fp) dumpout(update->ntimestep,fp); #endif int niter = 0; // explicit setting of sub-domain sizes if (xflag == UNIFORM) { for (int i = 0; i < procgrid[0]; i++) comm->xsplit[i] = i * 1.0/procgrid[0]; comm->xsplit[procgrid[0]] = 1.0; } if (yflag == UNIFORM) { for (int i = 0; i < procgrid[1]; i++) comm->ysplit[i] = i * 1.0/procgrid[1]; comm->ysplit[procgrid[1]] = 1.0; } if (zflag == UNIFORM) { for (int i = 0; i < procgrid[2]; i++) comm->zsplit[i] = i * 1.0/procgrid[2]; comm->zsplit[procgrid[2]] = 1.0; } if (xflag == USER) for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i]; if (yflag == USER) for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i]; if (zflag == USER) for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i]; // static load-balance of sub-domain sizes if (dflag) { static_setup(bstr); niter = dynamic(); } // output of final result if (outflag && me == 0) dumpout(update->ntimestep,fp); // reset comm->uniform flag if necessary if (comm->uniform) { if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0; if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0; if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0; } else { if (dimension == 3) { if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) comm->uniform = 1; } else { if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1; } } // reset proc sub-domains if (domain->triclinic) domain->set_lamda_box(); domain->set_local_box(); // move atoms to new processors via irregular() if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); irregular->migrate_atoms(); delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); // check if any atoms were lost bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (natoms != atom->natoms) { char str[128]; sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT " current " BIGINT_FORMAT,atom->natoms,natoms); error->all(FLERR,str); } // imbfinal = final imbalance based on final nlocal int maxfinal; double imbfinal = imbalance_nlocal(maxfinal); if (me == 0) { if (screen) { fprintf(screen," iteration count = %d\n",niter); fprintf(screen," initial/final max atoms/proc = %d %d\n", maxinit,maxfinal); fprintf(screen," initial/final imbalance factor = %g %g\n", imbinit,imbfinal); } if (logfile) { fprintf(logfile," iteration count = %d\n",niter); fprintf(logfile," initial/final max atoms/proc = %d %d\n", maxinit,maxfinal); fprintf(logfile," initial/final imbalance factor = %g %g\n", imbinit,imbfinal); } } if (me == 0) { if (screen) { fprintf(screen," x cuts:"); for (int i = 0; i <= comm->procgrid[0]; i++) fprintf(screen," %g",comm->xsplit[i]); fprintf(screen,"\n"); fprintf(screen," y cuts:"); for (int i = 0; i <= comm->procgrid[1]; i++) fprintf(screen," %g",comm->ysplit[i]); fprintf(screen,"\n"); fprintf(screen," z cuts:"); for (int i = 0; i <= comm->procgrid[2]; i++) fprintf(screen," %g",comm->zsplit[i]); fprintf(screen,"\n"); } if (logfile) { fprintf(logfile," x cuts:"); for (int i = 0; i <= comm->procgrid[0]; i++) fprintf(logfile," %g",comm->xsplit[i]); fprintf(logfile,"\n"); fprintf(logfile," y cuts:"); for (int i = 0; i <= comm->procgrid[1]; i++) fprintf(logfile," %g",comm->ysplit[i]); fprintf(logfile,"\n"); fprintf(logfile," z cuts:"); for (int i = 0; i <= comm->procgrid[2]; i++) fprintf(logfile," %g",comm->zsplit[i]); fprintf(logfile,"\n"); } } } /* ---------------------------------------------------------------------- calculate imbalance based on nlocal return max = max atom per proc return imbalance factor = max atom per proc / ave atom per proc ------------------------------------------------------------------------- */ double Balance::imbalance_nlocal(int &max) { MPI_Allreduce(&atom->nlocal,&max,1,MPI_INT,MPI_MAX,world); double imbalance = 1.0; if (max) imbalance = max / (1.0 * atom->natoms / nprocs); return imbalance; } /* ---------------------------------------------------------------------- calculate imbalance based on processor splits in 3 dims atoms must be in lamda coords (0-1) before called map atoms to 3d grid of procs return max = max atom per proc return imbalance factor = max atom per proc / ave atom per proc ------------------------------------------------------------------------- */ double Balance::imbalance_splits(int &max) { double *xsplit = comm->xsplit; double *ysplit = comm->ysplit; double *zsplit = comm->zsplit; int nx = comm->procgrid[0]; int ny = comm->procgrid[1]; int nz = comm->procgrid[2]; for (int i = 0; i < nprocs; i++) proccount[i] = 0; double **x = atom->x; int nlocal = atom->nlocal; int ix,iy,iz; for (int i = 0; i < nlocal; i++) { ix = binary(x[i][0],nx,xsplit); iy = binary(x[i][1],ny,ysplit); iz = binary(x[i][2],nz,zsplit); proccount[iz*nx*ny + iy*nx + ix]++; } MPI_Allreduce(proccount,allproccount,nprocs,MPI_INT,MPI_SUM,world); max = 0; for (int i = 0; i < nprocs; i++) max = MAX(max,allproccount[i]); double imbalance = 1.0; if (max) imbalance = max / (1.0 * atom->natoms / nprocs); return imbalance; } /* ---------------------------------------------------------------------- setup static load balance operations called from command set rho = 0 for static balancing ------------------------------------------------------------------------- */ void Balance::static_setup(char *str) { ndim = strlen(str); bdim = new int[ndim]; for (int i = 0; i < strlen(str); i++) { if (str[i] == 'x') bdim[i] = X; if (str[i] == 'y') bdim[i] = Y; if (str[i] == 'z') bdim[i] = Z; } int max = MAX(comm->procgrid[0],comm->procgrid[1]); max = MAX(max,comm->procgrid[2]); count = new bigint[max]; onecount = new bigint[max]; sum = new bigint[max+1]; target = new bigint[max+1]; lo = new double[max+1]; hi = new double[max+1]; losum = new bigint[max+1]; hisum = new bigint[max+1]; rho = 0; } /* ---------------------------------------------------------------------- setup dynamic load balance operations called from fix balance set rho = 1 for dynamic balancing after call to dynamic_setup() ------------------------------------------------------------------------- */ void Balance::dynamic_setup(char *str, int nitermax_in, double thresh_in) { dflag = 1; static_setup(str); nitermax = nitermax_in; thresh = thresh_in; rho = 1; } /* ---------------------------------------------------------------------- load balance by changing xyz split proc boundaries in Comm called one time from input script command or many times from fix balance return niter = iteration count ------------------------------------------------------------------------- */ int Balance::dynamic() { int i,j,k,m,np,max; double *split; // no balancing if no atoms bigint natoms = atom->natoms; if (natoms == 0) return 0; // set delta for 1d balancing = root of threshhold // root = # of dimensions being balanced on double delta = pow(thresh,1.0/ndim) - 1.0; int *procgrid = comm->procgrid; // all balancing done in lamda coords domain->x2lamda(atom->nlocal); // loop over dimensions in balance string int niter = 0; for (int idim = 0; idim < ndim; idim++) { // split = ptr to xyz split in Comm if (bdim[idim] == X) split = comm->xsplit; else if (bdim[idim] == Y) split = comm->ysplit; else if (bdim[idim] == Z) split = comm->zsplit; // intial count and sum np = procgrid[bdim[idim]]; tally(bdim[idim],np,split); // target[i] = desired sum at split I for (i = 0; i < np; i++) target[i] = static_cast (1.0*natoms/np * i + 0.5); target[np] = natoms; // lo[i] = closest split <= split[i] with a sum <= target // hi[i] = closest split >= split[i] with a sum >= target lo[0] = hi[0] = 0.0; lo[np] = hi[np] = 1.0; losum[0] = hisum[0] = 0; losum[np] = hisum[np] = natoms; for (i = 1; i < np; i++) { for (j = i; j >= 0; j--) if (sum[j] <= target[i]) { lo[i] = split[j]; losum[i] = sum[j]; break; } for (j = i; j <= np; j++) if (sum[j] >= target[i]) { hi[i] = split[j]; hisum[i] = sum[j]; break; } } // iterate until balanced #ifdef BALANCE_DEBUG if (me == 0) debug_output(idim,0,np,split); #endif int doneflag; int change = 1; for (m = 0; m < nitermax; m++) { change = adjust(np,split); tally(bdim[idim],np,split); niter++; #ifdef BALANCE_DEBUG if (me == 0) debug_output(idim,m+1,np,split); if (me == 0 && fp) dumpout(update->ntimestep,fp); #endif // stop if no change in splits, b/c all targets are met exactly if (!change) break; // stop if all split sums are within delta of targets // this is a 1d test of particle count per slice // assumption is that this is sufficient accuracy // for 3d imbalance factor to reach threshhold doneflag = 1; for (i = 1; i < np; i++) if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0; if (doneflag) break; } // eliminate final adjacent splits that are duplicates // can happen if particle distribution is narrow and Nitermax is small // set lo = midpt between splits // spread duplicates out evenly between bounding midpts with non-duplicates // i,j = lo/hi indices of set of duplicate splits // delta = new spacing between duplicates // bounding midpts = lo[i-1] and lo[j] int duplicate = 0; for (i = 1; i < np-1; i++) if (split[i] == split[i+1]) duplicate = 1; if (duplicate) { for (i = 0; i < np; i++) lo[i] = 0.5 * (split[i] + split[i+1]); i = 1; while (i < np-1) { j = i+1; while (split[j] == split[i]) j++; j--; if (j > i) { delta = (lo[j] - lo[i-1]) / (j-i+2); for (k = i; k <= j; k++) split[k] = lo[i-1] + (k-i+1)*delta; } i = j+1; } } // sanity check on bad duplicate or inverted splits // zero or negative width sub-domains will break Comm class // should never happen if recursive multisection algorithm is correct int bad = 0; for (i = 0; i < np; i++) if (split[i] >= split[i+1]) bad = 1; if (bad) error->all(FLERR,"Balance produced bad splits"); /* if (me == 0) { printf("BAD SPLITS %d %d %d\n",np+1,niter,delta); for (i = 0; i < np+1; i++) printf(" %g",split[i]); printf("\n"); } */ // stop at this point in bstr if imbalance factor < threshhold // this is a true 3d test of particle count per processor double imbfactor = imbalance_splits(max); if (imbfactor <= thresh) break; } // restore real coords domain->lamda2x(atom->nlocal); return niter; } /* ---------------------------------------------------------------------- count atoms in each slice, based on their dim coordinate N = # of slices split = N+1 cuts between N slices return updated count = particles per slice retrun updated sum = cummulative count below each of N+1 splits use binary search to find which slice each atom is in ------------------------------------------------------------------------- */ void Balance::tally(int dim, int n, double *split) { for (int i = 0; i < n; i++) onecount[i] = 0; double **x = atom->x; int nlocal = atom->nlocal; int index; for (int i = 0; i < nlocal; i++) { index = binary(x[i][dim],n,split); onecount[index]++; } MPI_Allreduce(onecount,count,n,MPI_LMP_BIGINT,MPI_SUM,world); sum[0] = 0; for (int i = 1; i < n+1; i++) sum[i] = sum[i-1] + count[i-1]; } /* ---------------------------------------------------------------------- adjust cuts between N slices in a dim via recursive multisectioning method split = current N+1 cuts, with 0.0 and 1.0 at end points sum = cummulative count up to each split target = desired cummulative count up to each split lo/hi = split values that bound current split update lo/hi to reflect sums at current split values overwrite split with new cuts guaranteed that splits will remain in ascending order, though adjacent values may be identical recursive bisectioning zooms in on each cut by halving lo/hi return 0 if no changes in any splits, b/c they are all perfect ------------------------------------------------------------------------- */ int Balance::adjust(int n, double *split) { int i; double fraction; // reset lo/hi based on current sum and splits // insure lo is monotonically increasing, ties are OK // insure hi is monotonically decreasing, ties are OK // this effectively uses info from nearby splits // to possibly tighten bounds on lo/hi for (i = 1; i < n; i++) { if (sum[i] <= target[i]) { lo[i] = split[i]; losum[i] = sum[i]; } if (sum[i] >= target[i]) { hi[i] = split[i]; hisum[i] = sum[i]; } } for (i = 1; i < n; i++) if (lo[i] < lo[i-1]) { lo[i] = lo[i-1]; losum[i] = losum[i-1]; } for (i = n-1; i > 0; i--) if (hi[i] > hi[i+1]) { hi[i] = hi[i+1]; hisum[i] = hisum[i+1]; } int change = 0; for (int i = 1; i < n; i++) if (sum[i] != target[i]) { change = 1; if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]); else { fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]); split[i] = lo[i] + fraction * (hi[i]-lo[i]); } } return change; } /* ---------------------------------------------------------------------- OLD code: for local diffusion method that didn't work as well as RCB adjust cuts between N slices in a dim via diffusive method count = atoms per slice split = current N+1 cuts, with 0.0 and 1.0 at end points overwrite split with new cuts diffusion means slices with more atoms than their neighbors "send" atoms, by moving cut closer to sender, further from receiver ------------------------------------------------------------------------- */ void Balance::old_adjust(int iter, int n, bigint *count, double *split) { // need to allocate this if start using it again double *cuts; // damping factor double damp = 0.5; // loop over slices // cut I is between 2 slices (I-1 and I) with counts // cut I+1 is between 2 slices (I and I+1) with counts // for a cut between 2 slices, only slice with larger count adjusts it // special treatment of end slices with only 1 neighbor bigint leftcount,mycount,rightcount; double rho,target,targetleft,targetright; for (int i = 0; i < n; i++) { if (i == 0) leftcount = MAXBIGINT; else leftcount = count[i-1]; mycount = count[i]; if (i == n-1) rightcount = MAXBIGINT; else rightcount = count[i+1]; // middle slice is <= both left and right, so do nothing // special case if 2 slices both have count = 0 -> no change in cut if (mycount <= leftcount && mycount <= rightcount) { if (leftcount == 0) cuts[i] = split[i]; if (rightcount == 0) cuts[i+1] = split[i+1]; continue; } // rho = density of atoms in the slice rho = mycount / (split[i+1] - split[i]); // middle slice has more atoms than left or right slice // send atoms in that dir if (mycount > leftcount) { target = damp * 0.5*(mycount-leftcount); cuts[i] = split[i] + target/rho; } if (mycount > rightcount) { target = damp * 0.5*(mycount-rightcount); cuts[i+1] = split[i+1] - target/rho; } /* // middle slice has more atoms then left or right slice // if delta from middle to top slice > delta between top and bottom slice // then send atoms both dirs to bring all 3 slices to same count // else bottom slice is very low, so send atoms only in that dir if (mycount > leftcount && mycount > rightcount) { if (mycount-MAX(leftcount,rightcount) >= fabs(leftcount-rightcount)) { if (leftcount <= rightcount) { targetleft = damp * (rightcount-leftcount + (mycount-rightcount)/3.0); targetright = damp * (mycount-rightcount)/3.0; cuts[i] = split[i] + targetleft/rho; cuts[i+1] = split[i+1] - targetright/rho; } else { targetleft = damp * (mycount-leftcount)/3.0; targetright = damp * (leftcount-rightcount + (mycount-leftcount)/3.0); cuts[i] = split[i] + targetleft/rho; cuts[i+1] = split[i+1] - targetright/rho; } } else if (leftcount < rightcount) { target = damp * 0.5*(mycount-leftcount); cuts[i] = split[i] + target/rho; cuts[i+1] = split[i+1]; } else if (rightcount < leftcount) { target = damp * 0.5*(mycount-rightcount); cuts[i+1] = split[i+1] - target/rho; cuts[i] = split[i]; } // middle slice has more atoms than only left or right slice // send atoms only in that dir } else if (mycount > leftcount) { target = damp * 0.5*(mycount-leftcount); cuts[i] = split[i] + target/rho; } else if (mycount > rightcount) { target = damp * 0.5*(mycount-rightcount); cuts[i+1] = split[i+1] - target/rho; } */ } // overwrite adjustable splits with new cuts for (int i = 1; i < n; i++) split[i] = cuts[i]; } /* ---------------------------------------------------------------------- binary search for where value falls in N-length vec note that vec actually has N+1 values, but ignore last one values in vec are monotonically increasing, but adjacent values can be ties value may be outside range of vec limits always return index from 0 to N-1 inclusive return 0 if value < vec[0] reutrn N-1 if value >= vec[N-1] return index = 1 to N-2 inclusive if vec[index] <= value < vec[index+1] note that for adjacent tie values, index of lower tie is not returned since never satisfies 2nd condition that value < vec[index+1] ------------------------------------------------------------------------- */ int Balance::binary(double value, int n, double *vec) { int lo = 0; int hi = n-1; if (value < vec[lo]) return lo; if (value >= vec[hi]) return hi; // insure vec[lo] <= value < vec[hi] at every iteration // done when lo,hi are adjacent int index = (lo+hi)/2; while (lo < hi-1) { if (value < vec[index]) hi = index; else if (value >= vec[index]) lo = index; index = (lo+hi)/2; } return index; } /* ---------------------------------------------------------------------- write dump snapshot of line segments in Pizza.py mdump mesh format write xy lines around each proc's sub-domain for 2d write xyz cubes around each proc's sub-domain for 3d only called by proc 0 ------------------------------------------------------------------------- */ void Balance::dumpout(bigint tstep, FILE *bfp) { int dimension = domain->dimension; // write out one square/cube per processor for 2d/3d // only write once since topology is static if (firststep) { firststep = 0; fprintf(bfp,"ITEM: TIMESTEP\n"); fprintf(bfp,BIGINT_FORMAT "\n",tstep); if (dimension == 2) fprintf(bfp,"ITEM: NUMBER OF SQUARES\n"); else fprintf(bfp,"ITEM: NUMBER OF CUBES\n"); fprintf(bfp,"%d\n",nprocs); if (dimension == 2) fprintf(bfp,"ITEM: SQUARES\n"); else fprintf(bfp,"ITEM: CUBES\n"); int nx = comm->procgrid[0] + 1; int ny = comm->procgrid[1] + 1; int nz = comm->procgrid[2] + 1; if (dimension == 2) { int m = 0; for (int j = 0; j < comm->procgrid[1]; j++) for (int i = 0; i < comm->procgrid[0]; i++) { int c1 = j*nx + i + 1; int c2 = c1 + 1; int c3 = c2 + nx; int c4 = c3 - 1; fprintf(bfp,"%d %d %d %d %d %d\n",m+1,m+1,c1,c2,c3,c4); m++; } } else { int m = 0; for (int k = 0; k < comm->procgrid[2]; k++) for (int j = 0; j < comm->procgrid[1]; j++) for (int i = 0; i < comm->procgrid[0]; i++) { int c1 = k*ny*nx + j*nx + i + 1; int c2 = c1 + 1; int c3 = c2 + nx; int c4 = c3 - 1; int c5 = c1 + ny*nx; int c6 = c2 + ny*nx; int c7 = c3 + ny*nx; int c8 = c4 + ny*nx; fprintf(bfp,"%d %d %d %d %d %d %d %d %d %d\n", m+1,m+1,c1,c2,c3,c4,c5,c6,c7,c8); m++; } } } // write out nodal coords, can be different every call // scale xsplit,ysplit,zsplit values to full box // only implmented for orthogonal boxes, not triclinic int nx = comm->procgrid[0] + 1; int ny = comm->procgrid[1] + 1; int nz = comm->procgrid[2] + 1; double *boxlo = domain->boxlo; double *boxhi = domain->boxhi; double *prd = domain->prd; fprintf(bfp,"ITEM: TIMESTEP\n"); fprintf(bfp,BIGINT_FORMAT "\n",tstep); fprintf(bfp,"ITEM: NUMBER OF NODES\n"); if (dimension == 2) fprintf(bfp,"%d\n",nx*ny); else fprintf(bfp,"%d\n",nx*ny*nz); fprintf(bfp,"ITEM: BOX BOUNDS\n"); fprintf(bfp,"%g %g\n",boxlo[0],boxhi[0]); fprintf(bfp,"%g %g\n",boxlo[1],boxhi[1]); fprintf(bfp,"%g %g\n",boxlo[2],boxhi[2]); fprintf(bfp,"ITEM: NODES\n"); if (dimension == 2) { int m = 0; for (int j = 0; j < ny; j++) for (int i = 0; i < nx; i++) { fprintf(bfp,"%d %d %g %g %g\n",m+1,1, boxlo[0] + prd[0]*comm->xsplit[i], boxlo[1] + prd[1]*comm->ysplit[j], 0.0); m++; } } else { int m = 0; for (int k = 0; k < nz; k++) for (int j = 0; j < ny; j++) for (int i = 0; i < nx; i++) { fprintf(bfp,"%d %d %g %g %g\n",m+1,1, boxlo[0] + prd[0]*comm->xsplit[i], boxlo[1] + prd[1]*comm->ysplit[j], boxlo[2] + prd[2]*comm->zsplit[j]); m++; } } } /* ---------------------------------------------------------------------- debug output for Idim and count only called by proc 0 ------------------------------------------------------------------------- */ void Balance::debug_output(int idim, int m, int np, double *split) { int i; const char *dim; double *boxlo = domain->boxlo; double *prd = domain->prd; if (bdim[idim] == X) dim = "X"; else if (bdim[idim] == Y) dim = "Y"; else if (bdim[idim] == Z) dim = "Z"; printf("Dimension %s, Iteration %d\n",dim,m); printf(" Count:"); for (i = 0; i < np; i++) printf(" " BIGINT_FORMAT,count[i]); printf("\n"); printf(" Sum:"); for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,sum[i]); printf("\n"); printf(" Target:"); for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,target[i]); printf("\n"); printf(" Actual cut:"); for (i = 0; i <= np; i++) printf(" %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]); printf("\n"); printf(" Split:"); for (i = 0; i <= np; i++) printf(" %g",split[i]); printf("\n"); printf(" Low:"); for (i = 0; i <= np; i++) printf(" %g",lo[i]); printf("\n"); printf(" Low-sum:"); for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,losum[i]); printf("\n"); printf(" Hi:"); for (i = 0; i <= np; i++) printf(" %g",hi[i]); printf("\n"); printf(" Hi-sum:"); for (i = 0; i <= np; i++) printf(" " BIGINT_FORMAT,hisum[i]); printf("\n"); printf(" Delta:"); for (i = 0; i < np; i++) printf(" %g",split[i+1]-split[i]); printf("\n"); bigint max = 0; for (i = 0; i < np; i++) max = MAX(max,count[i]); printf(" Imbalance factor: %g\n",1.0*max*np/target[np]); }