/* ---------------------------------------------------------------------- LIGGGHTS - LAMMPS Improved for General Granular and Granular Heat Transfer Simulations www.liggghts.com | www.cfdem.com Christoph Kloss, christoph.kloss@cfdem.com LIGGGHTS is based on LAMMPS LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "lbalance_simple.h" #include "math.h" #include "domain.h" #include "mpi.h" #include "atom.h" #include "comm.h" #include "error.h" #include "neighbor.h" #include "update.h" #include "stdlib.h" #include "string.h" using namespace LAMMPS_NS; /*NL*/ #define LMP_DEBUGMODE_LBALANCE_SIMPLE false /*NL*/ #define LMP_DEBUGMODE_LBALANCE_SIMPLE_RESULTS false //(idim==2)//(update->ntimestep>7000) /*NL*/ #define LMP_DEBUG_OUT_LBALANCE_SIMPLE screen /* ---------------------------------------------------------------------- */ LbalanceSimple::LbalanceSimple(class LAMMPS *lmp,int narg, char **arg): Lbalance(lmp,narg,arg) { //NP do not parse here for derived classes such as hybrid if(strncmp(style,"nlocal/",6)) return; if(narg < iarg+2) error->all(FLERR,"Loadbalance simple: Not enough arguments"); if(strcmp(arg[iarg++],"ntry")) error->all(FLERR,"Loadbalance nlocal/simple: Expecting 'ntry'"); ntry_simple = atoi(arg[iarg++]); if(ntry_simple < 1) error->all(FLERR,"Loadbalance max: ntry too small"); if(ntry_simple > 10) error->warning(FLERR,"Loadbalance max: ntry >10 might result in high comm cost"); } /* ---------------------------------------------------------------------- */ LbalanceSimple::~LbalanceSimple() {} /*NP ---------------------------------------------------------------------- reset_box() has been called before, global box is set this function sets the local boxes and should yield an equal number of particles in each domain for low # of processes function is called each reneighboring if lbalance == 1 for each dim, borders are calculated so atom distribution is equal possible problem: if atoms jump from proc i to i+2 in a dim b/c communication works on a stencil (domain->procneigh) atoms could jump if half skin is larger than smallest subhi-sublo or if neigh list build is off for a while ------------------------------------------------------------------------- */ void LbalanceSimple::loadbalance_local_boxes() { // do not do anything if there is no reasonable # of particles in the system if(atom->natoms < 10 * comm->nprocs) return; if(domain->triclinic) error->all(FLERR,"Load balancing not implemented for triclinic boxes"); /*NL*/ //if(comm->me == 0) fprintf(screen,"Loadbalancing: %f particles\n",atom->natoms); procgrid = comm->procgrid; myloc = comm->myloc; lodim[0] = lodim[1] = lodim[2] = 0; hidim[0] = procgrid[0]-1; hidim[1] = procgrid[1]-1; hidim[2] = procgrid[2]-1; loadbalance_local_boxes_simple(); } void LbalanceSimple::loadbalance_local_boxes_simple() { // new borders for load-balanced system double bal_sublo[3],bal_subhi[3]; double border[3]; int idim,idim_proc,ncount[3],ncount_ideal,nproc_grid; subhi = domain->subhi; sublo = domain->sublo; boxlo = domain->boxlo; boxhi = domain->boxhi; //NP minimum box extent equivalent to max cutoff minextent = 1.05 * cutneighmax(); //NP count total particles in proc box to handle //NP equal to total particle number if proc stencil extends whole box int natoms = count_particles(0,boxhi[0]); //NP error if domain is too small to be loadbalanced //NP should not occur since minextent is accounted for in apply_border() for (int i = 0; i < 3; i++) if ( (procgrid[i] > 1) && (boxhi[i] - boxlo[i] < procgrid[i] * minextent) ) error->all(FLERR,"Domain too small for this processor grid and this cutoff size:\n" " Enlarge domain, reduce # of processors, or choose smaller cutoff"); /*NL*/ if (LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"minextent %f\n",minextent); for (idim = 0; idim < 3; idim++) { if (procgrid[idim] < 2) continue; nproc_grid = hidim[idim] - lodim[idim] + 1; /*NL*/ //if(LMP_DEBUGMODE_LBALANCE_SIMPLE && idim == 2) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"Processor %d:, nlocal //%d\n",comm->me,atom->nlocal); //NP loop proc stencil for(idim_proc = lodim[idim]; idim_proc <= hidim[idim]; idim_proc++) { //NP first calculate the maximum allowable border shift //NP this ensures that no proc is skipped - i.e. particles are always communicated to a neigh proc calc_max_shift(idim, idim_proc); //-------------------------- //step 1- calculate lo bound //-------------------------- //NP for the first proc - take boxlo if(idim_proc == 0) bal_sublo[idim] = boxlo[idim]; //otherwise last hi limit as lo limit else bal_sublo[idim] = bal_subhi[idim]; //-------------------------- //step2 - calculate hi bound //-------------------------- //NP for the last proc - take boxhi if(idim_proc == procgrid[idim]-1) bal_subhi[idim] = boxhi[idim]; else { //NP ideal particle count for the slice ncount_ideal = (idim_proc - lodim[idim] + 1) * static_cast(natoms / nproc_grid); /*NL*/ //if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"ncount_ideal %d\n",ncount_ideal); //NP init and perform recursive search border[0] = boxlo[idim]; border[2] = boxhi[idim]; ncount[0] = 0; ncount[2] = natoms; bal_subhi[idim] = calc_border(ntry_simple,idim,ncount_ideal,border,ncount); } //-------------------------- //step3 - apply bounds //-------------------------- // this may change the value for bal_subhi if necesary apply_border(bal_sublo,bal_subhi,idim,idim_proc); } } //NP error->all(FLERR,"loadbalance finished"); } /*NP ---------------------------------------------------------------------- recursive function to calc optimal domain decomposition ------------------------------------------------------------------------- */ double LbalanceSimple::calc_border(int ntry,int dim,int ncount_ideal,double *border,int *ncount) { double btemp; int ntemp; border[1] = 0.5 * (border[0] + border[2]); ncount[1] = count_particles(dim,border[1]); /*NL*/ //if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE, "proc %d, iteration %d, border %f\n",comm->me,ntry,border[1]); //NP recursive binary search if(ntry > 0) { if(ncount[0] > ncount_ideal || ncount[2] < ncount_ideal) error->all(FLERR,"Illegal situation in LbalanceSimple::calc_border"); if(ncount[1] == ncount_ideal) return border[1]; //NP adjust hi and lo border accordingly else if(ncount[1] < ncount_ideal) { border[0] = border[1]; ncount[0] = ncount[1]; } else if(ncount[1] > ncount_ideal) { border[2] = border[1]; ncount[2] = ncount[1]; } return calc_border(ntry-1,dim,ncount_ideal,border,ncount); } //NP calc relative deviation for the three results double rel_dev[3]; for(int i = 0; i < 3; i++) rel_dev[i] = fabs(static_cast(ncount[i]-ncount_ideal) / static_cast(ncount_ideal)) ; //NP return the border with minimum deviation if(rel_dev[0] < rel_dev[1] && rel_dev[0] < rel_dev[2]) return border[0]; else if(rel_dev[1] < rel_dev[2]) return border[1]; else return border[2]; } /*NP ---------------------------------------------------------------------- function that counts number of particles only count particles in the proc stencil ------------------------------------------------------------------------- */ inline int LbalanceSimple::count_particles(int dim,double border) { int nlocal = atom->nlocal; double **x = atom->x; int count = 0, count_all; //NP only take the right processors if( myloc[dim] < lodim[dim] || myloc[dim] > hidim[dim]) count = 0; else { for(int i = 0; i < nlocal; i++) if(x[i][dim] < border) count++; } MPI_Allreduce(&count,&count_all,1,MPI_INT,MPI_SUM,world); /*NL*/ //if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE," proc %d counted %d particles for this border \n",comm->me,count_all); return count_all; } /*NP ---------------------------------------------------------------------- calculate the maximum allowable shift ------------------------------------------------------------------------- */ void LbalanceSimple::calc_max_shift(int idim,int idim_proc) { //NP get max shift distance for negative and positive direction = subbox extent in this dim for //NP this ensures that no proc is skipped - i.e. particles are always communicated to a neigh proc double max_shift_all[2]; for(int i = 0; i < 2; i++) { if (myloc[idim] == idim_proc+i) max_shift[i] = subhi[idim] - sublo[idim]; else max_shift[i] = BIG; MPI_Allreduce(&(max_shift[i]),&(max_shift_all[i]),1,MPI_DOUBLE,MPI_MIN,world); max_shift[i] = 0.90*max_shift_all[i]; } //NP handle special cases - may not shift last border if(idim_proc == comm->procgrid[idim]-1) max_shift[1] = 0.; } /*NP ---------------------------------------------------------------------- apply that has been calculated border ------------------------------------------------------------------------- */ void LbalanceSimple::apply_border(double *bal_sublo, double *bal_subhi,int idim,int idim_proc) { double subhi_final,subhi_final_all, boxhi_stencil,boxhi_stencil_all,bal_subhi_max; //NP get boxhi for the stencil boxhi_stencil = - BIG; if(myloc[idim] <= hidim[idim]) boxhi_stencil = subhi[idim]; MPI_Allreduce(&boxhi_stencil,&boxhi_stencil_all,1,MPI_DOUBLE,MPI_MAX,world); boxhi_stencil = boxhi_stencil_all; //NP prevent the case that choosing a border does not leave enough place //NP for the other procs to fulfil the minimum extent bal_subhi_max = boxhi_stencil - (hidim[idim] - idim_proc) * minextent; if(bal_subhi[idim] > bal_subhi_max) bal_subhi[idim] = bal_subhi_max; //NP only take the calculated subbox size if relevant for me subhi_final = BIG; if(myloc[idim] == idim_proc) { //NP can take sublo directly sublo[idim] = bal_sublo[idim]; /*NL*/ if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"original border for proc %d of dimension %d: %f, new border %f, maxshift: %f %f\n",idim_proc,idim,subhi[idim],bal_subhi[idim],-max_shift[0],max_shift[1]); //NP make sure the minumim extent is obeyed if(bal_subhi[idim] - bal_sublo[idim] < minextent) bal_subhi[idim] = bal_sublo[idim] + minextent; /*NL*/ if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE," border result after min extent: %f\n",bal_subhi[idim]); //NP make sure border is not shifted more than allowed double diff = bal_subhi[idim] - subhi[idim]; //if(idim == 2) fprintf(screen,"Processor %d: boundaries initally calculated for dim %d: %f / %f\n",comm->me,idim,bal_sublo[idim],bal_subhi[idim]); if (diff < 0 && diff < -max_shift[0]) subhi[idim] = subhi[idim] - max_shift[0]; else if(diff > 0 && diff > max_shift[1]) subhi[idim] = subhi[idim] + max_shift[1]; else subhi[idim] = bal_subhi[idim]; subhi_final = subhi[idim]; /*NL*/ if(LMP_DEBUGMODE_LBALANCE_SIMPLE) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE," border result after max shift extent: %f\n",subhi[idim]); //NP warn if maximum shift distance did override if(subhi[idim] - sublo[idim] < minextent ) error->warning(FLERR,"Minimum sub-domain extent could not be obeyed because particles would have been lost (did you insert large particles?). Inaccuracies may result."); //if(idim ==2) fprintf(screen,"Processor %d: boundaries finally calculated for dim %d: %f / %f\n",comm->me,idim,sublo[idim],subhi[idim]); } //NP comunicate subhi that came out of the calculation to all procs MPI_Allreduce(&subhi_final,&subhi_final_all,1,MPI_DOUBLE,MPI_MIN,world); bal_subhi[idim] = subhi_final_all; /*NL*/ if(LMP_DEBUGMODE_LBALANCE_SIMPLE && comm->me == 0) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"FINAL result for proc %d of dimension %d: %f\n",idim_proc,idim,bal_subhi[idim]); /*NL*/ if(LMP_DEBUGMODE_LBALANCE_SIMPLE_RESULTS && comm->me == 0) fprintf(LMP_DEBUG_OUT_LBALANCE_SIMPLE,"FINAL result for proc %d of dimension %d: %f\n",idim_proc,idim,bal_subhi[idim]); }