commit 4455b66003
parent 3cc23720e9
Author: sjplimp
Date:   2014-07-28 19:58:51 +00:00

    git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12207 f3b2605a-c512-4ea7-a41b-209d697bcdaa

23 changed files with 2418 additions and 700 deletions

View File

@@ -211,6 +211,9 @@ void PPPMCuda::init()
   // error check

   if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation");
+  if (comm->style != 0)
+    error->universe_all(FLERR,"PPPMCuda can only currently be used with "
+                        "comm_style brick");
   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

View File

@ -88,6 +88,10 @@ FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) :
if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command"); if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command");
if (comm->style != 0)
error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
"comm_style brick");
MPI_Comm_rank(world,&me); MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs); MPI_Comm_size(world,&nprocs);
@ -567,9 +571,12 @@ int FixLbFluid::setmask()
void FixLbFluid::init(void) void FixLbFluid::init(void)
{ {
int i,j; int i,j;
if (comm->style != 0)
error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
"comm_style brick");
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------
// Check to see if the MD timestep has changed between runs. // Check to see if the MD timestep has changed between runs.
//-------------------------------------------------------------------------- //--------------------------------------------------------------------------

View File

@@ -47,6 +47,8 @@ using namespace MathConst;
 #define CUDA_CHUNK 3000
 #define MAXBODY 20       // max # of lines in one body, also in ReadData class

+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
+
 /* ---------------------------------------------------------------------- */

 Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
@@ -753,17 +755,33 @@ void Atom::data_atoms(int n, char *buf)
     sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
   }

-  if (domain->xperiodic) {
-    if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
-    if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
-  }
-  if (domain->yperiodic) {
-    if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
-    if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
-  }
-  if (domain->zperiodic) {
-    if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
-    if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
-  }
+  if (comm->layout != LAYOUT_TILED) {
+    if (domain->xperiodic) {
+      if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
+      if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
+    }
+    if (domain->yperiodic) {
+      if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
+      if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
+    }
+    if (domain->zperiodic) {
+      if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
+      if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+    }
+  } else {
+    if (domain->xperiodic) {
+      if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
+      if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
+    }
+    if (domain->yperiodic) {
+      if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
+      if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
+    }
+    if (domain->zperiodic) {
+      if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
+      if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
+    }
+  }

   // xptr = which word in line starts xyz coords
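
The new else-branch above keys off comm->mysplit rather than comm->myloc, because a tiled (RCB) decomposition has no logical 3d grid position. The following standalone sketch, using hypothetical box and split values (it is not LAMMPS source), shows the same test in isolation: only a tile whose fractional bound sits exactly at 0.0 or 1.0 touches a periodic box face and therefore gets the epsilon slack.

// Standalone illustration (not LAMMPS source): how fractional sub-domain
// bounds decide where the epsilon slack is applied for a tiled layout.
#include <cstdio>

int main() {
  // hypothetical box and one processor's RCB tile, in fractional coords
  double boxlo[3] = {0.0, 0.0, 0.0};
  double prd[3]   = {10.0, 10.0, 10.0};
  double mysplit[3][2] = {{0.0, 0.5}, {0.25, 0.75}, {0.5, 1.0}};
  double epsilon[3] = {1.0e-6, 1.0e-6, 1.0e-6};
  int periodic[3] = {1, 1, 1};

  for (int d = 0; d < 3; d++) {
    double sublo = boxlo[d] + mysplit[d][0]*prd[d];
    double subhi = boxlo[d] + mysplit[d][1]*prd[d];
    // only tiles touching a periodic box face get the slack,
    // mirroring the myloc==0 / myloc==procgrid-1 test of the brick layout
    if (periodic[d] && mysplit[d][0] == 0.0) sublo -= epsilon[d];
    if (periodic[d] && mysplit[d][1] == 1.0) subhi += epsilon[d];
    printf("dim %d: sub-domain [%g,%g]\n", d, sublo, subhi);
  }
  return 0;
}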

View File

@ -21,6 +21,7 @@
#include "balance.h" #include "balance.h"
#include "atom.h" #include "atom.h"
#include "comm.h" #include "comm.h"
#include "rcb.h"
#include "irregular.h" #include "irregular.h"
#include "domain.h" #include "domain.h"
#include "force.h" #include "force.h"
@ -30,9 +31,10 @@
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
enum{XYZ,SHIFT,RCB}; enum{XYZ,SHIFT,BISECTION};
enum{NONE,UNIFORM,USER}; enum{NONE,UNIFORM,USER};
enum{X,Y,Z}; enum{X,Y,Z};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -47,6 +49,8 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
user_xsplit = user_ysplit = user_zsplit = NULL; user_xsplit = user_ysplit = user_zsplit = NULL;
shift_allocate = 0; shift_allocate = 0;
rcb = NULL;
fp = NULL; fp = NULL;
firststep = 1; firststep = 1;
} }
@ -74,6 +78,8 @@ Balance::~Balance()
delete [] hisum; delete [] hisum;
} }
delete rcb;
if (fp) fclose(fp); if (fp) fclose(fp);
} }
@@ -176,7 +182,7 @@ void Balance::command(int narg, char **arg)
     } else if (strcmp(arg[iarg],"rcb") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
-      style = RCB;
+      style = BISECTION;
       iarg++;
     } else break;
@@ -232,6 +238,9 @@
     }
   }

+  if (style == BISECTION && comm->style == 0)
+    error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
+
   // insure atoms are in current box & update box via shrink-wrap
   // init entire system since comm->setup is done
   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
@@ -251,8 +260,10 @@ void Balance::command(int narg, char **arg)
   double imbinit = imbalance_nlocal(maxinit);

   // no load-balance if imbalance doesn't exceed threshhold
+  // unless switching from tiled to non tiled layout, then force rebalance

-  if (imbinit < thresh) return;
+  if (comm->layout == LAYOUT_TILED && style != BISECTION) {
+  } else if (imbinit < thresh) return;

   // debug output of initial state
@@ -262,80 +273,65 @@ void Balance::command(int narg, char **arg)
   int niter = 0;

-  // NOTE: if using XYZ or SHIFT and current partition is TILING,
-  //       then need to create initial BRICK partition before performing LB
-
   // perform load-balance
   // style XYZ = explicit setting of cutting planes of logical 3d grid

   if (style == XYZ) {
+    if (comm->layout == LAYOUT_UNIFORM) {
+      if (xflag == USER || yflag == USER || zflag == USER)
+        comm->layout == LAYOUT_NONUNIFORM;
+    } else if (comm->style == LAYOUT_NONUNIFORM) {
+      if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+        comm->layout == LAYOUT_UNIFORM;
+    } else if (comm->style == LAYOUT_TILED) {
+      if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+        comm->layout == LAYOUT_UNIFORM;
+      else comm->layout == LAYOUT_NONUNIFORM;
+    }
+
     if (xflag == UNIFORM) {
       for (int i = 0; i < procgrid[0]; i++)
         comm->xsplit[i] = i * 1.0/procgrid[0];
       comm->xsplit[procgrid[0]] = 1.0;
-    }
+    } else if (xflag == USER)
+      for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
     if (yflag == UNIFORM) {
       for (int i = 0; i < procgrid[1]; i++)
         comm->ysplit[i] = i * 1.0/procgrid[1];
       comm->ysplit[procgrid[1]] = 1.0;
-    }
+    } else if (yflag == USER)
+      for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
     if (zflag == UNIFORM) {
       for (int i = 0; i < procgrid[2]; i++)
         comm->zsplit[i] = i * 1.0/procgrid[2];
       comm->zsplit[procgrid[2]] = 1.0;
-    }
-
-    if (xflag == USER)
-      for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
-    if (yflag == USER)
-      for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
-    if (zflag == USER)
+    } else if (zflag == USER)
       for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
   }

   // style SHIFT = adjust cutting planes of logical 3d grid

   if (style == SHIFT) {
-    static_setup(bstr);
+    comm->layout = LAYOUT_NONUNIFORM;
+    shift_setup_static(bstr);
     niter = shift();
   }

-  // style RCB =
+  // style BISECTION = recursive coordinate bisectioning

-  if (style == RCB) {
-    error->all(FLERR,"Balance rcb is not yet supported");
-
-    if (comm->style == 0)
-      error->all(FLERR,"Cannot use balance rcb with comm_style brick");
+  if (style == BISECTION) {
+    comm->layout = LAYOUT_TILED;
+    bisection(1);
   }

   // output of final result

   if (outflag && me == 0) dumpout(update->ntimestep,fp);

-  // reset comm->uniform flag if necessary
-
-  if (comm->uniform) {
-    if (style == SHIFT) comm->uniform = 0;
-    if (style == XYZ && xflag == USER) comm->uniform = 0;
-    if (style == XYZ && yflag == USER) comm->uniform = 0;
-    if (style == XYZ && zflag == USER) comm->uniform = 0;
-  } else {
-    if (dimension == 3) {
-      if (style == XYZ &&
-          xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
-        comm->uniform = 1;
-    } else {
-      if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM)
-        comm->uniform = 1;
-    }
-  }
-
   // reset proc sub-domains
+  // for either brick or tiled comm style

   if (domain->triclinic) domain->set_lamda_box();
   domain->set_local_box();
@@ -344,7 +340,8 @@ void Balance::command(int narg, char **arg)
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   Irregular *irregular = new Irregular(lmp);
-  irregular->migrate_atoms(1);
+  if (style == BISECTION) irregular->migrate_atoms(1,rcb->sendproc);
+  else irregular->migrate_atoms(1);
   delete irregular;
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
@@ -382,34 +379,36 @@ void Balance::command(int narg, char **arg)
     }
   }

-  if (me == 0) {
-    if (screen) {
-      fprintf(screen," x cuts:");
-      for (int i = 0; i <= comm->procgrid[0]; i++)
-        fprintf(screen," %g",comm->xsplit[i]);
-      fprintf(screen,"\n");
-      fprintf(screen," y cuts:");
-      for (int i = 0; i <= comm->procgrid[1]; i++)
-        fprintf(screen," %g",comm->ysplit[i]);
-      fprintf(screen,"\n");
-      fprintf(screen," z cuts:");
-      for (int i = 0; i <= comm->procgrid[2]; i++)
-        fprintf(screen," %g",comm->zsplit[i]);
-      fprintf(screen,"\n");
-    }
-    if (logfile) {
-      fprintf(logfile," x cuts:");
-      for (int i = 0; i <= comm->procgrid[0]; i++)
-        fprintf(logfile," %g",comm->xsplit[i]);
-      fprintf(logfile,"\n");
-      fprintf(logfile," y cuts:");
-      for (int i = 0; i <= comm->procgrid[1]; i++)
-        fprintf(logfile," %g",comm->ysplit[i]);
-      fprintf(logfile,"\n");
-      fprintf(logfile," z cuts:");
-      for (int i = 0; i <= comm->procgrid[2]; i++)
-        fprintf(logfile," %g",comm->zsplit[i]);
-      fprintf(logfile,"\n");
-    }
-  }
+  if (style != BISECTION) {
+    if (me == 0) {
+      if (screen) {
+        fprintf(screen," x cuts:");
+        for (int i = 0; i <= comm->procgrid[0]; i++)
+          fprintf(screen," %g",comm->xsplit[i]);
+        fprintf(screen,"\n");
+        fprintf(screen," y cuts:");
+        for (int i = 0; i <= comm->procgrid[1]; i++)
+          fprintf(screen," %g",comm->ysplit[i]);
+        fprintf(screen,"\n");
+        fprintf(screen," z cuts:");
+        for (int i = 0; i <= comm->procgrid[2]; i++)
+          fprintf(screen," %g",comm->zsplit[i]);
+        fprintf(screen,"\n");
+      }
+      if (logfile) {
+        fprintf(logfile," x cuts:");
+        for (int i = 0; i <= comm->procgrid[0]; i++)
+          fprintf(logfile," %g",comm->xsplit[i]);
+        fprintf(logfile,"\n");
+        fprintf(logfile," y cuts:");
+        for (int i = 0; i <= comm->procgrid[1]; i++)
+          fprintf(logfile," %g",comm->ysplit[i]);
+        fprintf(logfile,"\n");
+        fprintf(logfile," z cuts:");
+        for (int i = 0; i <= comm->procgrid[2]; i++)
+          fprintf(logfile," %g",comm->zsplit[i]);
+        fprintf(logfile,"\n");
+      }
+    }
+  }
 }
@@ -467,13 +466,52 @@ double Balance::imbalance_splits(int &max)
   return imbalance;
 }

+/* ----------------------------------------------------------------------
+   perform balancing via RCB class
+   sortflag = flag for sorting order of received messages by proc ID
+------------------------------------------------------------------------- */
+
+int *Balance::bisection(int sortflag)
+{
+  if (!rcb) rcb = new RCB(lmp);
+
+  // NOTE: lo/hi args could be simulation box or particle bounding box
+  // NOTE: triclinic needs to be in lamda coords
+
+  int dim = domain->dimension;
+  double *boxlo = domain->boxlo;
+  double *boxhi = domain->boxhi;
+  double *prd = domain->prd;
+
+  rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi);
+  rcb->invert(sortflag);
+
+  // NOTE: this logic is specific to orthogonal boxes, not triclinic
+
+  double (*mysplit)[2] = comm->mysplit;
+
+  mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0];
+  if (rcb->hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
+  else mysplit[0][1] = (rcb->hi[0] - boxlo[0]) / prd[0];
+
+  mysplit[1][0] = (rcb->lo[1] - boxlo[1]) / prd[1];
+  if (rcb->hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
+  else mysplit[1][1] = (rcb->hi[1] - boxlo[1]) / prd[1];
+
+  mysplit[2][0] = (rcb->lo[2] - boxlo[2]) / prd[2];
+  if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
+  else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2];
+
+  return rcb->sendproc;
+}
+
 /* ----------------------------------------------------------------------
    setup static load balance operations
-   called from command
+   called from command and indirectly initially from fix balance
    set rho = 0 for static balancing
 ------------------------------------------------------------------------- */

-void Balance::static_setup(char *str)
+void Balance::shift_setup_static(char *str)
 {
   shift_allocate = 1;
@@ -498,21 +536,35 @@ void Balance::static_setup(char *str)
     losum = new bigint[max+1];
     hisum = new bigint[max+1];

+  // if current layout is TILED, set initial uniform splits in Comm
+  // this gives starting point to subsequent shift balancing
+
+  if (comm->layout == LAYOUT_TILED) {
+    int *procgrid = comm->procgrid;
+    double *xsplit = comm->xsplit;
+    double *ysplit = comm->ysplit;
+    double *zsplit = comm->zsplit;
+    for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
+    for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
+    for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
+    xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
+  }
+
   rho = 0;
 }

 /* ----------------------------------------------------------------------
    setup shift load balance operations
    called from fix balance
-   set rho = 1 for shift balancing after call to shift_setup()
+   set rho = 1 to do dynamic balancing after call to shift_setup_static()
------------------------------------------------------------------------- */

 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 {
-  static_setup(str);
+  shift_setup_static(str);
   nitermax = nitermax_in;
   stopthresh = thresh_in;
   rho = 1;
 }
@@ -525,7 +577,7 @@ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 int Balance::shift()
 {
   int i,j,k,m,np,max;
-  double *split = NULL;
+  double *split;

   // no balancing if no atoms
@@ -590,7 +642,7 @@ int Balance::shift()
   // iterate until balanced

 #ifdef BALANCE_DEBUG
-  if (me == 0) debug_output(idim,0,np,split);
+  if (me == 0) debug_shift_output(idim,0,np,split);
 #endif

   int doneflag;
@@ -601,7 +653,7 @@ int Balance::shift()
     niter++;

 #ifdef BALANCE_DEBUG
-    if (me == 0) debug_output(idim,m+1,np,split);
+    if (me == 0) debug_shift_output(idim,m+1,np,split);
     if (me == 0 && fp) dumpout(update->ntimestep,fp);
 #endif
@@ -827,7 +879,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
   int nx = comm->procgrid[0] + 1;
   int ny = comm->procgrid[1] + 1;
-  //int nz = comm->procgrid[2] + 1;
+  int nz = comm->procgrid[2] + 1;

   if (dimension == 2) {
     int m = 0;
@@ -914,7 +966,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
 ------------------------------------------------------------------------- */

 #ifdef BALANCE_DEBUG
-void Balance::debug_output(int idim, int m, int np, double *split)
+void Balance::debug_shift_output(int idim, int m, int np, double *split)
 {
   int i;
   const char *dim = NULL;
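
For reference, the fractional-split conversion performed in the new Balance::bisection() can be exercised on its own. The sketch below uses hypothetical box and RCB bounds (it is not LAMMPS source) and mirrors the way the upper bound is pinned to exactly 1.0 when an RCB box touches boxhi, so that later equality tests are exact.

// Standalone sketch (not LAMMPS source) of mapping an RCB box [lo,hi]
// inside an orthogonal simulation box to fractional bounds in [0,1].
#include <cstdio>

int main() {
  double boxlo[3] = {0.0, 0.0, 0.0};        // hypothetical orthogonal box
  double boxhi[3] = {20.0, 10.0, 10.0};
  double prd[3]   = {20.0, 10.0, 10.0};
  double rcblo[3] = {5.0, 0.0, 2.5};        // hypothetical RCB result
  double rcbhi[3] = {10.0, 10.0, 10.0};
  double mysplit[3][2];

  for (int d = 0; d < 3; d++) {
    mysplit[d][0] = (rcblo[d] - boxlo[d]) / prd[d];
    // pin to 1.0 instead of computing (hi-lo)/prd, avoiding round-off
    if (rcbhi[d] == boxhi[d]) mysplit[d][1] = 1.0;
    else mysplit[d][1] = (rcbhi[d] - boxlo[d]) / prd[d];
    printf("dim %d: split = [%g,%g]\n", d, mysplit[d][0], mysplit[d][1]);
  }
  return 0;
}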

View File

@@ -27,11 +27,14 @@ namespace LAMMPS_NS {
 class Balance : protected Pointers {
  public:
+  class RCB *rcb;
+
   Balance(class LAMMPS *);
   ~Balance();
   void command(int, char **);
   void shift_setup(char *, int, double);
   int shift();
+  int *bisection(int sortflag = 0);
   double imbalance_nlocal(int &);
   void dumpout(bigint, FILE *);
@@ -66,13 +69,13 @@ class Balance : protected Pointers {
   FILE *fp;
   int firststep;

-  void static_setup(char *);
   double imbalance_splits(int &);
+  void shift_setup_static(char *);
   void tally(int, int, double *);
   int adjust(int, double *);
   int binary(double, int, double *);
 #ifdef BALANCE_DEBUG
-  void debug_output(int, int, int, double *);
+  void debug_shift_output(int, int, int, double *);
 #endif
 };

View File

@@ -47,25 +47,95 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
   gridflag = ONELEVEL;
   mapflag = CART;
   customfile = NULL;
+  outfile = NULL;
   recv_from_partition = send_to_partition = -1;
   otherflag = 0;
-  outfile = NULL;
+  maxexchange_atom = maxexchange_fix = 0;

+  grid2proc = NULL;
   xsplit = ysplit = zsplit = NULL;

+  // use of OpenMP threads
+  // query OpenMP for number of threads/process set by user at run-time
+  // if the OMP_NUM_THREADS environment variable is not set, we default
+  // to using 1 thread. This follows the principle of the least surprise,
+  // while practically all OpenMP implementations violate it by using
+  // as many threads as there are (virtual) CPU cores by default.
+
+  nthreads = 1;
+#ifdef _OPENMP
+  if (lmp->kokkos) {
+    nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
+  } else if (getenv("OMP_NUM_THREADS") == NULL) {
+    nthreads = 1;
+    if (me == 0)
+      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
+  } else {
+    nthreads = omp_get_max_threads();
+  }
+
+  // enforce consistent number of threads across all MPI tasks
+
+  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
+
+  if (!lmp->kokkos) omp_set_num_threads(nthreads);
+
+  if (me == 0) {
+    if (screen)
+      fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
+    if (logfile)
+      fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
+  }
+#endif
 }

 /* ---------------------------------------------------------------------- */

 Comm::~Comm()
 {
+  memory->destroy(grid2proc);
   memory->destroy(xsplit);
   memory->destroy(ysplit);
   memory->destroy(zsplit);

   delete [] customfile;
   delete [] outfile;
 }

+/* ----------------------------------------------------------------------
+   deep copy of arrays from old Comm class to new one
+   all public/protected vectors/arrays in parent Comm class must be copied
+   called from alternate constructor of child classes
+   when new comm style is created from Input
+------------------------------------------------------------------------- */
+
+void Comm::copy_arrays(Comm *oldcomm)
+{
+  if (oldcomm->grid2proc) {
+    memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
+                   "comm:grid2proc");
+    memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0],
+           (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int));
+
+    memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
+    memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
+    memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
+    memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double));
+    memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double));
+    memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double));
+  }
+
+  if (customfile) {
+    int n = strlen(oldcomm->customfile) + 1;
+    customfile = new char[n];
+    strcpy(customfile,oldcomm->customfile);
+  }
+
+  if (outfile) {
+    int n = strlen(oldcomm->outfile) + 1;
+    outfile = new char[n];
+    strcpy(outfile,oldcomm->outfile);
+  }
+}
+
 /* ----------------------------------------------------------------------
    modify communication params
    invoked from input script by comm_modify command
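
The new copy_arrays() exists so that a comm_style change mid-script can hand the old object's per-grid arrays to the newly constructed comm object before the old one is destroyed. Below is a minimal sketch of that hand-off pattern with simplified, hypothetical classes; it is not the real Comm/CommBrick API, just the ownership idea under those assumptions.

// Simplified sketch (hypothetical classes, not the LAMMPS API): a new child
// object is built from the old one and deep-copies the parent-owned arrays
// so the old object can be deleted safely afterwards.
#include <cstring>
#include <cstdio>

struct CommBase {
  int procgrid[3];
  double *xsplit;                      // fractional cuts, procgrid[0]+1 values

  CommBase() : xsplit(nullptr) { procgrid[0] = procgrid[1] = procgrid[2] = 1; }
  virtual ~CommBase() { delete [] xsplit; }

  // deep copy of parent-owned arrays from the old object into this one
  void copy_arrays(const CommBase *oldcomm) {
    if (oldcomm->xsplit) {
      xsplit = new double[procgrid[0]+1];
      memcpy(xsplit, oldcomm->xsplit, (procgrid[0]+1)*sizeof(double));
    }
  }
};

struct BrickComm : CommBase {
  BrickComm() {
    procgrid[0] = 4;
    xsplit = new double[5]{0.0, 0.25, 0.5, 0.75, 1.0};
  }
  // alternate constructor: take over state from whatever style ran before
  explicit BrickComm(const CommBase *oldcomm) {
    procgrid[0] = oldcomm->procgrid[0];
    copy_arrays(oldcomm);              // this object now owns its own copy
  }
};

int main() {
  CommBase *oldcomm = new BrickComm();
  CommBase *newcomm = new BrickComm(oldcomm);   // style switch
  delete oldcomm;                               // safe: arrays were copied
  printf("first cut after switch: %g\n", newcomm->xsplit[1]);
  delete newcomm;
  return 0;
}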

View File

@@ -21,21 +21,15 @@ namespace LAMMPS_NS {
 class Comm : protected Pointers {
  public:
   int style;     // comm pattern: 0 = 6-way stencil, 1 = irregular tiling
-  int layout;    // current proc domains: 0 = logical bricks, 1 = general tiling
-                 // can do style=1 on layout=0, but not vice versa
-  // NOTE: uniform needs to be subsumed into layout
-  int uniform;   // 1 = equal subdomains, 0 = load-balanced
+  int layout;    // LAYOUT_UNIFORM = logical equal-sized bricks
+                 // LAYOUT_NONUNIFORM = logical bricks,
+                 //   but different sizes due to LB
+                 // LAYOUT_TILED = general tiling, due to RCB LB

   int me,nprocs;                    // proc info
-  int procgrid[3];                  // procs assigned in each dim of 3d grid
-  int user_procgrid[3];             // user request for procs in each dim
-  int myloc[3];                     // which proc I am in each dim
-  int procneigh[3][2];              // my 6 neighboring procs, 0/1 = left/right
   int ghost_velocity;               // 1 if ghost atoms have velocity, 0 if not
-  double *xsplit,*ysplit,*zsplit;   // fractional (0-1) sub-domain sizes
   double cutghost[3];               // cutoffs used for acquiring ghost atoms
   double cutghostuser;              // user-specified ghost cutoff
-  int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid
   int recv_from_partition;          // recv proc layout from this partition
   int send_to_partition;            // send my proc layout to this partition
                                     // -1 if no recv or send
@@ -45,8 +39,24 @@ class Comm : protected Pointers {
   int maxexchange_fix;              // max contribution to exchange from Fixes
   int nthreads;                     // OpenMP threads per MPI process

+  // public settings specific to layout = UNIFORM, NONUNIFORM
+
+  int procgrid[3];                  // procs assigned in each dim of 3d grid
+  int user_procgrid[3];             // user request for procs in each dim
+  int myloc[3];                     // which proc I am in each dim
+  int procneigh[3][2];              // my 6 neighboring procs, 0/1 = left/right
+  double *xsplit,*ysplit,*zsplit;   // fractional (0-1) sub-domain sizes
+  int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid
+
+  // public settings specific to layout = TILED
+
+  double mysplit[3][2];             // fractional (0-1) bounds of my sub-domain
+
+  // methods
+
   Comm(class LAMMPS *);
   virtual ~Comm();
+  void copy_arrays(class Comm *);
   void modify_params(int, char **);
   void set_processors(int, char **);  // set 3d processor grid attributes
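
The two layout descriptions declared above overlap: for a brick layout a proc's fractional bounds are implied by myloc and the split arrays, while a tiled layout stores the same two numbers per dimension directly in mysplit. A small sketch with hypothetical values (not LAMMPS source) showing the correspondence:

// Sketch: reading a brick proc's fractional bounds off the split arrays,
// which is exactly what mysplit[3][2] stores directly for a tiled layout.
#include <cstdio>

int main() {
  // hypothetical 4x2x1 grid, this proc at (2,1,0), uniform cuts
  int procgrid[3] = {4, 2, 1};
  int myloc[3]    = {2, 1, 0};
  double xsplit[5] = {0.0, 0.25, 0.5, 0.75, 1.0};
  double ysplit[3] = {0.0, 0.5, 1.0};
  double zsplit[2] = {0.0, 1.0};
  const double *splits[3] = {xsplit, ysplit, zsplit};

  double mysplit[3][2];
  for (int d = 0; d < 3; d++) {
    mysplit[d][0] = splits[d][myloc[d]];      // lower fractional bound
    mysplit[d][1] = splits[d][myloc[d]+1];    // upper fractional bound
    printf("dim %d: [%g,%g]\n", d, mysplit[d][0], mysplit[d][1]);
  }
  return 0;
}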

View File

@ -22,6 +22,7 @@
#include "stdio.h" #include "stdio.h"
#include "stdlib.h" #include "stdlib.h"
#include "comm_brick.h" #include "comm_brick.h"
#include "comm_tiled.h"
#include "universe.h" #include "universe.h"
#include "atom.h" #include "atom.h"
#include "atom_vec.h" #include "atom_vec.h"
@@ -52,58 +53,57 @@ using namespace LAMMPS_NS;
 #define BIG 1.0e20

 enum{SINGLE,MULTI}; // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files

 /* ---------------------------------------------------------------------- */

 CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
 {
-  style = layout = 0;
+  style = 0;
+  layout = LAYOUT_UNIFORM;
+  init_buffers();
+}

-  recv_from_partition = send_to_partition = -1;
-  otherflag = 0;
+/* ---------------------------------------------------------------------- */

-  grid2proc = NULL;
+CommBrick::~CommBrick()
+{
+  free_swap();
+  if (mode == MULTI) {
+    free_multi();
+    memory->destroy(cutghostmulti);
+  }

-  uniform = 1;
+  if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
+  memory->sfree(sendlist);
+  memory->destroy(maxsendlist);
+  memory->destroy(buf_send);
+  memory->destroy(buf_recv);
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommBrick::CommBrick(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+  if (oldcomm->layout == LAYOUT_TILED)
+    error->all(FLERR,"Cannot change to comm_style brick from tiled layout");
+
+  style = 0;
+  layout = oldcomm->layout;
+  copy_arrays(oldcomm);
+  init_buffers();
+}
+
+/* ----------------------------------------------------------------------
+   initialize comm buffers and other data structs local to CommBrick
+------------------------------------------------------------------------- */
+
+void CommBrick::init_buffers()
+{
   multilo = multihi = NULL;
   cutghostmulti = NULL;

-  // use of OpenMP threads
-  // query OpenMP for number of threads/process set by user at run-time
-  // if the OMP_NUM_THREADS environment variable is not set, we default
-  // to using 1 thread. This follows the principle of the least surprise,
-  // while practically all OpenMP implementations violate it by using
-  // as many threads as there are (virtual) CPU cores by default.
-
-  nthreads = 1;
-#ifdef _OPENMP
-  if (lmp->kokkos) {
-    nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
-  } else if (getenv("OMP_NUM_THREADS") == NULL) {
-    nthreads = 1;
-    if (me == 0)
-      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
-  } else {
-    nthreads = omp_get_max_threads();
-  }
-
-  // enforce consistent number of threads across all MPI tasks
-
-  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
-
-  if (!lmp->kokkos) omp_set_num_threads(nthreads);
-
-  if (me == 0) {
-    if (screen)
-      fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
-    if (logfile)
-      fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
-  }
-#endif
-
-  // initialize comm buffers & exchange memory
-  // NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body
-
-  maxexchange_atom = maxexchange_fix = 0;
   maxexchange = maxexchange_atom + maxexchange_fix;
   bufextra = maxexchange + BUFEXTRA;
@@ -125,26 +125,6 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
 /* ---------------------------------------------------------------------- */

-CommBrick::~CommBrick()
-{
-  memory->destroy(grid2proc);
-
-  free_swap();
-  if (mode == MULTI) {
-    free_multi();
-    memory->destroy(cutghostmulti);
-  }
-
-  if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
-  memory->sfree(sendlist);
-  memory->destroy(maxsendlist);
-  memory->destroy(buf_send);
-  memory->destroy(buf_recv);
-}
-
-/* ---------------------------------------------------------------------- */
-
 void CommBrick::init()
 {
   triclinic = domain->triclinic;
@@ -280,12 +260,12 @@ void CommBrick::setup()
   //   0 = to left, 1 = to right
   //   set equal to recvneed[idim][1/0] of neighbor proc
   // maxneed[idim] = max procs away any proc recvs atoms in either direction
-  // uniform = 1 = uniform sized sub-domains:
+  // layout = UNIFORM = uniform sized sub-domains:
   //   maxneed is directly computable from sub-domain size
   //   limit to procgrid-1 for non-PBC
   //   recvneed = maxneed except for procs near non-PBC
   //   sendneed = recvneed of neighbor on each side
-  // uniform = 0 = non-uniform sized sub-domains:
+  // layout = NONUNIFORM = non-uniform sized sub-domains:
   //   compute recvneed via updown() which accounts for non-PBC
   //   sendneed = recvneed of neighbor on each side
   //   maxneed via Allreduce() of recvneed
@@ -293,7 +273,7 @@ void CommBrick::setup()
   int *periodicity = domain->periodicity;
   int left,right;

-  if (uniform) {
+  if (layout == LAYOUT_UNIFORM) {
     maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
     maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
     maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
@@ -561,16 +541,15 @@ void CommBrick::forward_comm(int dummy)
     } else {
       if (comm_x_only) {
         if (sendnum[iswap])
-          n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                              x[firstrecv[iswap]],pbc_flag[iswap],
-                              pbc[iswap]);
+          avec->pack_comm(sendnum[iswap],sendlist[iswap],
+                          x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]);
       } else if (ghost_velocity) {
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
-                                buf_send,pbc_flag[iswap],pbc[iswap]);
+        avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
+                            buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
       } else {
-        n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                            buf_send,pbc_flag[iswap],pbc[iswap]);
+        avec->pack_comm(sendnum[iswap],sendlist[iswap],
+                        buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
       }
     }
@@ -620,10 +599,10 @@ void CommBrick::reverse_comm()
     } else {
       if (comm_f_only) {
         if (sendnum[iswap])
           avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
                                f[firstrecv[iswap]]);
       } else {
-        n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
+        avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
         avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
       }
     }
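
The LAYOUT_UNIFORM branch of CommBrick::setup() above computes how many neighboring procs ghosts can come from directly from the cutoff. A quick numeric check with hypothetical numbers (not LAMMPS source):

// Numeric check of the uniform-layout formula: maxneed follows directly
// from the ghost cutoff when all sub-domains have equal size.
#include <cstdio>

int main() {
  double prd      = 40.0;   // box length in this dimension (hypothetical)
  int    procgrid = 8;      // procs in this dimension -> sub-domain = 5.0
  double cutghost = 12.0;   // ghost cutoff spans more than two sub-domains

  // how many procs away ghost atoms can come from in this dimension
  int maxneed = static_cast<int>(cutghost * procgrid / prd) + 1;
  printf("maxneed = %d\n", maxneed);   // 12.0*8/40 = 2.4 -> 2, +1 -> 3
  return 0;
}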

View File

@@ -21,6 +21,7 @@ namespace LAMMPS_NS {
 class CommBrick : public Comm {
  public:
   CommBrick(class LAMMPS *);
+  CommBrick(class LAMMPS *, class Comm *);
   virtual ~CommBrick();
   virtual void init();
@@ -80,6 +81,7 @@ class CommBrick : public Comm {
   int maxexchange;        // max # of datums/atom in exchange comm
   int bufextra;           // extra space beyond maxsend in send buffer

+  void init_buffers();
   int updown(int, int, int, double, int, double *);
                           // compare cutoff to procs
   virtual void grow_send(int, int);       // reallocate send buffer

View File

@@ -11,15 +11,20 @@
    See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

+#include "string.h"
 #include "lmptype.h"
 #include "comm_tiled.h"
+#include "comm_brick.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "domain.h"
+#include "force.h"
 #include "pair.h"
+#include "neighbor.h"
 #include "modify.h"
 #include "fix.h"
 #include "compute.h"
+#include "output.h"
 #include "dump.h"
 #include "memory.h"
 #include "error.h"
@@ -27,32 +32,93 @@
 using namespace LAMMPS_NS;

 #define BUFFACTOR 1.5
-#define BUFFACTOR 1.5
+#define BUFMIN 1000
+#define BUFEXTRA 1000

 enum{SINGLE,MULTI}; // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files

 /* ---------------------------------------------------------------------- */

 CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
 {
-  style = 1;
-  layout = 0;
   error->all(FLERR,"Comm_style tiled is not yet supported");
+
+  style = 1;
+  layout = LAYOUT_UNIFORM;
+  init_buffers();
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+  style = 1;
+  layout = oldcomm->layout;
+  copy_arrays(oldcomm);
+  init_buffers();
 }

 /* ---------------------------------------------------------------------- */

 CommTiled::~CommTiled()
 {
+  free_swap();
+  if (sendlist) for (int i = 0; i < nswap; i++) memory->destroy(sendlist[i]);
+  memory->sfree(sendlist);
+  memory->destroy(maxsendlist);
+  memory->destroy(buf_send);
+  memory->destroy(buf_recv);
 }

-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   initialize comm buffers and other data structs local to CommTiled
+   NOTE: if this is identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */
+
+void CommTiled::init_buffers()
+{
+  maxexchange = maxexchange_atom + maxexchange_fix;
+  bufextra = maxexchange + BUFEXTRA;
+
+  maxsend = BUFMIN;
+  memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
+  maxrecv = BUFMIN;
+  memory->create(buf_recv,maxrecv,"comm:buf_recv");
+
+  nswap = 2 * domain->dimension;
+  allocate_swap(nswap);
+
+  //sendlist = (int **) memory->smalloc(nswap*sizeof(int *),"comm:sendlist");
+  //memory->create(maxsendlist,nswap,"comm:maxsendlist");
+  //for (int i = 0; i < nswap; i++) {
+  //  maxsendlist[i] = BUFMIN;
+  //  memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
+  //}
+}
+
+/* ----------------------------------------------------------------------
+   NOTE: if this is nearly identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */

 void CommTiled::init()
 {
   triclinic = domain->triclinic;
   map_style = atom->map_style;

+  // temporary restrictions
+
+  if (triclinic)
+    error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box");
+  if (domain->xperiodic || domain->yperiodic ||
+      (domain->dimension == 2 && domain->zperiodic))
+    error->all(FLERR,"Cannot yet use comm_style tiled with periodic box");
+  if (mode == MULTI)
+    error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm");
+
   // comm_only = 1 if only x,f are exchanged in forward/reverse comm
   // comm_x_only = 0 if ghost_velocity since velocities are added
@@ -72,51 +138,210 @@ void CommTiled::init()
   for (int i = 0; i < modify->nfix; i++)
     size_border += modify->fix[i]->comm_border;

+  // maxexchange = max # of datums/atom in exchange communication
+  // maxforward = # of datums in largest forward communication
+  // maxreverse = # of datums in largest reverse communication
+  // query pair,fix,compute,dump for their requirements
+  // pair style can force reverse comm even if newton off
+
+  maxexchange = BUFMIN + maxexchange_fix;
+  maxforward = MAX(size_forward,size_border);
+  maxreverse = size_reverse;
+
+  if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
+  if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
+
+  for (int i = 0; i < modify->nfix; i++) {
+    maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
+    maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
+  }
+
+  for (int i = 0; i < modify->ncompute; i++) {
+    maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
+    maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
+  }
+
+  for (int i = 0; i < output->ndump; i++) {
+    maxforward = MAX(maxforward,output->dump[i]->comm_forward);
+    maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
+  }
+
+  if (force->newton == 0) maxreverse = 0;
+  if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
 }

 /* ----------------------------------------------------------------------
    setup spatial-decomposition communication patterns
-   function of neighbor cutoff(s) & cutghostuser & current box size
+   function of neighbor cutoff(s) & cutghostuser & current box size and tiling
+   single mode sets slab boundaries (slablo,slabhi) based on max cutoff
+   multi mode sets type-dependent slab boundaries (multilo,multihi)
------------------------------------------------------------------------- */

 void CommTiled::setup()
 {
-  // error on triclinic or multi?
-  // set nswap = 2*dim
-  // setup neighbor proc info for exchange()
-  // setup nsendproc and nrecvproc bounts
-  // setup sendproc and recvproc lists
-  // setup sendboxes
-  // reallocate requests and statuses
-  // check that cutoff is <= 1/2 of periodic box len?
-  // loop over dims
-  // left:
-  //   construct ghost boxes
-  //   differnet in x,y,z
-  //   account for ghost borders in y,z
-  //   account for PBC by shifting
-  //   split into multiple boxes if straddles PBC
-  //   drop boxes down RCB tree
-  //   count unique procs they cover
-  //   what about self if crosses PBC
-  //   for each proc they cover:
-  //     compute box I send it to left
-  //     is a message I will recv from right (don't care about box)
-  //   for ghost-extended boxes
-  //     do not count procs that do not overlap my owned box at all
-  //     only touching edge of my owned box does not count
-  //     in this case list I send to and recv from may be different?
-  // same thing to right
+  int i;
+
+  int dimension;
+  int *periodicity;
+  double *prd,*sublo,*subhi,*boxlo,*boxhi;
+
+  double cut = MAX(neighbor->cutneighmax,cutghostuser);
+
+  dimension = domain->dimension;
+  periodicity = domain->periodicity;
+  prd = domain->prd;
+  sublo = domain->sublo;
+  subhi = domain->subhi;
+  boxlo = domain->boxlo;
+  boxhi = domain->boxhi;
+  cutghost[0] = cutghost[1] = cutghost[2] = cut;
+
+  if ((periodicity[0] && cut > prd[0]) ||
+      (periodicity[1] && cut > prd[1]) ||
+      (dimension == 3 && periodicity[2] && cut > prd[2]))
+    error->all(FLERR,"Communication cutoff for comm_style tiled "
+               "cannot exceed periodic box length");
+
+  // allocate overlap
+
+  int *overlap;
+  int noverlap,noverlap1,indexme;
+  double lo1[3],hi1[3],lo2[3],hi2[3];
+  int one,two;
+
+  nswap = 0;
+  for (int idim = 0; idim < dimension; idim++) {
+
+    // ghost box in lower direction
+
+    one = 1;
+    lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
+    hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2];
+    lo1[idim] = sublo[idim] - cut;
+    hi1[idim] = sublo[idim];
+
+    two = 0;
+    if (periodicity[idim] && lo1[idim] < boxlo[idim]) {
+      two = 1;
+      lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2];
+      hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2];
+      lo2[idim] = lo1[idim] + prd[idim];
+      hi2[idim] = hi1[idim] + prd[idim];
+      if (sublo[idim] == boxlo[idim]) {
+        one = 0;
+        hi2[idim] = boxhi[idim];
+      }
+    }
+
+    indexme = -1;
+    noverlap = 0;
+    if (one) {
+      if (layout == LAYOUT_UNIFORM)
+        box_drop_uniform(idim,lo1,hi1,noverlap,overlap,indexme);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_drop_nonuniform(idim,lo1,hi1,noverlap,overlap,indexme);
+      else
+        box_drop_tiled(lo1,hi1,0,nprocs-1,noverlap,overlap,indexme);
+    }
+    noverlap1 = noverlap;
+    if (two) {
+      if (layout == LAYOUT_UNIFORM)
+        box_drop_uniform(idim,lo2,hi2,noverlap,overlap,indexme);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_drop_nonuniform(idim,lo2,hi2,noverlap,overlap,indexme);
+      else
+        box_drop_tiled(lo2,hi2,0,nprocs-1,noverlap,overlap,indexme);
+    }
+
+    // if this (self) proc is in overlap list, move it to end of list
+
+    if (indexme >= 0) {
+      int tmp = overlap[noverlap-1];
+      overlap[noverlap-1] = overlap[indexme];
+      overlap[indexme] = tmp;
+    }
+
+    // overlap how has list of noverlap procs
+    // includes PBC effects
+
+    if (overlap[noverlap-1] == me) sendself[nswap] = 1;
+    else sendself[nswap] = 0;
+    if (noverlap-sendself[nswap]) sendother[nswap] = 1;
+    else sendother[nswap] = 0;
+
+    nsendproc[nswap] = noverlap;
+    for (i = 0; i < noverlap; i++) sendproc[nswap][i] = overlap[i];
+    nrecvproc[nswap+1] = noverlap;
+    for (i = 0; i < noverlap; i++) recvproc[nswap+1][i] = overlap[i];
+
+    // compute sendbox for each of my sends
+    // ibox = intersection of ghostbox with other proc's sub-domain
+    // sendbox = ibox displaced by cutoff in dim
+    // NOTE: need to extend send box in lower dims by cutoff
+    // NOTE: this logic for overlapping boxes is not correct for sending
+
+    double oboxlo[3],oboxhi[3],sbox[6];
+
+    for (i = 0; i < noverlap; i++) {
+      pbc_flag[nswap][i] = 0;
+      pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] =
+        pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0;
+
+      if (layout == LAYOUT_UNIFORM)
+        box_other_uniform(overlap[i],oboxlo,oboxhi);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_other_nonuniform(overlap[i],oboxlo,oboxhi);
+      else
+        box_other_tiled(overlap[i],oboxlo,oboxhi);
+
+      if (i < noverlap1) {
+        sbox[0] = MAX(oboxlo[0],lo1[0]);
+        sbox[1] = MAX(oboxlo[1],lo1[1]);
+        sbox[2] = MAX(oboxlo[2],lo1[2]);
+        sbox[3] = MIN(oboxhi[0],hi1[0]);
+        sbox[4] = MIN(oboxhi[1],hi1[1]);
+        sbox[5] = MIN(oboxhi[2],hi1[2]);
+        sbox[idim] += cut;
+        sbox[3+idim] += cut;
+        if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
+      } else {
+        pbc_flag[nswap][i] = 1;
+        pbc[nswap][i][idim] = 1;
+        sbox[0] = MAX(oboxlo[0],lo2[0]);
+        sbox[1] = MAX(oboxlo[1],lo2[1]);
+        sbox[2] = MAX(oboxlo[2],lo2[2]);
+        sbox[3] = MIN(oboxhi[0],hi2[0]);
+        sbox[4] = MIN(oboxhi[1],hi2[1]);
+        sbox[5] = MIN(oboxhi[2],hi2[2]);
+        sbox[idim] -= prd[idim] - cut;
+        sbox[3+idim] -= prd[idim] + cut;
+        if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
+      }
+
+      if (idim >= 1) {
+        if (sbox[0] == sublo[0]) sbox[0] -= cut;
+        if (sbox[4] == subhi[0]) sbox[4] += cut;
+      }
+      if (idim == 2) {
+        if (sbox[1] == sublo[1]) sbox[1] -= cut;
+        if (sbox[5] == subhi[1]) sbox[5] += cut;
+      }
+
+      memcpy(sendbox[nswap][i],sbox,6*sizeof(double));
+    }
+
+    // ghost box in upper direction
+
+    nswap += 2;
+  }
+
+  // reallocate requests and statuses to max of any swap
+
+  // what need from decomp (RCB):
+  // dropbox: return list of procs with overlap and overlapping boxes
+  //   return n, proclist, boxlist
+  // otherbox: bbox of another proc
+  // dropatom: return what proc owns the atom coord
 }

 /* ----------------------------------------------------------------------
@@ -126,46 +351,73 @@ void CommTiled::setup()
 void CommTiled::forward_comm(int dummy)
 {
-  int i,irecv,n;
+  int i,irecv,n,nsend,nrecv;
   MPI_Status status;
   AtomVec *avec = atom->avec;
   double **x = atom->x;

   // exchange data with another set of procs in each swap
-  // if first proc in set is self, then is just self across PBC, just copy
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
   // if comm_x_only set, exchange or copy directly to x, don't unpack

   for (int iswap = 0; iswap < nswap; iswap++) {
-    if (sendproc[iswap][0] != me) {
-      if (comm_x_only) {
-        for (i = 0; i < nrecvproc[iswap]; i++)
-          MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
-                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
-                              buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
-          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
-        }
-        MPI_Waitall(nrecvproc[iswap],requests,statuses);
-      } else if (ghost_velocity) {
-        for (i = 0; i < nrecvproc[iswap]; i++)
-          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
-                    size_forward_recv[iswap][i],
-                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
-                                  buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
-          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
-        }
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-          avec->unpack_comm_vel(recvnum[iswap][i],firstrecv[iswap][irecv],
-                                &buf_recv[forward_recv_offset[iswap][irecv]]);
-        }
-      } else {
-        for (i = 0; i < nrecvproc[iswap]; i++)
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (comm_x_only) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
+          MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsend; i++) {
+          n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
+                              buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                        x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend],
+                        pbc[iswap][nsend]);
+      }
+      if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
+
+    } else if (ghost_velocity) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
+          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                    size_forward_recv[iswap][i],
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsend; i++) {
+          n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
+                                  buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                            buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                              buf_send);
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                                &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
+      }
+
+    } else {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
           MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
                     size_forward_recv[iswap][i],
                     MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
@@ -174,27 +426,21 @@ void CommTiled::forward_comm(int dummy)
                           buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
           MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
         }
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-          avec->unpack_comm(recvnum[iswap][i],firstrecv[iswap][irecv],
-                            &buf_recv[forward_recv_offset[iswap][irecv]]);
-        }
       }
-    } else {
-      if (comm_x_only) {
-        if (sendnum[iswap][0])
-          n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
-                              x[firstrecv[iswap][0]],pbc_flag[iswap][0],
-                              pbc[iswap][0]);
-      } else if (ghost_velocity) {
-        n = avec->pack_comm_vel(sendnum[iswap][0],sendlist[iswap][0],
-                                buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_comm_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
-      } else {
-        n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
-                            buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+
+      if (sendself[iswap]) {
+        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                        buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                          buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                            &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
       }
     }
   }
 }
@@ -207,57 +453,70 @@ void CommTiled::forward_comm(int dummy)
 void CommTiled::reverse_comm()
 {
-  int i,irecv,n;
+  int i,irecv,n,nsend,nrecv;
   MPI_Request request;
   MPI_Status status;
   AtomVec *avec = atom->avec;
   double **f = atom->f;

   // exchange data with another set of procs in each swap
-  // if first proc in set is self, then is just self across PBC, just copy
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
   // if comm_f_only set, exchange or copy directly from f, don't pack

   for (int iswap = nswap-1; iswap >= 0; iswap--) {
-    if (sendproc[iswap][0] != me) {
-      if (comm_f_only) {
-        for (i = 0; i < nsendproc[iswap]; i++)
-          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
-                    size_reverse_recv[iswap][i],MPI_DOUBLE,
-                    sendproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nrecvproc[iswap]; i++)
-          MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
-                   MPI_DOUBLE,recvproc[iswap][i],0,world);
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
-          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
-                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
-        }
-      } else {
-        for (i = 0; i < nsendproc[iswap]; i++)
-          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
-                    size_reverse_recv[iswap][i],MPI_DOUBLE,
-                    sendproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
-                                 buf_send);
-          MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
-        }
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
-          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
-                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
-        }
-      }
-    } else {
-      if (comm_f_only) {
-        if (sendnum[iswap][0])
-          avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],
-                               f[firstrecv[iswap][0]]);
-      } else {
-        n = avec->pack_reverse(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
-        avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],buf_send);
-      }
-    }
+    if (comm_f_only) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++)
+          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+                    size_reverse_recv[iswap][i],MPI_DOUBLE,
+                    sendproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nrecv; i++)
+          MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
+                   MPI_DOUBLE,recvproc[iswap][i],0,world);
+      }
+
+      if (sendself[iswap]) {
+        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                             f[firstrecv[iswap][nrecv]]);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,&status);
+          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
+                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
+        }
+      }
+
+    } else {
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++)
+          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+                    size_reverse_recv[iswap][i],MPI_DOUBLE,
+                    sendproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nrecv; i++) {
+          n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
+                                 buf_send);
+          MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
+        }
+      }
+
+      if (sendself[iswap]) {
+        avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                           buf_send);
+        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                             buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,&status);
+          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
+                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
+        }
+      }
+    }
   }
 }
@@ -298,7 +557,7 @@ void CommTiled::exchange()
 void CommTiled::borders()
 {
-  int i,n,irecv,ngroup,nlast,nsend,rmaxswap;
+  int i,n,irecv,ngroup,nlast,nsend,nrecv,ncount,rmaxswap;
   double xlo,xhi,ylo,yhi,zlo,zhi;
   double *bbox;
   double **x;
@@ -333,36 +592,40 @@ void CommTiled::borders()
     if (iswap < 2) nlast = atom->nlocal;
     else nlast = atom->nlocal + atom->nghost;

-    nsend = 0;
+    ncount = 0;
     for (i = 0; i < ngroup; i++)
       if (x[i][0] >= xlo && x[i][0] <= xhi &&
           x[i][1] >= ylo && x[i][1] <= yhi &&
           x[i][2] >= zlo && x[i][2] <= zhi) {
-        if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
-        sendlist[iswap][i][nsend++] = i;
+        if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
+        sendlist[iswap][i][ncount++] = i;
       }
     for (i = atom->nlocal; i < nlast; i++)
       if (x[i][0] >= xlo && x[i][0] <= xhi &&
           x[i][1] >= ylo && x[i][1] <= yhi &&
           x[i][2] >= zlo && x[i][2] <= zhi) {
-        if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
-        sendlist[iswap][i][nsend++] = i;
+        if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
+        sendlist[iswap][i][ncount++] = i;
       }
-    sendnum[iswap][i] = nsend;
-    smax = MAX(smax,nsend);
+    sendnum[iswap][i] = ncount;
+    smax = MAX(smax,ncount);
   }

-    // send sendnum counts to procs who recv from me
+    // send sendnum counts to procs who recv from me except self
+    // copy data to self if sendself is set

-    if (sendproc[iswap][0] != me) {
-      for (i = 0; i < nrecvproc[iswap]; i++)
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (sendother[iswap]) {
+      for (i = 0; i < nrecv; i++)
         MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
                   recvproc[iswap][i],0,world,&requests[i]);
-      for (i = 0; i < nsendproc[iswap]; i++)
+      for (i = 0; i < nsend; i++)
         MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
-      MPI_Waitall(nrecvproc[iswap],requests,statuses);
-    } else recvnum[iswap][0] = sendnum[iswap][0];
+    }
+    if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
+    if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);

     // setup other per swap/proc values from sendnum and recvnum
@ -390,54 +653,64 @@ void CommTiled::borders()
    // swap atoms with other procs using pack_border(), unpack_border()

    if (ghost_velocity) {
      if (sendother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
                    recvnum[iswap][i]*size_border,
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
        for (i = 0; i < nsend; i++) {
          n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
                                    buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
                   sendproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        n = avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
                                  buf_send,pbc_flag[iswap][nsend],
                                  pbc[iswap][nsend]);
        avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                                buf_send);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&irecv,&status);
          avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
                              &buf_recv[forward_recv_offset[iswap][irecv]]);
        }
      }

    } else {
      if (sendother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
                    recvnum[iswap][i]*size_border,
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
        for (i = 0; i < nsend; i++) {
          n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
                                buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
                   sendproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        n = avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend],
                              buf_send,pbc_flag[iswap][nsend],
                              pbc[iswap][nsend]);
        avec->unpack_border(recvnum[iswap][nsend],firstrecv[iswap][nsend],
                            buf_send);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&irecv,&status);
          avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
                              &buf_recv[forward_recv_offset[iswap][irecv]]);
        }
      }
    }
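All of the new border and reverse loops above share one communication shape: post MPI_Irecv for every non-self partner, send to every non-self partner, service the self partner with a plain pack/unpack copy, and then drain the pending receives with MPI_Waitany in whatever order they complete. The sketch below is illustrative only (it is not LAMMPS code; the buffer and variable names are made up) and shows that same shape in a self-contained MPI program.

// sketch_sendself.cpp -- illustrative only, not part of LAMMPS
// build: mpicxx sketch_sendself.cpp -o sketch_sendself ; run: mpirun -np 4 ./sketch_sendself
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  int me,nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

  // each rank exchanges one int with every rank, including itself:
  // non-self partners use Irecv/Send/Waitany, the self partner is a copy

  int nother = nprocs - 1;
  std::vector<int> sendval(nprocs),recvval(nprocs);
  std::vector<MPI_Request> requests(nother);
  std::vector<int> partner(nother);
  for (int p = 0; p < nprocs; p++) sendval[p] = 100*me + p;

  int k = 0;
  for (int p = 0; p < nprocs; p++)      // "sendother" part: post all recvs first
    if (p != me) {
      partner[k] = p;
      MPI_Irecv(&recvval[p],1,MPI_INT,p,0,MPI_COMM_WORLD,&requests[k]);
      k++;
    }
  for (int p = 0; p < nprocs; p++)      // then send to every non-self partner
    if (p != me) MPI_Send(&sendval[p],1,MPI_INT,p,0,MPI_COMM_WORLD);

  recvval[me] = sendval[me];            // "sendself" part: plain copy, no MPI call

  for (int j = 0; j < nother; j++) {    // unpack messages in arrival order
    int idx;
    MPI_Waitany(nother,requests.data(),&idx,MPI_STATUS_IGNORE);
    // here the real code would unpack buf_recv for partner[idx]
  }

  printf("rank %d done, value from self = %d\n",me,recvval[me]);
  MPI_Finalize();
  return 0;
}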
@ -786,6 +1059,64 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
  return nrecv;
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
   box is owned by me and extends in dim
------------------------------------------------------------------------- */
void CommTiled::box_drop_uniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
{
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
------------------------------------------------------------------------- */
void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
{
}
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
recursive routine for traversing an RCB tree of cuts
------------------------------------------------------------------------- */
void CommTiled::box_drop_tiled(double *lo, double *hi,
int proclower, int procupper,
int &noverlap, int *overlap, int &indexme)
{
// end recursion when partition is a single proc
// add proc to overlap list
if (proclower == procupper) {
if (proclower == me) indexme = noverlap;
overlap[noverlap++] = proclower;
return;
}
// drop box on each side of cut it extends beyond
// use > and < criteria to not include procs it only touches
// procmid = 1st processor in upper half of partition
// = location in tree that stores this cut
// dim = 0,1,2 dimension of cut
// cut = position of cut
int procmid = proclower + (procupper - proclower) / 2 + 1;
double cut = tree[procmid].cut;
int dim = tree[procmid].dim;
if (lo[dim] < cut)
box_drop_tiled(lo,hi,proclower,procmid-1,noverlap,overlap,indexme);
if (hi[dim] > cut)
box_drop_tiled(lo,hi,procmid,procupper,noverlap,overlap,indexme);
}
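The recursion above walks an RCB cut tree: each interior node splits a processor range at one cut, and a box is dropped into whichever half (or both) it extends past, using strict inequalities so a box that only touches a cut is not duplicated. The following standalone sketch, which assumes a small hard-coded one-dimensional tree rather than the real RCB data, reproduces the same descent for illustration.

// sketch_box_drop.cpp -- illustrative only; mirrors the recursion above
// on a hard-coded 1d cut tree for 4 procs owning [0,0.25),[0.25,0.5),[0.5,0.75),[0.75,1)
#include <cstdio>

struct Tree { double cut; int dim; };

// procs [proclower,procupper] own the partition; tree[procmid] stores the cut
// that splits it into a lower and an upper half
void box_drop(const Tree *tree, double lo, double hi,
              int proclower, int procupper, int *overlap, int &noverlap)
{
  if (proclower == procupper) {            // single proc left: it overlaps the box
    overlap[noverlap++] = proclower;
    return;
  }
  int procmid = proclower + (procupper-proclower)/2 + 1;
  double cut = tree[procmid].cut;
  if (lo < cut) box_drop(tree,lo,hi,proclower,procmid-1,overlap,noverlap);
  if (hi > cut) box_drop(tree,lo,hi,procmid,procupper,overlap,noverlap);
}

int main()
{
  Tree tree[4] = {{0.0,0},{0.25,0},{0.5,0},{0.75,0}};
  int overlap[4],noverlap = 0;
  box_drop(tree,0.4,0.6,0,3,overlap,noverlap);   // box straddles the 0.5 cut
  for (int i = 0; i < noverlap; i++)
    printf("box overlaps proc %d\n",overlap[i]); // expected: procs 1 and 2
  return 0;
}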
/* ----------------------------------------------------------------------
   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
   if flag = 1, realloc
@ -825,6 +1156,42 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
                     "comm:sendlist[iswap]");
}
/* ----------------------------------------------------------------------
allocation of swap info
------------------------------------------------------------------------- */
void CommTiled::allocate_swap(int n)
{
memory->create(sendnum,n,"comm:sendnum");
memory->create(recvnum,n,"comm:recvnum");
memory->create(sendproc,n,"comm:sendproc");
memory->create(recvproc,n,"comm:recvproc");
memory->create(size_forward_recv,n,"comm:size");
memory->create(size_reverse_send,n,"comm:size");
memory->create(size_reverse_recv,n,"comm:size");
memory->create(firstrecv,n,"comm:firstrecv");
memory->create(pbc_flag,n,"comm:pbc_flag");
memory->create(pbc,n,6,"comm:pbc");
}
/* ----------------------------------------------------------------------
free memory for swaps
------------------------------------------------------------------------- */
void CommTiled::free_swap()
{
memory->destroy(sendnum);
memory->destroy(recvnum);
memory->destroy(sendproc);
memory->destroy(recvproc);
memory->destroy(size_forward_recv);
memory->destroy(size_reverse_send);
memory->destroy(size_reverse_recv);
memory->destroy(firstrecv);
memory->destroy(pbc_flag);
memory->destroy(pbc);
}
/* ----------------------------------------------------------------------
   return # of bytes of allocated memory
------------------------------------------------------------------------- */
@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommTiled : public Comm {
 public:
  CommTiled(class LAMMPS *);
  CommTiled(class LAMMPS *, class Comm *);
  virtual ~CommTiled();
  void init();
@ -46,15 +47,16 @@ class CommTiled : public Comm {
  bigint memory_usage();

 private:
  int triclinic;                // 0 if domain is orthog, 1 if triclinic
  int map_style;                // non-0 if global->local mapping is done
  int size_forward;             // # of per-atom datums in forward comm
  int size_reverse;             // # of datums in reverse comm
  int size_border;              // # of datums in forward border comm

  int nswap;                    // # of swaps to perform = 2*dim
  int *nsendproc,*nrecvproc;    // # of procs to send/recv to/from in each swap
  int *sendother;               // 1 if send to any other proc in each swap
  int *sendself;                // 1 if send to self in each swap
  int **sendnum,**recvnum;      // # of atoms to send/recv per swap/proc
  int **sendproc,**recvproc;    // proc to send/recv to/from per swap/proc
  int **size_forward_recv;      // # of values to recv in each forward swap/proc
@ -77,6 +79,7 @@ class CommTiled : public Comm {
  int maxsend,maxrecv;          // current size of send/recv buffer
  int maxforward,maxreverse;    // max # of datums in forward/reverse comm
  int maxexchange;              // max # of datums/atom in exchange comm
  int bufextra;                 // extra space beyond maxsend in send buffer

  MPI_Request *requests;
@ -84,9 +87,35 @@ class CommTiled : public Comm {
  int comm_x_only,comm_f_only;  // 1 if only exchange x,f in for/rev comm
struct Tree {
double cut;
int dim;
};
Tree *tree;
// info from RCB decomp
double rcbcut;
int rcbcutdim;
double rcblo[3];
double rcbhi[3];
void init_buffers();
void box_drop_uniform(int, double *, double *, int &, int *, int &);
void box_drop_nonuniform(int, double *, double *, int &, int *, int &);
void box_drop_tiled(double *, double *, int, int, int &, int *, int &);
void box_other_uniform(int, double *, double *) {}
void box_other_nonuniform(int, double *, double *) {}
void box_other_tiled(int, double *, double *) {}
  void grow_send(int, int);         // reallocate send buffer
  void grow_recv(int);              // free/allocate recv buffer
  void grow_list(int, int, int);    // reallocate sendlist for one swap/proc
  void allocate_swap(int);          // allocate swap arrays
  void free_swap();                 // free swap arrays
};

}
@ -44,14 +44,15 @@
using namespace LAMMPS_NS;
using namespace MathConst;

enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files

#define BIG 1.0e20
#define SMALL 1.0e-4
#define DELTAREGION 4
#define BONDSTRETCH 1.1

enum{NO_REMAP,X_REMAP,V_REMAP};    // same as fix_deform.cpp
enum{IGNORE,WARN,ERROR};           // same as thermo.cpp

/* ----------------------------------------------------------------------
   default is periodic
------------------------------------------------------------------------- */
@ -249,42 +250,56 @@ void Domain::set_global_box()
/* ----------------------------------------------------------------------
   set lamda box params
   assumes global box is defined and proc assignment has been made
   uses comm->xyz_split or comm->mysplit
     to define subbox boundaries in consistent manner
------------------------------------------------------------------------- */

void Domain::set_lamda_box()
{
  if (comm->layout != LAYOUT_TILED) {
    int *myloc = comm->myloc;
    double *xsplit = comm->xsplit;
    double *ysplit = comm->ysplit;
    double *zsplit = comm->zsplit;

    sublo_lamda[0] = xsplit[myloc[0]];
    subhi_lamda[0] = xsplit[myloc[0]+1];
    sublo_lamda[1] = ysplit[myloc[1]];
    subhi_lamda[1] = ysplit[myloc[1]+1];
    sublo_lamda[2] = zsplit[myloc[2]];
    subhi_lamda[2] = zsplit[myloc[2]+1];

  } else {
    double (*mysplit)[2] = comm->mysplit;

    sublo_lamda[0] = mysplit[0][0];
    subhi_lamda[0] = mysplit[0][1];
    sublo_lamda[1] = mysplit[1][0];
    subhi_lamda[1] = mysplit[1][1];
    sublo_lamda[2] = mysplit[2][0];
    subhi_lamda[2] = mysplit[2][1];
  }
}
/* ----------------------------------------------------------------------
   set local subbox params for orthogonal boxes
   assumes global box is defined and proc assignment has been made
   uses comm->xyz_split or comm->mysplit
     to define subbox boundaries in consistent manner
   insure subhi[max] = boxhi
------------------------------------------------------------------------- */

void Domain::set_local_box()
{
  if (triclinic) return;

  if (comm->layout != LAYOUT_TILED) {
    int *myloc = comm->myloc;
    int *procgrid = comm->procgrid;
    double *xsplit = comm->xsplit;
    double *ysplit = comm->ysplit;
    double *zsplit = comm->zsplit;

    sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
    if (myloc[0] < procgrid[0]-1)
      subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
    else subhi[0] = boxhi[0];
@ -299,6 +314,21 @@ void Domain::set_local_box()
    if (myloc[2] < procgrid[2]-1)
      subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
    else subhi[2] = boxhi[2];
} else {
double (*mysplit)[2] = comm->mysplit;
sublo[0] = boxlo[0] + xprd*mysplit[0][0];
subhi[0] = boxlo[0] + xprd*mysplit[0][1];
if (mysplit[0][1] == 1.0) subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*mysplit[1][0];
subhi[1] = boxlo[1] + yprd*mysplit[1][1];
if (mysplit[1][1] == 1.0) subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*mysplit[2][0];
subhi[2] = boxlo[2] + zprd*mysplit[2][1];
if (mysplit[2][1] == 1.0) subhi[2] = boxhi[2];
  }
}
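For the tiled layout, mysplit[dim][0..1] are fractions of the global box, so a proc's sub-domain is simply boxlo + prd*fraction, with the upper edge snapped to boxhi when the fraction is exactly 1.0. A minimal sketch of that arithmetic, with illustrative values that are not taken from the commit:

// sketch_mysplit.cpp -- illustrative only: fractional splits -> subdomain bounds
#include <cstdio>

int main()
{
  double boxlo[3] = {0.0, 0.0, 0.0};
  double prd[3]   = {20.0, 10.0, 10.0};                        // box lengths
  double mysplit[3][2] = {{0.25,0.50},{0.0,1.0},{0.0,1.0}};    // this proc's tile

  double sublo[3],subhi[3];
  for (int d = 0; d < 3; d++) {
    sublo[d] = boxlo[d] + prd[d]*mysplit[d][0];
    subhi[d] = boxlo[d] + prd[d]*mysplit[d][1];
    // pin the upper edge exactly to boxhi to avoid round-off gaps,
    // like the hi-edge checks in set_local_box() above
    if (mysplit[d][1] == 1.0) subhi[d] = boxlo[d] + prd[d];
  }
  for (int d = 0; d < 3; d++)
    printf("dim %d: sub-domain [%g,%g]\n",d,sublo[d],subhi[d]);
  return 0;
}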
@ -22,12 +22,14 @@
#include "irregular.h"
#include "force.h"
#include "kspace.h"
#include "rcb.h"
#include "error.h"

using namespace LAMMPS_NS;
using namespace FixConst;

enum{SHIFT,BISECTION};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files

/* ---------------------------------------------------------------------- */

@ -53,7 +55,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
  thresh = force->numeric(FLERR,arg[4]);

  if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
  else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION;
  else error->all(FLERR,"Illegal fix balance command");

  int iarg = 5;
@ -65,7 +67,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
      stopthresh = force->numeric(FLERR,arg[iarg+3]);
      if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
      iarg += 4;
    } else if (lbstyle == BISECTION) {
      iarg++;
    }
@ -97,14 +99,16 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
    }
  }

  if (lbstyle == BISECTION && comm->style == 0)
    error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick");

  // create instance of Irregular class
  // create instance of Balance class
  // if SHIFT, initialize it with params

  balance = new Balance(lmp);
  if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh);

  irregular = new Irregular(lmp);
@ -238,32 +242,33 @@ void FixBalance::rebalance()
{
  imbprev = imbnow;

  // invoke balancer and reset comm->uniform flag

  int *sendproc;
  if (lbstyle == SHIFT) {
    itercount = balance->shift();
    comm->layout = LAYOUT_NONUNIFORM;
  } else if (lbstyle == BISECTION) {
    sendproc = balance->bisection();
    comm->layout = LAYOUT_TILED;
  }

  // output of final result

  if (fp) balance->dumpout(update->ntimestep,fp);

  // reset proc sub-domains

  if (domain->triclinic) domain->set_lamda_box();
  domain->set_local_box();

  // move atoms to new processors via irregular()
  // only needed if migrate_check() says an atom moves too far
  // else allow caller's comm->exchange() to do it

  if (domain->triclinic) domain->x2lamda(atom->nlocal);
  if (lbstyle == BISECTION) irregular->migrate_atoms(0,sendproc);
  else if (irregular->migrate_check()) irregular->migrate_atoms();
  if (domain->triclinic) domain->lamda2x(atom->nlocal);

  // invoke KSpace setup_grid() to adjust to new proc sub-domains

@ -303,5 +308,6 @@ double FixBalance::compute_vector(int i)

double FixBalance::memory_usage()
{
  double bytes = irregular->memory_usage();
  if (balance->rcb) bytes += balance->rcb->memory_usage();
  return bytes;
}
@ -987,3 +987,14 @@ void FixDeform::options(int narg, char **arg)
    } else error->all(FLERR,"Illegal fix deform command");
  }
}
/* ----------------------------------------------------------------------
memory usage of Irregular
------------------------------------------------------------------------- */
double FixDeform::memory_usage()
{
double bytes = 0.0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}
@ -35,6 +35,7 @@ class FixDeform : public Fix {
  void init();
  void pre_exchange();
  void end_of_step();
  double memory_usage();

 private:
  int triclinic,scaleflag,flipflag;
@ -2271,3 +2271,14 @@ void FixNH::pre_exchange()
    domain->lamda2x(atom->nlocal);
  }
}
/* ----------------------------------------------------------------------
memory usage of Irregular
------------------------------------------------------------------------- */
double FixNH::memory_usage()
{
double bytes = 0.0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}
@ -39,6 +39,7 @@ class FixNH : public Fix {
  void reset_target(double);
  void reset_dt();
  virtual void *extract(const char*,int &);
  double memory_usage();

 protected:
  int dimension,which;
@ -1159,19 +1159,15 @@ void Input::comm_style()
{
  if (narg < 1) error->all(FLERR,"Illegal comm_style command");
  if (strcmp(arg[0],"brick") == 0) {
    if (comm->style == 0) return;
    Comm *oldcomm = comm;
    comm = new CommBrick(lmp,oldcomm);
    delete oldcomm;
  } else if (strcmp(arg[0],"tiled") == 0) {
    if (comm->style == 1) return;
    Comm *oldcomm = comm;
    comm = new CommTiled(lmp,oldcomm);
    delete oldcomm;
  } else error->all(FLERR,"Illegal comm_style command");
}
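The rewritten comm_style command swaps communication styles by constructing the new Comm from the old one and then deleting the old instance, so user settings survive the switch. The sketch below imitates that construct-from-old / delete-old pattern with stand-in classes; the fields and constructors shown are assumptions for illustration, not the real LAMMPS Comm interface.

// sketch_comm_switch.cpp -- illustrative only; Comm/CommBrick/CommTiled here
// are stand-ins, not the LAMMPS classes
#include <cstdio>

struct Comm {
  int style;            // 0 = brick, 1 = tiled
  double cutghostuser;  // example of a user setting worth preserving
  Comm(int s) : style(s), cutghostuser(0.0) {}
  Comm(int s, const Comm &old) : style(s), cutghostuser(old.cutghostuser) {}
  virtual ~Comm() {}
};
struct CommBrick : Comm {
  CommBrick() : Comm(0) {}
  CommBrick(const Comm &old) : Comm(0,old) {}
};
struct CommTiled : Comm {
  CommTiled() : Comm(1) {}
  CommTiled(const Comm &old) : Comm(1,old) {}
};

int main()
{
  Comm *comm = new CommBrick();
  comm->cutghostuser = 3.5;            // pretend the user set an option

  // switch styles the way comm_style now does: build new from old, delete old
  Comm *oldcomm = comm;
  comm = new CommTiled(*oldcomm);
  delete oldcomm;

  printf("style %d, cutghostuser %g\n",comm->style,comm->cutghostuser);
  delete comm;
  return 0;
}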
@ -30,6 +30,8 @@ using namespace LAMMPS_NS;
int *Irregular::proc_recv_copy;
int compare_standalone(const void *, const void *);

enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files

#define BUFFACTOR 1.5
#define BUFMIN 1000
#define BUFEXTRA 1000
@ -43,13 +45,26 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
  triclinic = domain->triclinic;
  map_style = atom->map_style;

  // migrate work vectors

  maxlocal = 0;
  mproclist = NULL;
  msizes = NULL;

  // send buffers

  maxdbuf = 0;
  dbuf = NULL;
  maxbuf = 0;
  buf = NULL;

  // universal work vectors

  memory->create(work1,nprocs,"irregular:work1");
  memory->create(work2,nprocs,"irregular:work2");

  // initialize buffers for migrate atoms, not used for datum comm
  // these can persist for multiple irregular operations

  maxsend = BUFMIN;
@ -62,9 +77,12 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
Irregular::~Irregular()
{
  memory->destroy(mproclist);
  memory->destroy(msizes);
  memory->destroy(dbuf);
  memory->destroy(buf);
  memory->destroy(work1);
  memory->destroy(work2);
  memory->destroy(buf_send);
  memory->destroy(buf_recv);
}
@ -74,11 +92,13 @@ Irregular::~Irregular()
   can be used in place of comm->exchange()
   unlike exchange(), allows atoms to have moved arbitrarily long distances
   sets up irregular plan, invokes it, destroys it
   sortflag = flag for sorting order of received messages by proc ID
   procassign = non-NULL if already know procs atoms are assigned to (from RCB)
   atoms MUST be remapped to be inside simulation box before this is called
   for triclinic: atoms must be in lamda coords (0-1) before this is called
------------------------------------------------------------------------- */

void Irregular::migrate_atoms(int sortflag, int *procassign)
{
  // clear global->local map since atoms move to new procs
  // clear old ghosts so map_set() at end will operate only on local atoms
@ -101,13 +121,16 @@ void Irregular::migrate_atoms(int sortflag)
    subhi = domain->subhi_lamda;
  }

  layout = comm->layout;
  xsplit = comm->xsplit;
  ysplit = comm->ysplit;
  zsplit = comm->zsplit;
  boxlo = domain->boxlo;
  prd = domain->prd;
  procgrid = comm->procgrid;
  grid2proc = comm->grid2proc;

  // loop over atoms, flag any that are not in my sub-box
  // fill buffer with atoms leaving my box, using < and >=
  // assign which proc it belongs to via coord2proc()
@ -119,41 +142,63 @@ void Irregular::migrate_atoms(int sortflag)
  double **x = atom->x;
  int nlocal = atom->nlocal;

  if (nlocal > maxlocal) {
    maxlocal = nlocal;
    memory->destroy(mproclist);
    memory->destroy(msizes);
    memory->create(mproclist,maxlocal,"irregular:mproclist");
    memory->create(msizes,maxlocal,"irregular:msizes");
  }

  int igx,igy,igz;
  int nsend = 0;
  int nsendatom = 0;
  int i = 0;
  if (procassign) {
    while (i < nlocal) {
      if (procassign[i] == me) i++;
      else {
        mproclist[nsendatom] = procassign[i];
        if (nsend > maxsend) grow_send(nsend,1);
        msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
        nsend += msizes[nsendatom];
        nsendatom++;
        avec->copy(nlocal-1,i,1);
        procassign[i] = procassign[nlocal-1];
        nlocal--;
      }
    }

  } else {
    while (i < nlocal) {
      if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
          x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
          x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
        mproclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
        if (mproclist[nsendatom] == me) i++;
        else {
          if (nsend > maxsend) grow_send(nsend,1);
          msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
          nsend += msizes[nsendatom];
          nsendatom++;
          avec->copy(nlocal-1,i,1);
          nlocal--;
        }
      } else i++;
    }
  }

  atom->nlocal = nlocal;

  // create irregular communication plan, perform comm, destroy plan
  // returned nrecv = size of buffer needed for incoming atoms

  int nrecv = create_atom(nsendatom,msizes,mproclist,sortflag);
  if (nrecv > maxrecv) grow_recv(nrecv);
  exchange_atom(buf_send,msizes,buf_recv);
  destroy_atom();

  // add received atoms to my list

  int m = 0;
@ -174,6 +219,11 @@ void Irregular::migrate_atoms(int sortflag)
int Irregular::migrate_check()
{
  // migrate required if comm layout is tiled
  // cannot use myloc[] logic below

  if (comm->layout == LAYOUT_TILED) return 1;

  // subbox bounds for orthogonal or triclinic box
  // other comm/domain data used by coord2proc()
@ -186,13 +236,16 @@ int Irregular::migrate_check()
    subhi = domain->subhi_lamda;
  }

  layout = comm->layout;
  xsplit = comm->xsplit;
  ysplit = comm->ysplit;
  zsplit = comm->zsplit;
  boxlo = domain->boxlo;
  prd = domain->prd;
  procgrid = comm->procgrid;
  grid2proc = comm->grid2proc;

  // loop over atoms, check for any that are not in my sub-box
  // assign which proc it belongs to via coord2proc()
  // if logical igx,igy,igz of newproc > one away from myloc, set flag = 1
@ -250,6 +303,7 @@ int Irregular::migrate_check()
   n = # of atoms to send
   sizes = # of doubles for each atom
   proclist = proc to send each atom to (not including self)
   sortflag = flag for sorting order of received messages by proc ID
   return total # of doubles I will recv (not including self)
------------------------------------------------------------------------- */
@ -257,51 +311,60 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
{
  int i;

  // setup for collective comm
  // work1 = 1 for procs I send a message to, not including self
  // work2 = 1 for all procs, used for ReduceScatter

  for (i = 0; i < nprocs; i++) {
    work1[i] = 0;
    work2[i] = 1;
  }
  for (i = 0; i < n; i++) work1[proclist[i]] = 1;
  work1[me] = 0;

  // nrecv_proc = # of procs I receive messages from, not including self
  // options for performing ReduceScatter operation
  // some are more efficient on some machines at big sizes

#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
  MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
  nrecv_proc = work1[me];
#else
#ifdef LAMMPS_RS_ALLREDUCE
  MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
  nrecv_proc = work2[me];
#else
  MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
#endif
#endif

  // allocate receive arrays

  proc_recv = new int[nrecv_proc];
  length_recv = new int[nrecv_proc];
  request = new MPI_Request[nrecv_proc];
  status = new MPI_Status[nrecv_proc];

  // nsend_proc = # of messages I send

  for (i = 0; i < nprocs; i++) work1[i] = 0;
  for (i = 0; i < n; i++) work1[proclist[i]] += sizes[i];

  nsend_proc = 0;
  for (i = 0; i < nprocs; i++)
    if (work1[i]) nsend_proc++;

  // allocate send arrays

  proc_send = new int[nsend_proc];
  length_send = new int[nsend_proc];
  num_send = new int[nsend_proc];
  index_send = new int[n];
  offset_send = new int[n];

  // list still stores size of message for procs I send to
  // proc_send = procs I send to
  // length_send = # of doubles I send to each proc
  // to balance pattern of send messages:
  //   each proc begins with iproc > me, continues until iproc = me
  // reset list to store which send message each proc corresponds to
@ -311,81 +374,81 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
  for (i = 0; i < nprocs; i++) {
    iproc++;
    if (iproc == nprocs) iproc = 0;
    if (work1[iproc] > 0) {
      proc_send[isend] = iproc;
      length_send[isend] = work1[iproc];
      work1[iproc] = isend;
      isend++;
    }
  }

  // num_send = # of atoms I send to each proc

  for (i = 0; i < nsend_proc; i++) num_send[i] = 0;
  for (i = 0; i < n; i++) {
    isend = work1[proclist[i]];
    num_send[isend]++;
  }

  // work2 = offsets into index_send for each proc I send to
  // index_send = list of which atoms to send to each proc
  //   1st N1 values are atom indices for 1st proc,
  //   next N2 values are atom indices for 2nd proc, etc
  // offset_send = where each atom starts in send buffer

  work2[0] = 0;
  for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];

  for (i = 0; i < n; i++) {
    isend = work1[proclist[i]];
    index_send[work2[isend]++] = i;
    if (i) offset_send[i] = offset_send[i-1] + sizes[i-1];
    else offset_send[i] = 0;
  }

  // tell receivers how much data I send
  // sendmax_proc = # of doubles I send in largest single message

  sendmax_proc = 0;
  for (i = 0; i < nsend_proc; i++) {
    MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world);
    sendmax_proc = MAX(sendmax_proc,length_send[i]);
  }

  // receive incoming messages
  // proc_recv = procs I recv from
  // length_recv = # of doubles each proc sends me
  // nrecvsize = total size of atom data I recv

  int nrecvsize = 0;
  for (i = 0; i < nrecv_proc; i++) {
    MPI_Recv(&length_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
    proc_recv[i] = status->MPI_SOURCE;
    nrecvsize += length_recv[i];
  }

  // sort proc_recv and length_recv by proc ID if requested
  // useful for debugging to insure reproducible ordering of received atoms
  // invoke by adding final arg = 1 to create_atom() call in migrate_atoms()

  if (sortflag) {
    int *order = new int[nrecv_proc];
    int *proc_recv_ordered = new int[nrecv_proc];
    int *length_recv_ordered = new int[nrecv_proc];

    for (i = 0; i < nrecv_proc; i++) order[i] = i;
    proc_recv_copy = proc_recv;
    qsort(order,nrecv_proc,sizeof(int),compare_standalone);

    int j;
    for (i = 0; i < nrecv_proc; i++) {
      j = order[i];
      proc_recv_ordered[i] = proc_recv[j];
      length_recv_ordered[i] = length_recv[j];
    }
    memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
    memcpy(length_recv,length_recv_ordered,nrecv_proc*sizeof(int));
    delete [] order;
    delete [] proc_recv_ordered;
    delete [] length_recv_ordered;
@ -396,27 +459,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
  MPI_Barrier(world);

  // return size of atom data I will receive

  return nrecvsize;
}
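The counting step above relies on a ReduceScatter trick: every proc sets work1[p]=1 for each proc p it will message, and a reduce-scatter with one slot per rank hands each proc the column sum for its own slot, i.e. how many messages it should expect. Here is a self-contained MPI sketch of just that step; the ring send pattern is chosen arbitrarily for the example and is not from the commit.

// sketch_reduce_scatter.cpp -- illustrative only
// build: mpicxx sketch_reduce_scatter.cpp ; run: mpirun -np 4 ./a.out
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  int me,nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);

  // each rank decides to send one message to rank (me+1)%nprocs
  std::vector<int> work1(nprocs,0),work2(nprocs,1);
  if (nprocs > 1) work1[(me+1)%nprocs] = 1;      // flag my destinations
  work1[me] = 0;                                 // self does not count

  // recvcounts = work2 = one slot per rank, so each rank receives the
  // column sum for its own slot = # of ranks that flagged it
  int nrecv_proc;
  MPI_Reduce_scatter(work1.data(),&nrecv_proc,work2.data(),
                     MPI_INT,MPI_SUM,MPI_COMM_WORLD);

  printf("rank %d will receive from %d other rank(s)\n",me,nrecv_proc);
  MPI_Finalize();
  return 0;
}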
@ -445,217 +488,226 @@ int compare_standalone(const void *iptr, const void *jptr)
void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
{
  int i,m,n,offset,count;

  // post all receives

  offset = 0;
  for (int irecv = 0; irecv < nrecv_proc; irecv++) {
    MPI_Irecv(&recvbuf[offset],length_recv[irecv],MPI_DOUBLE,
              proc_recv[irecv],0,world,&request[irecv]);
    offset += length_recv[irecv];
  }

  // reallocate buf for largest send if necessary

  if (sendmax_proc > maxdbuf) {
    memory->destroy(dbuf);
    maxdbuf = sendmax_proc;
    memory->create(dbuf,maxdbuf,"irregular:dbuf");
  }

  // send each message
  // pack buf with list of atoms
  // m = index of atom in sendbuf

  n = 0;
  for (int isend = 0; isend < nsend_proc; isend++) {
    offset = 0;
    count = num_send[isend];
    for (i = 0; i < count; i++) {
      m = index_send[n++];
      memcpy(&dbuf[offset],&sendbuf[offset_send[m]],sizes[m]*sizeof(double));
      offset += sizes[m];
    }
    MPI_Send(dbuf,length_send[isend],MPI_DOUBLE,proc_send[isend],0,world);
  }

  // wait on all incoming messages

  if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
   destroy vectors in communication plan for atoms
------------------------------------------------------------------------- */

void Irregular::destroy_atom()
{
  delete [] proc_send;
  delete [] length_send;
  delete [] num_send;
  delete [] index_send;
  delete [] offset_send;
  delete [] proc_recv;
  delete [] length_recv;
  delete [] request;
  delete [] status;
}
/* ----------------------------------------------------------------------
   create communication plan based on list of datums of uniform size
   n = # of datums to send
   proclist = proc to send each datum to, can include self
   sortflag = flag for sorting order of received messages by proc ID
   return total # of datums I will recv, including any to self
------------------------------------------------------------------------- */

int Irregular::create_data(int n, int *proclist, int sortflag)
{
  int i,m;
  // setup for collective comm
  // work1 = 1 for procs I send a message to, not including self
  // work2 = 1 for all procs, used for ReduceScatter

  for (i = 0; i < nprocs; i++) {
    work1[i] = 0;
    work2[i] = 1;
  }
  for (i = 0; i < n; i++) work1[proclist[i]] = 1;
  work1[me] = 0;

  // nrecv_proc = # of procs I receive messages from, not including self
  // options for performing ReduceScatter operation
  // some are more efficient on some machines at big sizes

#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
  MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
  nrecv_proc = work1[me];
#else
#ifdef LAMMPS_RS_ALLREDUCE
  MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
  nrecv_proc = work2[me];
#else
  MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
#endif
#endif

  // allocate receive arrays

  proc_recv = new int[nrecv_proc];
  num_recv = new int[nrecv_proc];
  request = new MPI_Request[nrecv_proc];
  status = new MPI_Status[nrecv_proc];

  // work1 = # of datums I send to each proc, including self
  // nsend_proc = # of procs I send messages to, not including self

  for (i = 0; i < nprocs; i++) work1[i] = 0;
  for (i = 0; i < n; i++) work1[proclist[i]]++;

  nsend_proc = 0;
  for (i = 0; i < nprocs; i++)
    if (work1[i]) nsend_proc++;
  if (work1[me]) nsend_proc--;

  // allocate send and self arrays

  proc_send = new int[nsend_proc];
  num_send = new int[nsend_proc];
  index_send = new int[n-work1[me]];
  index_self = new int[work1[me]];

  // proc_send = procs I send to
  // num_send = # of datums I send to each proc
  // num_self = # of datums I copy to self
  // to balance pattern of send messages:
  //   each proc begins with iproc > me, continues until iproc = me
  // reset work1 to store which send message each proc corresponds to

  int iproc = me;
  int isend = 0;
  for (i = 0; i < nprocs; i++) {
    iproc++;
    if (iproc == nprocs) iproc = 0;
    if (iproc == me) {
      num_self = work1[iproc];
      work1[iproc] = 0;
    } else if (work1[iproc] > 0) {
      proc_send[isend] = iproc;
      num_send[isend] = work1[iproc];
      work1[iproc] = isend;
      isend++;
    }
  }

  // work2 = offsets into index_send for each proc I send to
  // m = ptr into index_self
  // index_send = list of which datums to send to each proc
  //   1st N1 values are datum indices for 1st proc,
  //   next N2 values are datum indices for 2nd proc, etc
  // index_self = list of which datums to copy to self

  work2[0] = 0;
  for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];

  m = 0;
  for (i = 0; i < n; i++) {
    iproc = proclist[i];
    if (iproc == me) index_self[m++] = i;
    else {
      isend = work1[iproc];
      index_send[work2[isend]++] = i;
    }
  }

  // tell receivers how much data I send
  // sendmax_proc = largest # of datums I send in a single message

  sendmax_proc = 0;
  for (i = 0; i < nsend_proc; i++) {
    MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world);
    sendmax_proc = MAX(sendmax_proc,num_send[i]);
  }

  // receive incoming messages
  // proc_recv = procs I recv from
  // num_recv = total size of message each proc sends me
  // nrecvdatum = total size of data I recv

  int nrecvdatum = 0;
  for (i = 0; i < nrecv_proc; i++) {
    MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
    proc_recv[i] = status->MPI_SOURCE;
    nrecvdatum += num_recv[i];
  }
  nrecvdatum += num_self;

  // sort proc_recv and num_recv by proc ID if requested
  // useful for debugging to insure reproducible ordering of received datums

  if (sortflag) {
    int *order = new int[nrecv_proc];
    int *proc_recv_ordered = new int[nrecv_proc];
    int *num_recv_ordered = new int[nrecv_proc];

    for (i = 0; i < nrecv_proc; i++) order[i] = i;
    proc_recv_copy = proc_recv;
    qsort(order,nrecv_proc,sizeof(int),compare_standalone);

    int j;
    for (i = 0; i < nrecv_proc; i++) {
      j = order[i];
      proc_recv_ordered[i] = proc_recv[j];
      num_recv_ordered[i] = num_recv[j];
    }
    memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
    memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
    delete [] order;
    delete [] proc_recv_ordered;
    delete [] num_recv_ordered;
  }

  // barrier to insure all MPI_ANY_SOURCE messages are received
  // else another proc could proceed to exchange_data() and send to me

  MPI_Barrier(world);

  // return # of datums I will receive

  return nrecvdatum;
}
/* ----------------------------------------------------------------------
@ -667,49 +719,41 @@ int Irregular::create_data(int n, int *proclist)
void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
{
  int i,m,n,offset,count;

  // post all receives, starting after self copies

  offset = num_self*nbytes;
  for (int irecv = 0; irecv < nrecv_proc; irecv++) {
    MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
              proc_recv[irecv],0,world,&request[irecv]);
    offset += num_recv[irecv]*nbytes;
  }

  // reallocate buf for largest send if necessary

  if (sendmax_proc*nbytes > maxbuf) {
    memory->destroy(buf);
    maxbuf = sendmax_proc*nbytes;
    memory->create(buf,maxbuf,"irregular:buf");
  }

  // send each message
  // pack buf with list of datums
  // m = index of datum in sendbuf

  n = 0;
  for (int isend = 0; isend < nsend_proc; isend++) {
    count = num_send[isend];
    for (i = 0; i < count; i++) {
      m = index_send[n++];
      memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
    }
    MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
  }

  // copy datums to self, put at beginning of recvbuf

  for (i = 0; i < num_self; i++) {
    m = index_self[i];
    memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
@ -717,39 +761,37 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
  // wait on all incoming messages

  if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
   destroy vectors in communication plan for datums
------------------------------------------------------------------------- */

void Irregular::destroy_data()
{
  delete [] proc_send;
  delete [] num_send;
  delete [] index_send;
  delete [] proc_recv;
  delete [] num_recv;
  delete [] index_self;
  delete [] request;
  delete [] status;
}
/* ----------------------------------------------------------------------
   determine which proc owns atom with coord x[3]
   x will be in box (orthogonal) or lamda coords (triclinic)
   if layout = UNIFORM, calculate owning proc directly
   else layout = NONUNIFORM, iteratively find owning proc via binary search
   return owning proc ID via grid2proc
   return igx,igy,igz = logical grid loc of owning proc within 3d grid of procs
------------------------------------------------------------------------- */

int Irregular::coord2proc(double *x, int &igx, int &igy, int &igz)
{
  if (layout == 0) {
    if (triclinic == 0) {
      igx = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
      igy = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
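coord2proc() therefore has two paths: a direct index computation when the split is uniform, and a binary search over the split fractions when it is not. The standalone sketch below shows both lookups in one dimension with made-up split values; it is an illustration, not the LAMMPS routine.

// sketch_coord2proc.cpp -- illustrative only: owner lookup in one dimension
#include <cstdio>

// uniform layout: grid index straight from the fractional coordinate
int owner_uniform(double x, double lo, double prd, int nprocs)
{
  int ig = (int) (nprocs * (x-lo) / prd);
  if (ig >= nprocs) ig = nprocs - 1;
  return ig;
}

// nonuniform layout: split[] holds nprocs+1 increasing fractions from 0 to 1;
// binary search for the interval containing the fractional coordinate
int owner_nonuniform(double x, double lo, double prd,
                     const double *split, int nprocs)
{
  double frac = (x-lo) / prd;
  int ilo = 0, ihi = nprocs;
  while (ihi - ilo > 1) {
    int mid = (ilo + ihi) / 2;
    if (frac >= split[mid]) ilo = mid;
    else ihi = mid;
  }
  return ilo;
}

int main()
{
  double split[5] = {0.0, 0.1, 0.3, 0.6, 1.0};   // 4 unequal slices
  double x = 4.5, lo = 0.0, prd = 10.0;
  printf("uniform owner: %d\n",owner_uniform(x,lo,prd,4));              // 1
  printf("nonuniform owner: %d\n",owner_nonuniform(x,lo,prd,split,4));  // 2
  return 0;
}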
@ -846,7 +888,12 @@ void Irregular::grow_recv(int n)
bigint Irregular::memory_usage()
{
  bigint bytes = 0;
  bytes += maxsend*sizeof(double);      // buf_send
  bytes += maxrecv*sizeof(double);      // buf_recv
  bytes += maxdbuf*sizeof(double);      // dbuf
  bytes += maxbuf;                      // buf
  bytes += 2*maxlocal*sizeof(int);      // mproclist,msizes
  bytes += 2*nprocs*sizeof(int);        // work1,work2
  return bytes;
}

View File

@@ -27,9 +27,9 @@ class Irregular : protected Pointers {
Irregular(class LAMMPS *); Irregular(class LAMMPS *);
~Irregular(); ~Irregular();
void migrate_atoms(int sortflag = 0); void migrate_atoms(int sortflag = 0, int *procassign = NULL);
int migrate_check(); int migrate_check();
int create_data(int, int *); int create_data(int, int *, int sortflag = 0);
void exchange_data(char *, int, char *); void exchange_data(char *, int, char *);
void destroy_data(); void destroy_data();
bigint memory_usage(); bigint memory_usage();
@@ -38,58 +38,58 @@ class Irregular : protected Pointers {
int me,nprocs; int me,nprocs;
int triclinic; int triclinic;
int map_style; int map_style;
int uniform; int layout;
double *xsplit,*ysplit,*zsplit; // ptrs to comm double *xsplit,*ysplit,*zsplit; // ptrs to comm
int *procgrid; // ptr to comm int *procgrid; // ptr to comm
int ***grid2proc; // ptr to comm int ***grid2proc; // ptr to comm
double *boxlo; // ptr to domain double *boxlo; // ptr to domain
double *prd; // ptr to domain double *prd; // ptr to domain
int maxsend,maxrecv; // size of buffers in # of doubles int maxsend,maxrecv; // size of buf send/recv in # of doubles
double *buf_send,*buf_recv; double *buf_send,*buf_recv; // bufs used in migrate_atoms
int maxdbuf; // size of double buf in bytes
double *dbuf; // double buf for largest single atom send
int maxbuf; // size of char buf in bytes
char *buf; // char buf for largest single data send
// plan for irregular communication of atoms int *mproclist,*msizes; // persistent vectors in migrate_atoms
int maxlocal; // allocated size of mproclist and msizes
int *work1,*work2; // work vectors
// plan params for irregular communication of atoms or datums
// no params refer to atoms/data copied to self
int nsend_proc; // # of messages to send
int nrecv_proc; // # of messages to recv
int sendmax_proc; // # of doubles/datums in largest send message
int *proc_send; // list of procs to send to
int *num_send; // # of atoms/datums to send to each proc
int *index_send; // list of which atoms/datums to send to each proc
int *proc_recv; // list of procs to recv from
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
// extra plan params for irregular communication of atoms
// no params refer to atoms copied to self // no params refer to atoms copied to self
struct PlanAtom { int *length_send; // # of doubles to send to each proc
int nsend; // # of messages to send int *length_recv; // # of doubles to recv from each proc
int nrecv; // # of messages to recv int *offset_send; // where each atom starts in send buffer
int sendmax; // # of doubles in largest send message
int *proc_send; // procs to send to
int *length_send; // # of doubles to send to each proc
int *num_send; // # of atoms to send to each proc
int *index_send; // list of which atoms to send to each proc
int *offset_send; // where each atom starts in send buffer
int *proc_recv; // procs to recv from
int *length_recv; // # of doubles to recv from each proc
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
};
// plan for irregular communication of datums // extra plan params for irregular communication of datums
// only 2 self params refer to atoms copied to self // 2 self params refer to data copied to self
struct PlanData { // plan for irregular communication of data int *num_recv; // # of datums to recv from each proc
int nsend; // # of messages to send int num_self; // # of datums to copy to self
int nrecv; // # of messages to recv int *index_self; // list of which datums to copy to self
int sendmax; // # of datums in largest send message
int *proc_send; // procs to send to
int *num_send; // # of datums to send to each proc
int *index_send; // list of which datums to send to each proc
int *proc_recv; // procs to recv from
int *num_recv; // # of datums to recv from each proc
int num_self; // # of datums to copy to self
int *index_self; // list of which datums to copy to self
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
};
PlanAtom *aplan; // private methods
PlanData *dplan;
int create_atom(int, int *, int *, int); int create_atom(int, int *, int *, int);
void exchange_atom(double *, int *, double *); void exchange_atom(double *, int *, double *);
void destroy_atom(); void destroy_atom();
int coord2proc(double *, int &, int &, int &); int coord2proc(double *, int &, int &, int &);
int binary(double, int, double *); int binary(double, int, double *);
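Taken together, the datum methods in this header are meant to be called as a create/exchange/destroy sequence: create_data() builds the plan from a per-datum destination-proc list and returns how many datums will arrive, the caller sizes its receive buffer accordingly, then exchange_data() moves the fixed-size datums and destroy_data() tears the plan down. A hedged usage sketch of that sequence (the Datum struct, the route_datums() wrapper, and how proclist gets filled are illustrative assumptions, not part of this commit):

#include "irregular.h"
#include "memory.h"
using namespace LAMMPS_NS;

struct Datum { int id; double value; };   // hypothetical fixed-size payload

// lmp = existing LAMMPS instance, n = # of local datums,
// proclist[i] = destination proc of sendbuf[i]
void route_datums(LAMMPS *lmp, int n, Datum *sendbuf, int *proclist)
{
  Irregular *irregular = new Irregular(lmp);

  // build communication plan; returns # of datums this proc will receive
  int nrecv = irregular->create_data(n, proclist);

  Datum *recvbuf;
  lmp->memory->create(recvbuf, nrecv, "example:recvbuf");

  // all datums are sizeof(Datum) bytes; self-copies land at the front of recvbuf
  irregular->exchange_data((char *) sendbuf, sizeof(Datum), (char *) recvbuf);
  irregular->destroy_data();
  delete irregular;

  // ... consume recvbuf[0..nrecv-1] ...
  lmp->memory->destroy(recvbuf);
}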

926
src/rcb.cpp Normal file
View File

@@ -0,0 +1,926 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "rcb.h"
#include "irregular.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MYHUGE 1.0e30
#define TINY 1.0e-6
// set this to bigger number after debugging
#define DELTA 10
// prototypes for non-class functions
void box_merge(void *, void *, int *, MPI_Datatype *);
void median_merge(void *, void *, int *, MPI_Datatype *);
// NOTE: if want to have reuse flag, need to sum Tree across procs
/* ---------------------------------------------------------------------- */
RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
{
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
ndot = maxdot = 0;
dots = NULL;
nlist = maxlist = 0;
dotlist = dotmark = NULL;
maxbuf = 0;
buf = NULL;
maxrecv = maxsend = 0;
recvproc = recvindex = sendproc = sendindex = NULL;
tree = NULL;
irregular = NULL;
// create MPI data and function types for box and median AllReduce ops
MPI_Type_contiguous(6,MPI_DOUBLE,&box_type);
MPI_Type_commit(&box_type);
MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type);
MPI_Type_commit(&med_type);
MPI_Op_create(box_merge,1,&box_op);
MPI_Op_create(median_merge,1,&med_op);
reuse = 0;
}
/* ---------------------------------------------------------------------- */
RCB::~RCB()
{
memory->sfree(dots);
memory->destroy(dotlist);
memory->destroy(dotmark);
memory->sfree(buf);
memory->destroy(recvproc);
memory->destroy(recvindex);
memory->destroy(sendproc);
memory->destroy(sendindex);
memory->sfree(tree);
delete irregular;
MPI_Type_free(&med_type);
MPI_Type_free(&box_type);
MPI_Op_free(&box_op);
MPI_Op_free(&med_op);
}
/* ----------------------------------------------------------------------
perform RCB balancing of N particles at coords X in bounding box LO/HI
if wt = NULL, ignore per-particle weights
if wt defined, per-particle weights > 0.0
dimension = 2 or 3
as documented in rcb.h:
sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
all proc particles will be inside or on surface of 3-d box
defined by final lo/hi
// NOTE: worry about re-use of data structs for fix balance
// NOTE: should I get rid of wt all together, will it be used?
------------------------------------------------------------------------- */
void RCB::compute(int dimension, int n, double **x, double *wt,
double *bboxlo, double *bboxhi)
{
int i,j,k;
int keep,outgoing,incoming,incoming2;
int dim,markactive;
int indexlo,indexhi;
int first_iteration,breakflag;
double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
double targetlo,targethi;
double valuemin,valuemax,valuehalf;
double tolerance;
MPI_Comm comm,comm_half;
MPI_Request request,request2;
MPI_Status status;
Median med,medme;
// create list of my Dots
ndot = nkeep = noriginal = n;
if (ndot > maxdot) {
maxdot = ndot;
memory->sfree(dots);
dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
}
for (i = 0; i < ndot; i++) {
dots[i].x[0] = x[i][0];
dots[i].x[1] = x[i][1];
dots[i].x[2] = x[i][2];
dots[i].proc = me;
dots[i].index = i;
}
if (wt)
for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
else
for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
// initial bounding box = simulation box
// includes periodic or shrink-wrapped boundaries
lo = bbox.lo;
hi = bbox.hi;
lo[0] = bboxlo[0];
lo[1] = bboxlo[1];
lo[2] = bboxlo[2];
hi[0] = bboxhi[0];
hi[1] = bboxhi[1];
hi[2] = bboxhi[2];
// initialize counters
counters[0] = 0;
counters[1] = 0;
counters[2] = 0;
counters[3] = ndot;
counters[4] = maxdot;
counters[5] = 0;
counters[6] = 0;
// create communicator for use in recursion
MPI_Comm_dup(world,&comm);
// recurse until partition is a single proc = me
// proclower,procupper = lower,upper procs in partition
// procmid = 1st proc in upper half of partition
int procpartner,procpartner2;
int readnumber;
int procmid;
int proclower = 0;
int procupper = nprocs - 1;
while (proclower != procupper) {
// if odd # of procs, lower partition gets extra one
procmid = proclower + (procupper - proclower) / 2 + 1;
// determine communication partner(s)
// readnumber = # of proc partners to read from
if (me < procmid)
procpartner = me + (procmid - proclower);
else
procpartner = me - (procmid - proclower);
readnumber = 1;
if (procpartner > procupper) {
readnumber = 0;
procpartner--;
}
if (me == procupper && procpartner != procmid - 1) {
readnumber = 2;
procpartner2 = procpartner + 1;
}
// wttot = summed weight of entire partition
// search tolerance = largest single weight (plus epsilon)
// targetlo = desired weight in lower half of partition
// targethi = desired weight in upper half of partition
wtmax = wtsum = 0.0;
if (wt) {
for (i = 0; i < ndot; i++) {
wtsum += dots[i].wt;
if (dots[i].wt > wtmax) wtmax = dots[i].wt;
}
} else {
for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
wtmax = 1.0;
}
MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
else tolerance = 1.0;
tolerance *= 1.0 + TINY;
targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
targethi = wttot - targetlo;
// dim = dimension to bisect on
// do not allow choice of z dimension for 2d system
dim = 0;
if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1;
if (dimension == 3) {
if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2;
if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2;
}
// create active list and mark array for dots
// initialize active list to all dots
if (ndot > maxlist) {
memory->destroy(dotlist);
memory->destroy(dotmark);
maxlist = maxdot;
memory->create(dotlist,maxlist,"RCB:dotlist");
memory->create(dotmark,maxlist,"RCB:dotmark");
}
nlist = ndot;
for (i = 0; i < nlist; i++) dotlist[i] = i;
// median iteration
// zoom in on bisector until correct # of dots in each half of partition
// as each iteration of median-loop begins, require:
// all non-active dots are marked with 0/1 in dotmark
// valuemin <= every active dot <= valuemax
// wtlo, wthi = total wt of non-active dots
// when leave median-loop, require only:
// valuehalf = correct cut position
// all dots <= valuehalf are marked with 0 in dotmark
// all dots >= valuehalf are marked with 1 in dotmark
// markactive = which side of cut is active = 0/1
// indexlo,indexhi = indices of dot closest to median
wtlo = wthi = 0.0;
valuemin = lo[dim];
valuemax = hi[dim];
first_iteration = 1;
while (1) {
// choose bisector value
// use old value on 1st iteration if old cut dimension is the same
// on 2nd option: could push valuehalf towards geometric center
// with "1.0-factor" to force overshoot
if (first_iteration && reuse && dim == tree[procmid].dim) {
counters[5]++;
valuehalf = tree[procmid].cut;
if (valuehalf < valuemin || valuehalf > valuemax)
valuehalf = 0.5 * (valuemin + valuemax);
} else if (wt)
valuehalf = valuemin + (targetlo - wtlo) /
(wttot - wtlo - wthi) * (valuemax - valuemin);
else
valuehalf = 0.5 * (valuemin + valuemax);
first_iteration = 0;
// initialize local median data structure
medme.totallo = medme.totalhi = 0.0;
medme.valuelo = -MYHUGE;
medme.valuehi = MYHUGE;
medme.wtlo = medme.wthi = 0.0;
medme.countlo = medme.counthi = 0;
medme.proclo = medme.prochi = me;
// mark all active dots on one side or other of bisector
// also set all fields in median data struct
// save indices of closest dots on either side
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dots[i].x[dim] <= valuehalf) { // in lower part
medme.totallo += dots[i].wt;
dotmark[i] = 0;
if (dots[i].x[dim] > medme.valuelo) { // my closest dot
medme.valuelo = dots[i].x[dim];
medme.wtlo = dots[i].wt;
medme.countlo = 1;
indexlo = i;
} else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
medme.wtlo += dots[i].wt;
medme.countlo++;
}
}
else { // in upper part
medme.totalhi += dots[i].wt;
dotmark[i] = 1;
if (dots[i].x[dim] < medme.valuehi) { // my closest dot
medme.valuehi = dots[i].x[dim];
medme.wthi = dots[i].wt;
medme.counthi = 1;
indexhi = i;
} else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
medme.wthi += dots[i].wt;
medme.counthi++;
}
}
}
// combine median data struct across current subset of procs
counters[0]++;
MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
// test median guess for convergence
// move additional dots that are next to cut across it
if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
wtlo += med.totallo;
valuehalf = med.valuehi;
if (med.counthi == 1) { // only one dot to move
if (wtlo + med.wthi < targetlo) { // move it, keep iterating
if (me == med.prochi) dotmark[indexhi] = 0;
}
else { // only move if beneficial
if (wtlo + med.wthi - targetlo < targetlo - wtlo)
if (me == med.prochi) dotmark[indexhi] = 0;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuehi == med.valuehi) wtok = medme.wthi;
if (wtlo + med.wthi >= targetlo) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targetlo - wtlo;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuehi) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 0;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wtlo += med.wthi;
if (targetlo-wtlo <= tolerance) break; // close enough
valuemin = med.valuehi; // iterate again
markactive = 1;
}
else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
wthi += med.totalhi;
valuehalf = med.valuelo;
if (med.countlo == 1) { // only one dot to move
if (wthi + med.wtlo < targethi) { // move it, keep iterating
if (me == med.proclo) dotmark[indexlo] = 1;
}
else { // only move if beneficial
if (wthi + med.wtlo - targethi < targethi - wthi)
if (me == med.proclo) dotmark[indexlo] = 1;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
if (wthi + med.wtlo >= targethi) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targethi - wthi;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuelo) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 1;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wthi += med.wtlo;
if (targethi-wthi <= tolerance) break; // close enough
valuemax = med.valuelo; // iterate again
markactive = 0;
}
else // Goldilocks result: both partitions just right
break;
// shrink the active list
k = 0;
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dotmark[i] == markactive) dotlist[k++] = i;
}
nlist = k;
}
// found median
// store cut info only if I am procmid
if (me == procmid) {
cut = valuehalf;
cutdim = dim;
}
// use cut to shrink my RCB bounding box
if (me < procmid) hi[dim] = valuehalf;
else lo[dim] = valuehalf;
// outgoing = number of dots to ship to partner
// nkeep = number of dots that have never migrated
markactive = (me < procpartner);
for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
if (dotmark[i] == markactive) outgoing++;
else if (i < nkeep) keep++;
nkeep = keep;
// alert partner how many dots I'll send, read how many I'll recv
MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
incoming = 0;
if (readnumber) {
MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,&status);
if (readnumber == 2) {
MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,&status);
incoming += incoming2;
}
}
// check if need to alloc more space
int ndotnew = ndot - outgoing + incoming;
if (ndotnew > maxdot) {
while (maxdot < ndotnew) maxdot += DELTA;
dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
counters[6]++;
}
counters[1] += outgoing;
counters[2] += incoming;
if (ndotnew > counters[3]) counters[3] = ndotnew;
if (maxdot > counters[4]) counters[4] = maxdot;
// malloc comm send buffer
if (outgoing > maxbuf) {
memory->sfree(buf);
maxbuf = outgoing;
buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
}
// fill buffer with dots that are marked for sending
// pack down the unmarked ones
keep = outgoing = 0;
for (i = 0; i < ndot; i++) {
if (dotmark[i] == markactive)
memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
else
memcpy(&dots[keep++],&dots[i],sizeof(Dot));
}
// post receives for dots
if (readnumber > 0) {
MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
procpartner,1,world,&request);
if (readnumber == 2) {
keep += incoming - incoming2;
MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
procpartner2,1,world,&request2);
}
}
// handshake before sending dots to insure recvs have been posted
if (readnumber > 0) {
MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
}
MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,&status);
// send dots to partner
MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
// wait until all dots are received
if (readnumber > 0) {
MPI_Wait(&request,&status);
if (readnumber == 2) MPI_Wait(&request2,&status);
}
ndot = ndotnew;
// cut partition in half, create new communicators of 1/2 size
int split;
if (me < procmid) {
procupper = procmid - 1;
split = 0;
} else {
proclower = procmid;
split = 1;
}
MPI_Comm_split(comm,split,me,&comm_half);
MPI_Comm_free(&comm);
comm = comm_half;
}
// clean up
MPI_Comm_free(&comm);
// set public variables with results of rebalance
nfinal = ndot;
if (nfinal > maxrecv) {
memory->destroy(recvproc);
memory->destroy(recvindex);
maxrecv = nfinal;
memory->create(recvproc,maxrecv,"RCB:recvproc");
memory->create(recvindex,maxrecv,"RCB:recvindex");
}
for (i = 0; i < nfinal; i++) {
recvproc[i] = dots[i].proc;
recvindex[i] = dots[i].index;
}
}
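The recursion bookkeeping at the top of the loop (procmid, procpartner, targetlo/targethi) is plain integer and weight arithmetic; a small standalone sketch of one recursion level may make the partitioning easier to follow (the 7-process, unit-weight example is hypothetical):

#include <cstdio>

int main()
{
  // one level of the RCB recursion over procs [proclower,procupper]
  int proclower = 0, procupper = 6;          // 7 procs, odd count
  double wttot = 7.0;                        // unit weights, one dot per proc

  // lower partition gets the extra proc: procmid = first proc of upper half
  int procmid = proclower + (procupper - proclower) / 2 + 1;     // = 4

  // desired weight on each side of the cut
  double targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
  double targethi = wttot - targetlo;        // 4.0 and 3.0

  // communication partner of a given proc at this level
  int me = 6;
  int procpartner = (me < procmid) ? me + (procmid - proclower)
                                   : me - (procmid - proclower);  // 6 -> 2

  printf("procmid %d targetlo %g targethi %g partner of %d is %d\n",
         procmid, targetlo, targethi, me, procpartner);
  return 0;
}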
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge of each component of an RCB bounding box
------------------------------------------------------------------------- */
void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::BBox *box1 = (RCB::BBox *) in;
RCB::BBox *box2 = (RCB::BBox *) inout;
for (int i = 0; i < 3; i++) {
if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i];
if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i];
}
}
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge median data structure
on input:
in,inout->totallo, totalhi = weight in both partitions on this proc
valuelo, valuehi = pos of nearest dot(s) to cut on this proc
wtlo, wthi = total wt of dot(s) at that pos on this proc
countlo, counthi = # of dot(s) nearest to cut on this proc
proclo, prochi = not used
on exit:
inout-> totallo, totalhi = total # of active dots in both partitions
valuelo, valuehi = pos of nearest dot(s) to cut
wtlo, wthi = total wt of dot(s) at that position
countlo, counthi = total # of dot(s) nearest to cut
proclo, prochi = one unique proc who owns a nearest dot
all procs must get same proclo,prochi
------------------------------------------------------------------------- */
void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::Median *med1 = (RCB::Median *) in;
RCB::Median *med2 = (RCB::Median *) inout;
med2->totallo += med1->totallo;
if (med1->valuelo > med2->valuelo) {
med2->valuelo = med1->valuelo;
med2->wtlo = med1->wtlo;
med2->countlo = med1->countlo;
med2->proclo = med1->proclo;
}
else if (med1->valuelo == med2->valuelo) {
med2->wtlo += med1->wtlo;
med2->countlo += med1->countlo;
if (med1->proclo < med2->proclo) med2->proclo = med1->proclo;
}
med2->totalhi += med1->totalhi;
if (med1->valuehi < med2->valuehi) {
med2->valuehi = med1->valuehi;
med2->wthi = med1->wthi;
med2->counthi = med1->counthi;
med2->prochi = med1->prochi;
}
else if (med1->valuehi == med2->valuehi) {
med2->wthi += med1->wthi;
med2->counthi += med1->counthi;
if (med1->prochi < med2->prochi) med2->prochi = med1->prochi;
}
}
/* ----------------------------------------------------------------------
invert the RCB rebalance result to convert receive info into send info
sortflag = flag for sorting order of received messages by proc ID
------------------------------------------------------------------------- */
void RCB::invert(int sortflag)
{
Invert *sbuf,*rbuf;
// only create Irregular if not previously created
// allows Irregular to persist for multiple RCB calls by fix balance
if (!irregular) irregular = new Irregular(lmp);
// nsend = # of dots to request from other procs
int nsend = nfinal-nkeep;
int *proclist;
memory->create(proclist,nsend,"RCB:proclist");
Invert *sinvert =
(Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert");
int m = 0;
for (int i = nkeep; i < nfinal; i++) {
proclist[m] = recvproc[i];
sinvert[m].rindex = recvindex[i];
sinvert[m].sproc = me;
sinvert[m].sindex = i;
m++;
}
// perform inversion via irregular comm
// nrecv = # of my dots to send to other procs
int nrecv = irregular->create_data(nsend,proclist,sortflag);
Invert *rinvert =
(Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert");
irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert);
irregular->destroy_data();
// set public variables from requests to send my dots
if (noriginal > maxsend) {
memory->destroy(sendproc);
memory->destroy(sendindex);
maxsend = noriginal;
memory->create(sendproc,maxsend,"RCB:sendproc");
memory->create(sendindex,maxsend,"RCB:sendindex");
}
for (int i = 0; i < nkeep; i++) {
sendproc[recvindex[i]] = me;
sendindex[recvindex[i]] = i;
}
for (int i = 0; i < nrecv; i++) {
m = rinvert[i].rindex;
sendproc[m] = rinvert[i].sproc;
sendindex[m] = rinvert[i].sindex;
}
// clean-up
memory->destroy(proclist);
memory->destroy(sinvert);
memory->destroy(rinvert);
}
/* ----------------------------------------------------------------------
memory use of Irregular
------------------------------------------------------------------------- */
bigint RCB::memory_usage()
{
bigint bytes = 0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}
// -----------------------------------------------------------------------
// DEBUG methods
// -----------------------------------------------------------------------
/*
// consistency checks on RCB results
void RCB::check()
{
int i,iflag,total1,total2;
double weight,wtmax,wtmin,wtone,tolerance;
// check that total # of dots remained the same
MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world);
if (total1 != total2) {
if (me == 0)
printf("ERROR: Points before RCB = %d, Points after RCB = %d\n",
total1,total2);
}
// check that result is load-balanced within log2(P)*max-wt
weight = wtone = 0.0;
for (i = 0; i < ndot; i++) {
weight += dots[i].wt;
if (dots[i].wt > wtone) wtone = dots[i].wt;
}
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world);
// i = smallest power-of-2 >= nprocs
// tolerance = largest-single-weight*log2(nprocs)
for (i = 0; (nprocs >> i) != 0; i++);
tolerance = tolerance * i * (1.0 + TINY);
if (wtmax - wtmin > tolerance) {
if (me == 0)
printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance);
MPI_Barrier(world);
if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight);
if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight);
}
MPI_Barrier(world);
// check that final set of points is inside RCB box of each proc
iflag = 0;
for (i = 0; i < ndot; i++) {
if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] ||
dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] ||
dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2])
iflag++;
}
if (iflag > 0)
printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me);
}
// stats for RCB decomposition
void RCB::stats(int flag)
{
int i,iflag,sum,min,max;
double ave,rsum,rmin,rmax;
double weight,wttot,wtmin,wtmax;
if (me == 0) printf("RCB Statistics:\n");
// distribution info
for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt;
MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) {
printf(" Total weight of dots = %g\n",wttot);
printf(" Weight on each proc: ave = %g, max = %g, min = %g\n",
wttot/nprocs,wtmax,wtmin);
}
if (flag) {
MPI_Barrier(world);
printf(" Proc %d has weight = %g\n",me,weight);
}
for (i = 0, weight = 0.0; i < ndot; i++)
if (dots[i].wt > weight) weight = dots[i].wt;
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max weight = %g\n",me,weight);
}
// counter info
MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d median count = %d\n",me,counters[0]);
}
MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d send count = %d\n",me,counters[1]);
}
MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d recv count = %d\n",me,counters[2]);
}
MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max dots = %d\n",me,counters[3]);
}
MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max memory = %d\n",me,counters[4]);
}
if (reuse) {
MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of Reuse = %d\n",me,counters[5]);
}
}
MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]);
}
// RCB boxes for each proc
if (flag) {
if (me == 0) printf(" RCB sub-domain boxes:\n");
for (i = 0; i < 3; i++) {
MPI_Barrier(world);
if (me == 0) printf(" Dimension %d\n",i+1);
MPI_Barrier(world);
printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]);
}
}
}
*/

131
src/rcb.h Normal file
View File

@@ -0,0 +1,131 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LAMMPS_RCB_H
#define LAMMPS_RCB_H
#include "mpi.h"
#include "pointers.h"
namespace LAMMPS_NS {
class RCB : protected Pointers {
public:
// set by compute()
int noriginal; // # of dots I own before balancing
int nfinal; // # of dots I own after balancing
int nkeep; // how many dots of noriginal I still own
// will be first nkept of nfinal list
int *recvproc; // proc IDs of nfinal dots
int *recvindex; // index of nfinal dots on owning procs
// based on input list for compute()
double *lo,*hi; // final bounding box of my RCB sub-domain
double cut; // single cut (in Tree) owned by this proc
int cutdim; // dimension (0,1,2) of the cut
// set by invert()
int *sendproc; // proc to send each of my noriginal dots to
int *sendindex; // index of dot in receiver's nfinal list
RCB(class LAMMPS *);
~RCB();
void compute(int, int, double **, double *, double *, double *);
void invert(int sortflag = 0);
bigint memory_usage();
// DEBUG methods
//void check();
//void stats(int);
// RCB cut info
struct Median {
double totallo,totalhi; // weight in each half of active partition
double valuelo,valuehi; // position of dot(s) nearest to cut
double wtlo,wthi; // total weight of dot(s) at that position
int countlo,counthi; // # of dots at that position
int proclo,prochi; // unique proc who owns a nearest dot
};
struct BBox {
double lo[3],hi[3]; // corner points of a bounding box
};
private:
int me,nprocs;
// point to balance on
struct Dot {
double x[3]; // coord of point
double wt; // weight of point
int proc; // owning proc
int index; // index on owning proc
};
// tree of RCB cuts
struct Tree {
double cut; // position of cut
int dim; // dimension = 0/1/2 of cut
};
// inversion message
struct Invert {
int rindex; // index on receiving proc
int sproc; // sending proc
int sindex; // index on sending proc
};
Dot *dots; // dots on this proc
int ndot; // # of dots on this proc
int maxdot; // allocated size of dots
int ndotorig;
int nlist;
int maxlist;
int *dotlist;
int *dotmark;
int maxbuf;
Dot *buf;
int maxrecv,maxsend;
BBox bbox;
class Irregular *irregular;
MPI_Op box_op,med_op;
MPI_Datatype box_type,med_type;
int reuse; // 1/0 to use/not use previous cuts
int dottop; // dots >= this index are new
double bboxlo[3]; // bounding box of final RCB sub-domain
double bboxhi[3];
Tree *tree; // tree of RCB cuts, used by reuse()
int counters[7]; // diagnostic counts
// 0 = # of median iterations
// 1 = # of points sent
// 2 = # of points received
// 3 = most points this proc ever owns
// 4 = most point memory this proc ever allocs
// 5 = # of times a previous cut is re-used
// 6 = # of reallocs of point vector
};
}
#endif
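Based on the public interface above, a typical caller (e.g. a balance command) would hand compute() its local points and the global bounding box, then call invert() to learn where each of its original points should be sent. A hedged sketch of that call sequence (the rebalance() wrapper and its arguments are illustrative; error handling and reuse of the RCB object are omitted):

#include "rcb.h"
using namespace LAMMPS_NS;

// lmp = existing LAMMPS instance, n = # of local points, x = n-by-3 coords,
// boxlo/boxhi = global bounding box; weights are ignored by passing NULL
void rebalance(LAMMPS *lmp, int n, double **x, double *boxlo, double *boxhi)
{
  RCB *rcb = new RCB(lmp);

  // cut the box recursively until each proc owns one sub-domain
  rcb->compute(3, n, x, NULL, boxlo, boxhi);

  // rcb->lo / rcb->hi now bound my sub-domain,
  // rcb->recvproc / rcb->recvindex identify the points I ended up with

  // convert recv info into send info for my noriginal points
  rcb->invert();

  // rcb->sendproc[i] / rcb->sendindex[i] say where original point i goes
  delete rcb;
}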

View File

@@ -29,6 +29,8 @@ using namespace LAMMPS_NS;
#define LB_FACTOR 1.1 #define LB_FACTOR 1.1
#define EPSILON 1.0e-6 #define EPSILON 1.0e-6
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {} Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {}
@@ -220,17 +222,33 @@ void Replicate::command(int narg, char **arg)
sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
} }
if (domain->xperiodic) { if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; if (domain->xperiodic) {
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
} if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
if (domain->yperiodic) { }
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; if (domain->yperiodic) {
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
} if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
if (domain->zperiodic) { }
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; if (domain->zperiodic) {
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
}
} else {
if (domain->xperiodic) {
if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
}
} }
// loop over all procs // loop over all procs
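With a tiled decomposition there is no myloc/procgrid indexing, so the new branch above tests the sub-domain's split fractions instead: a face lies on a periodic box boundary exactly when the corresponding mysplit entry is 0.0 or 1.0. A minimal sketch of that test in one dimension (the mysplit values and helper are hypothetical, not part of the commit):

#include <cstdio>

// Expand a sub-domain bound by epsilon if it touches a periodic box face,
// mirroring the LAYOUT_TILED branch above for a single dimension.
void expand_if_on_boundary(const double mysplit[2], double &sublo, double &subhi,
                           double epsilon)
{
  if (mysplit[0] == 0.0) sublo -= epsilon;   // touches lower box face
  if (mysplit[1] == 1.0) subhi += epsilon;   // touches upper box face
}

int main()
{
  double split[2] = {0.0, 0.25};             // hypothetical RCB slab in x
  double sublo = 0.0, subhi = 2.5;
  expand_if_on_boundary(split, sublo, subhi, 1.0e-6);
  printf("sublo %g subhi %g\n", sublo, subhi);  // only sublo was expanded
  return 0;
}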