git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12207 f3b2605a-c512-4ea7-a41b-209d697bcdaa
@@ -211,6 +211,9 @@ void PPPMCuda::init()
   // error check

   if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation");
+  if (comm->style != 0)
+    error->universe_all(FLERR,"PPPMCuda can only currently be used with "
+                        "comm_style brick");

   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

@@ -88,6 +88,10 @@ FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) :

   if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command");

+  if (comm->style != 0)
+    error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
+                        "comm_style brick");
+
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);


@@ -567,9 +571,12 @@ int FixLbFluid::setmask()

 void FixLbFluid::init(void)
 {
   int i,j;

+  if (comm->style != 0)
+    error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
+                        "comm_style brick");
+
   //--------------------------------------------------------------------------
   // Check to see if the MD timestep has changed between runs.
   //--------------------------------------------------------------------------
src/atom.cpp

@@ -47,6 +47,8 @@ using namespace MathConst;
 #define CUDA_CHUNK 3000
 #define MAXBODY 20       // max # of lines in one body, also in ReadData class

+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};     // several files
+
 /* ---------------------------------------------------------------------- */

 Atom::Atom(LAMMPS *lmp) : Pointers(lmp)

@@ -753,17 +755,33 @@ void Atom::data_atoms(int n, char *buf)
     sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
   }

-  if (domain->xperiodic) {
-    if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
-    if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
-  }
-  if (domain->yperiodic) {
-    if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
-    if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
-  }
-  if (domain->zperiodic) {
-    if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
-    if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
-  }
+  if (comm->layout != LAYOUT_TILED) {
+    if (domain->xperiodic) {
+      if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
+      if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
+    }
+    if (domain->yperiodic) {
+      if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
+      if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
+    }
+    if (domain->zperiodic) {
+      if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
+      if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+    }
+
+  } else {
+    if (domain->xperiodic) {
+      if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
+      if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
+    }
+    if (domain->yperiodic) {
+      if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
+      if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
+    }
+    if (domain->zperiodic) {
+      if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
+      if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
+    }
+  }

   // xptr = which word in line starts xyz coords
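Note on the tiled branch above: with comm_style tiled there is no logical 3d processor grid, so a processor cannot use myloc/procgrid to tell whether it touches a periodic box face; the new comm->mysplit array (fractional 0-1 bounds of the sub-domain, see src/comm.h below) carries that information instead. A minimal sketch of the face test, written as a loop for illustration only (the helper name and signature are assumptions, not part of the commit):

    // sketch: pad sub-domain bounds on periodic box faces under the tiled layout
    // mysplit[dim][0] == 0.0 marks the lower box face, mysplit[dim][1] == 1.0 the upper
    void pad_subdomain(const double mysplit[3][2], const int periodic[3],
                       const double epsilon[3], double sublo[3], double subhi[3])
    {
      for (int dim = 0; dim < 3; dim++) {
        if (!periodic[dim]) continue;
        if (mysplit[dim][0] == 0.0) sublo[dim] -= epsilon[dim];
        if (mysplit[dim][1] == 1.0) subhi[dim] += epsilon[dim];
      }
    }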
src/balance.cpp

@@ -21,6 +21,7 @@
 #include "balance.h"
 #include "atom.h"
 #include "comm.h"
+#include "rcb.h"
 #include "irregular.h"
 #include "domain.h"
 #include "force.h"

@@ -30,9 +31,10 @@

 using namespace LAMMPS_NS;

-enum{XYZ,SHIFT,RCB};
+enum{XYZ,SHIFT,BISECTION};
 enum{NONE,UNIFORM,USER};
 enum{X,Y,Z};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};     // several files

 /* ---------------------------------------------------------------------- */


@@ -47,6 +49,8 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
   user_xsplit = user_ysplit = user_zsplit = NULL;
   shift_allocate = 0;

+  rcb = NULL;
+
   fp = NULL;
   firststep = 1;
 }

@@ -74,6 +78,8 @@ Balance::~Balance()
     delete [] hisum;
   }

+  delete rcb;
+
   if (fp) fclose(fp);
 }


@@ -176,7 +182,7 @@ void Balance::command(int narg, char **arg)

     } else if (strcmp(arg[iarg],"rcb") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
-      style = RCB;
+      style = BISECTION;
       iarg++;

     } else break;

@@ -232,6 +238,9 @@ void Balance::command(int narg, char **arg)
     }
   }

+  if (style == BISECTION && comm->style == 0)
+    error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
+
   // insure atoms are in current box & update box via shrink-wrap
   // init entire system since comm->setup is done
   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc

@@ -251,8 +260,10 @@ void Balance::command(int narg, char **arg)
   double imbinit = imbalance_nlocal(maxinit);

   // no load-balance if imbalance doesn't exceed threshhold
+  // unless switching from tiled to non tiled layout, then force rebalance

-  if (imbinit < thresh) return;
+  if (comm->layout == LAYOUT_TILED && style != BISECTION) {
+  } else if (imbinit < thresh) return;

   // debug output of initial state


@@ -262,80 +273,65 @@ void Balance::command(int narg, char **arg)

   int niter = 0;

-  // NOTE: if using XYZ or SHIFT and current partition is TILING,
-  //       then need to create initial BRICK partition before performing LB
-
   // perform load-balance
   // style XYZ = explicit setting of cutting planes of logical 3d grid

   if (style == XYZ) {
+    if (comm->layout == LAYOUT_UNIFORM) {
+      if (xflag == USER || yflag == USER || zflag == USER)
+        comm->layout == LAYOUT_NONUNIFORM;
+    } else if (comm->style == LAYOUT_NONUNIFORM) {
+      if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+        comm->layout == LAYOUT_UNIFORM;
+    } else if (comm->style == LAYOUT_TILED) {
+      if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+        comm->layout == LAYOUT_UNIFORM;
+      else comm->layout == LAYOUT_NONUNIFORM;
+    }
+
     if (xflag == UNIFORM) {
       for (int i = 0; i < procgrid[0]; i++)
         comm->xsplit[i] = i * 1.0/procgrid[0];
       comm->xsplit[procgrid[0]] = 1.0;
-    }
+    } else if (xflag == USER)
+      for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];

     if (yflag == UNIFORM) {
       for (int i = 0; i < procgrid[1]; i++)
         comm->ysplit[i] = i * 1.0/procgrid[1];
       comm->ysplit[procgrid[1]] = 1.0;
-    }
+    } else if (yflag == USER)
+      for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];

     if (zflag == UNIFORM) {
       for (int i = 0; i < procgrid[2]; i++)
         comm->zsplit[i] = i * 1.0/procgrid[2];
       comm->zsplit[procgrid[2]] = 1.0;
-    }
-
-    if (xflag == USER)
-      for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
-
-    if (yflag == USER)
-      for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
-
-    if (zflag == USER)
+    } else if (zflag == USER)
       for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
   }

   // style SHIFT = adjust cutting planes of logical 3d grid

   if (style == SHIFT) {
-    static_setup(bstr);
+    comm->layout = LAYOUT_NONUNIFORM;
+    shift_setup_static(bstr);
     niter = shift();
   }

-  // style RCB =
+  // style BISECTION = recursive coordinate bisectioning

-  if (style == RCB) {
-    error->all(FLERR,"Balance rcb is not yet supported");
-    if (comm->style == 0)
-      error->all(FLERR,"Cannot use balance rcb with comm_style brick");
+  if (style == BISECTION) {
+    comm->layout = LAYOUT_TILED;
+    bisection(1);
   }

   // output of final result

   if (outflag && me == 0) dumpout(update->ntimestep,fp);

-  // reset comm->uniform flag if necessary
-
-  if (comm->uniform) {
-    if (style == SHIFT) comm->uniform = 0;
-    if (style == XYZ && xflag == USER) comm->uniform = 0;
-    if (style == XYZ && yflag == USER) comm->uniform = 0;
-    if (style == XYZ && zflag == USER) comm->uniform = 0;
-  } else {
-    if (dimension == 3) {
-      if (style == XYZ &&
-          xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
-        comm->uniform = 1;
-    } else {
-      if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM)
-        comm->uniform = 1;
-    }
-  }
-
   // reset proc sub-domains
+  // for either brick or tiled comm style

   if (domain->triclinic) domain->set_lamda_box();
   domain->set_local_box();

@@ -344,7 +340,8 @@ void Balance::command(int narg, char **arg)

   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   Irregular *irregular = new Irregular(lmp);
-  irregular->migrate_atoms(1);
+  if (style == BISECTION) irregular->migrate_atoms(1,rcb->sendproc);
+  else irregular->migrate_atoms(1);
   delete irregular;
   if (domain->triclinic) domain->lamda2x(atom->nlocal);


@@ -382,34 +379,36 @@ void Balance::command(int narg, char **arg)
     }
   }

-  if (me == 0) {
-    if (screen) {
-      fprintf(screen,"  x cuts:");
-      for (int i = 0; i <= comm->procgrid[0]; i++)
-        fprintf(screen," %g",comm->xsplit[i]);
-      fprintf(screen,"\n");
-      fprintf(screen,"  y cuts:");
-      for (int i = 0; i <= comm->procgrid[1]; i++)
-        fprintf(screen," %g",comm->ysplit[i]);
-      fprintf(screen,"\n");
-      fprintf(screen,"  z cuts:");
-      for (int i = 0; i <= comm->procgrid[2]; i++)
-        fprintf(screen," %g",comm->zsplit[i]);
-      fprintf(screen,"\n");
-    }
-    if (logfile) {
-      fprintf(logfile,"  x cuts:");
-      for (int i = 0; i <= comm->procgrid[0]; i++)
-        fprintf(logfile," %g",comm->xsplit[i]);
-      fprintf(logfile,"\n");
-      fprintf(logfile,"  y cuts:");
-      for (int i = 0; i <= comm->procgrid[1]; i++)
-        fprintf(logfile," %g",comm->ysplit[i]);
-      fprintf(logfile,"\n");
-      fprintf(logfile,"  z cuts:");
-      for (int i = 0; i <= comm->procgrid[2]; i++)
-        fprintf(logfile," %g",comm->zsplit[i]);
-      fprintf(logfile,"\n");
-    }
-  }
+  if (style != BISECTION) {
+    if (me == 0) {
+      if (screen) {
+        fprintf(screen,"  x cuts:");
+        for (int i = 0; i <= comm->procgrid[0]; i++)
+          fprintf(screen," %g",comm->xsplit[i]);
+        fprintf(screen,"\n");
+        fprintf(screen,"  y cuts:");
+        for (int i = 0; i <= comm->procgrid[1]; i++)
+          fprintf(screen," %g",comm->ysplit[i]);
+        fprintf(screen,"\n");
+        fprintf(screen,"  z cuts:");
+        for (int i = 0; i <= comm->procgrid[2]; i++)
+          fprintf(screen," %g",comm->zsplit[i]);
+        fprintf(screen,"\n");
+      }
+      if (logfile) {
+        fprintf(logfile,"  x cuts:");
+        for (int i = 0; i <= comm->procgrid[0]; i++)
+          fprintf(logfile," %g",comm->xsplit[i]);
+        fprintf(logfile,"\n");
+        fprintf(logfile,"  y cuts:");
+        for (int i = 0; i <= comm->procgrid[1]; i++)
+          fprintf(logfile," %g",comm->ysplit[i]);
+        fprintf(logfile,"\n");
+        fprintf(logfile,"  z cuts:");
+        for (int i = 0; i <= comm->procgrid[2]; i++)
+          fprintf(logfile," %g",comm->zsplit[i]);
+        fprintf(logfile,"\n");
+      }
+    }
+  }
 }

@@ -467,13 +466,52 @@ double Balance::imbalance_splits(int &max)
   return imbalance;
 }

+/* ----------------------------------------------------------------------
+   perform balancing via RCB class
+   sortflag = flag for sorting order of received messages by proc ID
+------------------------------------------------------------------------- */
+
+int *Balance::bisection(int sortflag)
+{
+  if (!rcb) rcb = new RCB(lmp);
+
+  // NOTE: lo/hi args could be simulation box or particle bounding box
+  // NOTE: triclinic needs to be in lamda coords
+
+  int dim = domain->dimension;
+  double *boxlo = domain->boxlo;
+  double *boxhi = domain->boxhi;
+  double *prd = domain->prd;
+
+  rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi);
+  rcb->invert(sortflag);
+
+  // NOTE: this logic is specific to orthogonal boxes, not triclinic
+
+  double (*mysplit)[2] = comm->mysplit;
+
+  mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0];
+  if (rcb->hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
+  else mysplit[0][1] = (rcb->hi[0] - boxlo[0]) / prd[0];
+
+  mysplit[1][0] = (rcb->lo[1] - boxlo[1]) / prd[1];
+  if (rcb->hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
+  else mysplit[1][1] = (rcb->hi[1] - boxlo[1]) / prd[1];
+
+  mysplit[2][0] = (rcb->lo[2] - boxlo[2]) / prd[2];
+  if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
+  else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2];
+
+  return rcb->sendproc;
+}
+
 /* ----------------------------------------------------------------------
    setup static load balance operations
-   called from command
+   called from command and indirectly initially from fix balance
   set rho = 0 for static balancing
 ------------------------------------------------------------------------- */

-void Balance::static_setup(char *str)
+void Balance::shift_setup_static(char *str)
 {
   shift_allocate = 1;


@@ -498,21 +536,35 @@ void Balance::static_setup(char *str)
   losum = new bigint[max+1];
   hisum = new bigint[max+1];

+  // if current layout is TILED, set initial uniform splits in Comm
+  // this gives starting point to subsequent shift balancing
+
+  if (comm->layout == LAYOUT_TILED) {
+    int *procgrid = comm->procgrid;
+    double *xsplit = comm->xsplit;
+    double *ysplit = comm->ysplit;
+    double *zsplit = comm->zsplit;
+
+    for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
+    for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
+    for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
+    xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
+  }
+
   rho = 0;
 }

 /* ----------------------------------------------------------------------
    setup shift load balance operations
    called from fix balance
-   set rho = 1 for shift balancing after call to shift_setup()
+   set rho = 1 to do dynamic balancing after call to shift_setup_static()
 ------------------------------------------------------------------------- */

 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 {
-  static_setup(str);
+  shift_setup_static(str);
   nitermax = nitermax_in;
   stopthresh = thresh_in;

   rho = 1;
 }

@@ -525,7 +577,7 @@ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 int Balance::shift()
 {
   int i,j,k,m,np,max;
-  double *split = NULL;
+  double *split;

   // no balancing if no atoms


@@ -590,7 +642,7 @@ int Balance::shift()
   // iterate until balanced

 #ifdef BALANCE_DEBUG
-  if (me == 0) debug_output(idim,0,np,split);
+  if (me == 0) debug_shift_output(idim,0,np,split);
 #endif

   int doneflag;

@@ -601,7 +653,7 @@ int Balance::shift()
     niter++;

 #ifdef BALANCE_DEBUG
-    if (me == 0) debug_output(idim,m+1,np,split);
+    if (me == 0) debug_shift_output(idim,m+1,np,split);
     if (me == 0 && fp) dumpout(update->ntimestep,fp);
 #endif


@@ -827,7 +879,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)

   int nx = comm->procgrid[0] + 1;
   int ny = comm->procgrid[1] + 1;
-  //int nz = comm->procgrid[2] + 1;
+  int nz = comm->procgrid[2] + 1;

   if (dimension == 2) {
     int m = 0;

@@ -914,7 +966,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
 ------------------------------------------------------------------------- */

 #ifdef BALANCE_DEBUG
-void Balance::debug_output(int idim, int m, int np, double *split)
+void Balance::debug_shift_output(int idim, int m, int np, double *split)
 {
   int i;
   const char *dim = NULL;
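Note on Balance::bisection() above: it converts the RCB box assigned to this processor into fractional bounds, mysplit[d][0] = (rcb->lo[d] - boxlo[d]) / prd[d], pinning the upper bound to exactly 1.0 on the box face to avoid round-off, and returns rcb->sendproc so that command() can hand it to Irregular::migrate_atoms(). A tiny worked example with assumed numbers (illustration only, not part of the commit):

    // assumed values: box spans x = [0,10], RCB slice for this proc is x = [2.5,5.0]
    double boxlo = 0.0, boxhi = 10.0, prd = boxhi - boxlo;
    double rcb_lo = 2.5, rcb_hi = 5.0;
    double lofrac = (rcb_lo - boxlo) / prd;                            // 0.25
    double hifrac = (rcb_hi == boxhi) ? 1.0 : (rcb_hi - boxlo) / prd;  // 0.5
    // -> mysplit[0] = {0.25, 0.5}: this proc owns the x-fraction [0.25, 0.5) of the box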
src/balance.h

@@ -27,11 +27,14 @@ namespace LAMMPS_NS {

 class Balance : protected Pointers {
  public:
+  class RCB *rcb;
+
   Balance(class LAMMPS *);
   ~Balance();
   void command(int, char **);
   void shift_setup(char *, int, double);
   int shift();
+  int *bisection(int sortflag = 0);
   double imbalance_nlocal(int &);
   void dumpout(bigint, FILE *);

@@ -66,13 +69,13 @@ class Balance : protected Pointers {
   FILE *fp;
   int firststep;

-  void static_setup(char *);
   double imbalance_splits(int &);
+  void shift_setup_static(char *);
   void tally(int, int, double *);
   int adjust(int, double *);
   int binary(double, int, double *);
 #ifdef BALANCE_DEBUG
-  void debug_output(int, int, int, double *);
+  void debug_shift_output(int, int, int, double *);
 #endif
 };

src/comm.cpp

@@ -47,25 +47,95 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
   gridflag = ONELEVEL;
   mapflag = CART;
   customfile = NULL;
+  outfile = NULL;
   recv_from_partition = send_to_partition = -1;
   otherflag = 0;
-  outfile = NULL;
+  maxexchange_atom = maxexchange_fix = 0;

+  grid2proc = NULL;
   xsplit = ysplit = zsplit = NULL;

+  // use of OpenMP threads
+  // query OpenMP for number of threads/process set by user at run-time
+  // if the OMP_NUM_THREADS environment variable is not set, we default
+  // to using 1 thread. This follows the principle of the least surprise,
+  // while practically all OpenMP implementations violate it by using
+  // as many threads as there are (virtual) CPU cores by default.
+
+  nthreads = 1;
+#ifdef _OPENMP
+  if (lmp->kokkos) {
+    nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
+  } else if (getenv("OMP_NUM_THREADS") == NULL) {
+    nthreads = 1;
+    if (me == 0)
+      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
+  } else {
+    nthreads = omp_get_max_threads();
+  }
+
+  // enforce consistent number of threads across all MPI tasks
+
+  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
+  if (!lmp->kokkos) omp_set_num_threads(nthreads);
+
+  if (me == 0) {
+    if (screen)
+      fprintf(screen,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
+    if (logfile)
+      fprintf(logfile,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
+  }
+#endif
+
 }

 /* ---------------------------------------------------------------------- */

 Comm::~Comm()
 {
+  memory->destroy(grid2proc);
   memory->destroy(xsplit);
   memory->destroy(ysplit);
   memory->destroy(zsplit);

   delete [] customfile;
   delete [] outfile;
 }

+/* ----------------------------------------------------------------------
+   deep copy of arrays from old Comm class to new one
+   all public/protected vectors/arrays in parent Comm class must be copied
+   called from alternate constructor of child classes
+   when new comm style is created from Input
+------------------------------------------------------------------------- */
+
+void Comm::copy_arrays(Comm *oldcomm)
+{
+  if (oldcomm->grid2proc) {
+    memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
+                   "comm:grid2proc");
+    memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0],
+           (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int));
+
+    memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
+    memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
+    memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
+    memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double));
+    memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double));
+    memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double));
+  }
+
+  if (customfile) {
+    int n = strlen(oldcomm->customfile) + 1;
+    customfile = new char[n];
+    strcpy(customfile,oldcomm->customfile);
+  }
+  if (outfile) {
+    int n = strlen(oldcomm->outfile) + 1;
+    outfile = new char[n];
+    strcpy(outfile,oldcomm->outfile);
+  }
+}
+
 /* ----------------------------------------------------------------------
    modify communication params
    invoked from input script by comm_modify command
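Note on the OpenMP block moved into the base Comm constructor above: the thread count now follows one fixed rule per MPI task. A condensed sketch of that rule (the free function below is an assumption for illustration; the real code sets Comm::nthreads inline as shown above):

    #include <cstdlib>
    #include <omp.h>

    // sketch of the decision implemented in Comm::Comm() above
    static int choose_nthreads(bool use_kokkos, int kokkos_threads)
    {
      if (use_kokkos) return kokkos_threads;                 // Kokkos settings win
      if (std::getenv("OMP_NUM_THREADS") == NULL) return 1;  // unset -> 1 thread plus a warning
      return omp_get_max_threads();                          // otherwise honor the env value
    }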
src/comm.h

@@ -21,21 +21,15 @@ namespace LAMMPS_NS {
 class Comm : protected Pointers {
  public:
   int style;     // comm pattern: 0 = 6-way stencil, 1 = irregular tiling
-  int layout;    // current proc domains: 0 = logical bricks, 1 = general tiling
-                 // can do style=1 on layout=0, but not vice versa
-                 // NOTE: uniform needs to be subsumed into layout
-  int uniform;   // 1 = equal subdomains, 0 = load-balanced
+  int layout;    // LAYOUT_UNIFORM = logical equal-sized bricks
+                 // LAYOUT_NONUNIFORM = logical bricks,
+                 //   but different sizes due to LB
+                 // LAYOUT_TILED = general tiling, due to RCB LB

   int me,nprocs;                    // proc info
-  int procgrid[3];                  // procs assigned in each dim of 3d grid
-  int user_procgrid[3];             // user request for procs in each dim
-  int myloc[3];                     // which proc I am in each dim
-  int procneigh[3][2];              // my 6 neighboring procs, 0/1 = left/right
   int ghost_velocity;               // 1 if ghost atoms have velocity, 0 if not
-  double *xsplit,*ysplit,*zsplit;   // fractional (0-1) sub-domain sizes
   double cutghost[3];               // cutoffs used for acquiring ghost atoms
   double cutghostuser;              // user-specified ghost cutoff
-  int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid
   int recv_from_partition;          // recv proc layout from this partition
   int send_to_partition;            // send my proc layout to this partition
                                     // -1 if no recv or send

@@ -45,8 +39,24 @@ class Comm : protected Pointers {
   int maxexchange_fix;      // max contribution to exchange from Fixes
   int nthreads;             // OpenMP threads per MPI process

+  // public settings specific to layout = UNIFORM, NONUNIFORM
+
+  int procgrid[3];                  // procs assigned in each dim of 3d grid
+  int user_procgrid[3];             // user request for procs in each dim
+  int myloc[3];                     // which proc I am in each dim
+  int procneigh[3][2];              // my 6 neighboring procs, 0/1 = left/right
+  double *xsplit,*ysplit,*zsplit;   // fractional (0-1) sub-domain sizes
+  int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid
+
+  // public settings specific to layout = TILED
+
+  double mysplit[3][2];             // fractional (0-1) bounds of my sub-domain
+
+  // methods
+
   Comm(class LAMMPS *);
   virtual ~Comm();
+  void copy_arrays(class Comm *);
   void modify_params(int, char **);

   void set_processors(int, char **);      // set 3d processor grid attributes
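Note on the layout documentation above: for LAYOUT_UNIFORM and LAYOUT_NONUNIFORM the xsplit/ysplit/zsplit arrays hold the fractional cut positions of the logical 3d grid, while LAYOUT_TILED relies on mysplit only. A small worked example with an assumed procgrid[0] = 4, mirroring the uniform initialization done in balance.cpp above (illustration only):

    // assumed 4 procs along x; uniform splits as set in Balance::shift_setup_static()
    const int nprocx = 4;
    double xsplit[nprocx+1];
    for (int i = 0; i < nprocx; i++) xsplit[i] = i * 1.0 / nprocx;
    xsplit[nprocx] = 1.0;
    // xsplit = {0.0, 0.25, 0.5, 0.75, 1.0}; proc i owns the x-fraction [xsplit[i], xsplit[i+1])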
src/comm_brick.cpp

@@ -22,6 +22,7 @@
 #include "stdio.h"
 #include "stdlib.h"
 #include "comm_brick.h"
+#include "comm_tiled.h"
 #include "universe.h"
 #include "atom.h"
 #include "atom_vec.h"

@@ -52,58 +53,57 @@ using namespace LAMMPS_NS;
 #define BIG 1.0e20

 enum{SINGLE,MULTI};          // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};     // several files

 /* ---------------------------------------------------------------------- */

 CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
 {
-  style = layout = 0;
+  style = 0;
+  layout = LAYOUT_UNIFORM;
+  init_buffers();
+}

-  recv_from_partition = send_to_partition = -1;
-  otherflag = 0;
-
-  grid2proc = NULL;
-
-  uniform = 1;
+/* ---------------------------------------------------------------------- */
+
+CommBrick::~CommBrick()
+{
+  free_swap();
+  if (mode == MULTI) {
+    free_multi();
+    memory->destroy(cutghostmulti);
+  }
+
+  if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
+  memory->sfree(sendlist);
+  memory->destroy(maxsendlist);
+
+  memory->destroy(buf_send);
+  memory->destroy(buf_recv);
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommBrick::CommBrick(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+  if (oldcomm->layout == LAYOUT_TILED)
+    error->all(FLERR,"Cannot change to comm_style brick from tiled layout");
+
+  style = 0;
+  layout = oldcomm->layout;
+  copy_arrays(oldcomm);
+  init_buffers();
+}
+
+/* ----------------------------------------------------------------------
+   initialize comm buffers and other data structs local to CommBrick
+------------------------------------------------------------------------- */
+
+void CommBrick::init_buffers()
+{
   multilo = multihi = NULL;
   cutghostmulti = NULL;

-  // use of OpenMP threads
-  // query OpenMP for number of threads/process set by user at run-time
-  // if the OMP_NUM_THREADS environment variable is not set, we default
-  // to using 1 thread. This follows the principle of the least surprise,
-  // while practically all OpenMP implementations violate it by using
-  // as many threads as there are (virtual) CPU cores by default.
-
-  nthreads = 1;
-#ifdef _OPENMP
-  if (lmp->kokkos) {
-    nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
-  } else if (getenv("OMP_NUM_THREADS") == NULL) {
-    nthreads = 1;
-    if (me == 0)
-      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
-  } else {
-    nthreads = omp_get_max_threads();
-  }
-
-  // enforce consistent number of threads across all MPI tasks
-
-  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
-  if (!lmp->kokkos) omp_set_num_threads(nthreads);
-
-  if (me == 0) {
-    if (screen)
-      fprintf(screen,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
-    if (logfile)
-      fprintf(logfile,"  using %d OpenMP thread(s) per MPI task\n",nthreads);
-  }
-#endif
-
-  // initialize comm buffers & exchange memory
-  // NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body
-
-  maxexchange_atom = maxexchange_fix = 0;
   maxexchange = maxexchange_atom + maxexchange_fix;
   bufextra = maxexchange + BUFEXTRA;


@@ -125,26 +125,6 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)

 /* ---------------------------------------------------------------------- */

-CommBrick::~CommBrick()
-{
-  memory->destroy(grid2proc);
-
-  free_swap();
-  if (mode == MULTI) {
-    free_multi();
-    memory->destroy(cutghostmulti);
-  }
-
-  if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
-  memory->sfree(sendlist);
-  memory->destroy(maxsendlist);
-
-  memory->destroy(buf_send);
-  memory->destroy(buf_recv);
-}
-
-/* ---------------------------------------------------------------------- */
-
 void CommBrick::init()
 {
   triclinic = domain->triclinic;

@@ -280,12 +260,12 @@ void CommBrick::setup()
   //     0 = to left, 1 = to right
   //     set equal to recvneed[idim][1/0] of neighbor proc
   // maxneed[idim] = max procs away any proc recvs atoms in either direction
-  // uniform = 1 = uniform sized sub-domains:
+  // layout = UNIFORM = uniform sized sub-domains:
   //   maxneed is directly computable from sub-domain size
   //     limit to procgrid-1 for non-PBC
   //   recvneed = maxneed except for procs near non-PBC
   //   sendneed = recvneed of neighbor on each side
-  // uniform = 0 = non-uniform sized sub-domains:
+  // layout = NONUNIFORM = non-uniform sized sub-domains:
   //   compute recvneed via updown() which accounts for non-PBC
   //   sendneed = recvneed of neighbor on each side
   //   maxneed via Allreduce() of recvneed

@@ -293,7 +273,7 @@ void CommBrick::setup()
   int *periodicity = domain->periodicity;
   int left,right;

-  if (uniform) {
+  if (layout == LAYOUT_UNIFORM) {
     maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
     maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
     maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;

@@ -561,16 +541,15 @@ void CommBrick::forward_comm(int dummy)
     } else {
       if (comm_x_only) {
         if (sendnum[iswap])
-          n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
-                              x[firstrecv[iswap]],pbc_flag[iswap],
-                              pbc[iswap]);
+          avec->pack_comm(sendnum[iswap],sendlist[iswap],
+                          x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]);
       } else if (ghost_velocity) {
-        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
+        avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
                             buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
       } else {
-        n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
+        avec->pack_comm(sendnum[iswap],sendlist[iswap],
                         buf_send,pbc_flag[iswap],pbc[iswap]);
         avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
       }
     }

@@ -620,10 +599,10 @@ void CommBrick::reverse_comm()
     } else {
       if (comm_f_only) {
         if (sendnum[iswap])
          avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
                               f[firstrecv[iswap]]);
       } else {
-        n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
+        avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
         avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
       }
     }
src/comm_brick.h

@@ -21,6 +21,7 @@ namespace LAMMPS_NS {
 class CommBrick : public Comm {
  public:
   CommBrick(class LAMMPS *);
+  CommBrick(class LAMMPS *, class Comm *);
   virtual ~CommBrick();

   virtual void init();

@@ -80,6 +81,7 @@ class CommBrick : public Comm {
   int maxexchange;              // max # of datums/atom in exchange comm
   int bufextra;                 // extra space beyond maxsend in send buffer

+  void init_buffers();
   int updown(int, int, int, double, int, double *);
                                 // compare cutoff to procs
   virtual void grow_send(int, int);          // reallocate send buffer
src/comm_tiled.cpp

@@ -11,15 +11,20 @@
    See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

+#include "string.h"
 #include "lmptype.h"
 #include "comm_tiled.h"
+#include "comm_brick.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "domain.h"
+#include "force.h"
 #include "pair.h"
+#include "neighbor.h"
 #include "modify.h"
 #include "fix.h"
 #include "compute.h"
+#include "output.h"
 #include "dump.h"
 #include "memory.h"
 #include "error.h"

@@ -27,32 +32,93 @@
 using namespace LAMMPS_NS;

 #define BUFFACTOR 1.5
+#define BUFFACTOR 1.5
+#define BUFMIN 1000
+#define BUFEXTRA 1000

 enum{SINGLE,MULTI};          // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};     // several files

 /* ---------------------------------------------------------------------- */

 CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
 {
-  style = 1;
-  layout = 0;
-
   error->all(FLERR,"Comm_style tiled is not yet supported");
+
+  style = 1;
+  layout = LAYOUT_UNIFORM;
+  init_buffers();
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+  style = 1;
+  layout = oldcomm->layout;
+  copy_arrays(oldcomm);
+  init_buffers();
 }

 /* ---------------------------------------------------------------------- */

 CommTiled::~CommTiled()
 {
+  free_swap();
+
+  if (sendlist) for (int i = 0; i < nswap; i++) memory->destroy(sendlist[i]);
+  memory->sfree(sendlist);
+  memory->destroy(maxsendlist);
+
+  memory->destroy(buf_send);
+  memory->destroy(buf_recv);
 }

-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   initialize comm buffers and other data structs local to CommTiled
+   NOTE: if this is identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */
+
+void CommTiled::init_buffers()
+{
+  maxexchange = maxexchange_atom + maxexchange_fix;
+  bufextra = maxexchange + BUFEXTRA;
+
+  maxsend = BUFMIN;
+  memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
+  maxrecv = BUFMIN;
+  memory->create(buf_recv,maxrecv,"comm:buf_recv");
+
+  nswap = 2 * domain->dimension;
+  allocate_swap(nswap);
+
+  //sendlist = (int **) memory->smalloc(nswap*sizeof(int *),"comm:sendlist");
+  //memory->create(maxsendlist,nswap,"comm:maxsendlist");
+  //for (int i = 0; i < nswap; i++) {
+  //  maxsendlist[i] = BUFMIN;
+  //  memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
+  //}
+}
+
+/* ----------------------------------------------------------------------
+   NOTE: if this is nearly identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */

 void CommTiled::init()
 {
   triclinic = domain->triclinic;
   map_style = atom->map_style;

+  // temporary restrictions
+
+  if (triclinic)
+    error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box");
+  if (domain->xperiodic || domain->yperiodic ||
+      (domain->dimension == 2 && domain->zperiodic))
+    error->all(FLERR,"Cannot yet use comm_style tiled with periodic box");
+  if (mode == MULTI)
+    error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm");
+
   // comm_only = 1 if only x,f are exchanged in forward/reverse comm
   // comm_x_only = 0 if ghost_velocity since velocities are added

@ -72,51 +138,210 @@ void CommTiled::init()
|
|||||||
|
|
||||||
for (int i = 0; i < modify->nfix; i++)
|
for (int i = 0; i < modify->nfix; i++)
|
||||||
size_border += modify->fix[i]->comm_border;
|
size_border += modify->fix[i]->comm_border;
|
||||||
|
|
||||||
|
// maxexchange = max # of datums/atom in exchange communication
|
||||||
|
// maxforward = # of datums in largest forward communication
|
||||||
|
// maxreverse = # of datums in largest reverse communication
|
||||||
|
// query pair,fix,compute,dump for their requirements
|
||||||
|
// pair style can force reverse comm even if newton off
|
||||||
|
|
||||||
|
maxexchange = BUFMIN + maxexchange_fix;
|
||||||
|
maxforward = MAX(size_forward,size_border);
|
||||||
|
maxreverse = size_reverse;
|
||||||
|
|
||||||
|
if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
|
||||||
|
if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
|
||||||
|
|
||||||
|
for (int i = 0; i < modify->nfix; i++) {
|
||||||
|
maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
|
||||||
|
maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < modify->ncompute; i++) {
|
||||||
|
maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
|
||||||
|
maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < output->ndump; i++) {
|
||||||
|
maxforward = MAX(maxforward,output->dump[i]->comm_forward);
|
||||||
|
maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force->newton == 0) maxreverse = 0;
|
||||||
|
if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
setup spatial-decomposition communication patterns
|
setup spatial-decomposition communication patterns
|
||||||
function of neighbor cutoff(s) & cutghostuser & current box size
|
function of neighbor cutoff(s) & cutghostuser & current box size and tiling
|
||||||
single mode sets slab boundaries (slablo,slabhi) based on max cutoff
|
|
||||||
multi mode sets type-dependent slab boundaries (multilo,multihi)
|
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void CommTiled::setup()
|
void CommTiled::setup()
|
||||||
{
|
{
|
||||||
// error on triclinic or multi?
|
int i;
|
||||||
// set nswap = 2*dim
|
|
||||||
// setup neighbor proc info for exchange()
|
|
||||||
// setup nsendproc and nrecvproc bounts
|
|
||||||
// setup sendproc and recvproc lists
|
|
||||||
// setup sendboxes
|
|
||||||
// reallocate requests and statuses
|
|
||||||
|
|
||||||
// check that cutoff is <= 1/2 of periodic box len?
|
int dimension;
|
||||||
|
int *periodicity;
|
||||||
|
double *prd,*sublo,*subhi,*boxlo,*boxhi;
|
||||||
|
|
||||||
// loop over dims
|
double cut = MAX(neighbor->cutneighmax,cutghostuser);
|
||||||
// left:
|
|
||||||
// construct ghost boxes
|
dimension = domain->dimension;
|
||||||
// differnet in x,y,z
|
periodicity = domain->periodicity;
|
||||||
// account for ghost borders in y,z
|
prd = domain->prd;
|
||||||
// account for PBC by shifting
|
sublo = domain->sublo;
|
||||||
// split into multiple boxes if straddles PBC
|
subhi = domain->subhi;
|
||||||
// drop boxes down RCB tree
|
boxlo = domain->boxlo;
|
||||||
// count unique procs they cover
|
boxhi = domain->boxhi;
|
||||||
// what about self if crosses PBC
|
cutghost[0] = cutghost[1] = cutghost[2] = cut;
|
||||||
// for each proc they cover:
|
|
||||||
// compute box I send it to left
|
if ((periodicity[0] && cut > prd[0]) ||
|
||||||
// is a message I will recv from right (don't care about box)
|
(periodicity[1] && cut > prd[1]) ||
|
||||||
// for ghost-extended boxes
|
(dimension == 3 && periodicity[2] && cut > prd[2]))
|
||||||
// do not count procs that do not overlap my owned box at all
|
error->all(FLERR,"Communication cutoff for comm_style tiled "
|
||||||
// only touching edge of my owned box does not count
|
"cannot exceed periodic box length");
|
||||||
// in this case list I send to and recv from may be different?
|
|
||||||
// same thing to right
|
// allocate overlap
|
||||||
|
int *overlap;
|
||||||
|
int noverlap,noverlap1,indexme;
|
||||||
|
double lo1[3],hi1[3],lo2[3],hi2[3];
|
||||||
|
int one,two;
|
||||||
|
|
||||||
|
nswap = 0;
|
||||||
|
for (int idim = 0; idim < dimension; idim++) {
|
||||||
|
|
||||||
|
// ghost box in lower direction
|
||||||
|
|
||||||
|
one = 1;
|
||||||
|
lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
|
||||||
|
hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2];
|
||||||
|
lo1[idim] = sublo[idim] - cut;
|
||||||
|
hi1[idim] = sublo[idim];
|
||||||
|
|
||||||
|
two = 0;
|
||||||
|
if (periodicity[idim] && lo1[idim] < boxlo[idim]) {
|
||||||
|
two = 1;
|
||||||
|
lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2];
|
||||||
|
hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2];
|
||||||
|
lo2[idim] = lo1[idim] + prd[idim];
|
||||||
|
hi2[idim] = hi1[idim] + prd[idim];
|
||||||
|
+      if (sublo[idim] == boxlo[idim]) {
+        one = 0;
+        hi2[idim] = boxhi[idim];
+      }
+    }
+
+    indexme = -1;
+    noverlap = 0;
+    if (one) {
+      if (layout == LAYOUT_UNIFORM)
+        box_drop_uniform(idim,lo1,hi1,noverlap,overlap,indexme);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_drop_nonuniform(idim,lo1,hi1,noverlap,overlap,indexme);
+      else
+        box_drop_tiled(lo1,hi1,0,nprocs-1,noverlap,overlap,indexme);
+    }
+
+    noverlap1 = noverlap;
+    if (two) {
+      if (layout == LAYOUT_UNIFORM)
+        box_drop_uniform(idim,lo2,hi2,noverlap,overlap,indexme);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_drop_nonuniform(idim,lo2,hi2,noverlap,overlap,indexme);
+      else
+        box_drop_tiled(lo2,hi2,0,nprocs-1,noverlap,overlap,indexme);
+    }
+
+    // if this (self) proc is in overlap list, move it to end of list
+
+    if (indexme >= 0) {
+      int tmp = overlap[noverlap-1];
+      overlap[noverlap-1] = overlap[indexme];
+      overlap[indexme] = tmp;
+    }
+
+    // overlap now has list of noverlap procs
+    // includes PBC effects
+
+    if (overlap[noverlap-1] == me) sendself[nswap] = 1;
+    else sendself[nswap] = 0;
+    if (noverlap-sendself[nswap]) sendother[nswap] = 1;
+    else sendother[nswap] = 0;
+
+    nsendproc[nswap] = noverlap;
+    for (i = 0; i < noverlap; i++) sendproc[nswap][i] = overlap[i];
+    nrecvproc[nswap+1] = noverlap;
+    for (i = 0; i < noverlap; i++) recvproc[nswap+1][i] = overlap[i];
+
+    // compute sendbox for each of my sends
+    // ibox = intersection of ghostbox with other proc's sub-domain
+    // sendbox = ibox displaced by cutoff in dim
+
+    // NOTE: need to extend send box in lower dims by cutoff
+    // NOTE: this logic for overlapping boxes is not correct for sending
+
+    double oboxlo[3],oboxhi[3],sbox[6];
+
+    for (i = 0; i < noverlap; i++) {
+      pbc_flag[nswap][i] = 0;
+      pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] =
+        pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0;
+
+      if (layout == LAYOUT_UNIFORM)
+        box_other_uniform(overlap[i],oboxlo,oboxhi);
+      else if (layout == LAYOUT_NONUNIFORM)
+        box_other_nonuniform(overlap[i],oboxlo,oboxhi);
+      else
+        box_other_tiled(overlap[i],oboxlo,oboxhi);
+
+      if (i < noverlap1) {
+        sbox[0] = MAX(oboxlo[0],lo1[0]);
+        sbox[1] = MAX(oboxlo[1],lo1[1]);
+        sbox[2] = MAX(oboxlo[2],lo1[2]);
+        sbox[3] = MIN(oboxhi[0],hi1[0]);
+        sbox[4] = MIN(oboxhi[1],hi1[1]);
+        sbox[5] = MIN(oboxhi[2],hi1[2]);
+        sbox[idim] += cut;
+        sbox[3+idim] += cut;
+        if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
+      } else {
+        pbc_flag[nswap][i] = 1;
+        pbc[nswap][i][idim] = 1;
+        sbox[0] = MAX(oboxlo[0],lo2[0]);
+        sbox[1] = MAX(oboxlo[1],lo2[1]);
+        sbox[2] = MAX(oboxlo[2],lo2[2]);
+        sbox[3] = MIN(oboxhi[0],hi2[0]);
+        sbox[4] = MIN(oboxhi[1],hi2[1]);
+        sbox[5] = MIN(oboxhi[2],hi2[2]);
+        sbox[idim] -= prd[idim] - cut;
+        sbox[3+idim] -= prd[idim] + cut;
+        if (sbox[idim] == lo1[idim]) sbox[idim] = sublo[idim];
+      }
+
+      if (idim >= 1) {
+        if (sbox[0] == sublo[0]) sbox[0] -= cut;
+        if (sbox[4] == subhi[0]) sbox[4] += cut;
+      }
+      if (idim == 2) {
+        if (sbox[1] == sublo[1]) sbox[1] -= cut;
+        if (sbox[5] == subhi[1]) sbox[5] += cut;
+      }
+
+      memcpy(sendbox[nswap][i],sbox,6*sizeof(double));
+    }
+
+    // ghost box in upper direction
+
+    nswap += 2;
+  }
+
+  // reallocate requests and statuses to max of any swap
+
-  // what need from decomp (RCB):
-  // dropbox: return list of procs with overlap and overlapping boxes
-  //   return n, proclist, boxlist
-  // otherbox: bbox of another proc
-  // dropatom: return what proc owns the atom coord
}
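
The sendbox logic above intersects the ghost box with the other proc's sub-domain and then displaces it by the cutoff in the swap dimension. A minimal standalone sketch of that geometry, assuming illustrative names and values (ghost_lo/ghost_hi, other_lo/other_hi, cut are not LAMMPS symbols):

#include <algorithm>
#include <cstdio>

int main() {
  const int idim = 0;           // swap dimension
  const double cut = 2.5;       // communication cutoff
  double ghost_lo[3] = {-2.5, 0.0, 0.0}, ghost_hi[3] = {0.0, 10.0, 10.0};
  double other_lo[3] = {-5.0, 0.0, 0.0}, other_hi[3] = {0.0, 10.0, 10.0};
  double sbox_lo[3], sbox_hi[3];
  for (int d = 0; d < 3; d++) {
    // intersection of my ghost box with the neighbor's sub-domain
    sbox_lo[d] = std::max(other_lo[d], ghost_lo[d]);
    sbox_hi[d] = std::min(other_hi[d], ghost_hi[d]);
  }
  // displace by the cutoff in the swap dimension: atoms inside the
  // shifted box are the ones the neighbor needs as ghosts
  sbox_lo[idim] += cut;
  sbox_hi[idim] += cut;
  printf("send box: [%g %g] x [%g %g] x [%g %g]\n",
         sbox_lo[0], sbox_hi[0], sbox_lo[1], sbox_hi[1],
         sbox_lo[2], sbox_hi[2]);
  return 0;
}
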
/* ----------------------------------------------------------------------

@@ -126,46 +351,73 @@ void CommTiled::setup()

void CommTiled::forward_comm(int dummy)
{
-  int i,irecv,n;
+  int i,irecv,n,nsend,nrecv;
  MPI_Status status;
  AtomVec *avec = atom->avec;
  double **x = atom->x;

  // exchange data with another set of procs in each swap
-  // if first proc in set is self, then is just self across PBC, just copy
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
  // if comm_x_only set, exchange or copy directly to x, don't unpack

  for (int iswap = 0; iswap < nswap; iswap++) {
-    if (sendproc[iswap][0] != me) {
-      if (comm_x_only) {
-        for (i = 0; i < nrecvproc[iswap]; i++)
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (comm_x_only) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
          MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nsendproc[iswap]; i++) {
+        for (i = 0; i < nsend; i++) {
          n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
                              buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
-        MPI_Waitall(nrecvproc[iswap],requests,statuses);
+      }
+
+      if (sendself[iswap]) {
+        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                        x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend],
+                        pbc[iswap][nsend]);
+      }
+
+      if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);

-      } else if (ghost_velocity) {
-        for (i = 0; i < nrecvproc[iswap]; i++)
+    } else if (ghost_velocity) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
                    size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nsendproc[iswap]; i++) {
+        for (i = 0; i < nsend; i++) {
          n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
                                  buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-          avec->unpack_comm_vel(recvnum[iswap][i],firstrecv[iswap][irecv],
-                                &buf_recv[forward_recv_offset[iswap][irecv]]);
-        }
+      }
+
+      if (sendself[iswap]) {
+        avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                            buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                              buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                                &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
+      }

    } else {
-      for (i = 0; i < nrecvproc[iswap]; i++)
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
                    size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);

@@ -174,27 +426,21 @@ void CommTiled::forward_comm(int dummy)
                            buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-          avec->unpack_comm(recvnum[iswap][i],firstrecv[iswap][irecv],
-                            &buf_recv[forward_recv_offset[iswap][irecv]]);
-        }
      }

-    } else {
-      if (comm_x_only) {
-        if (sendnum[iswap][0])
-          n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
-                              x[firstrecv[iswap][0]],pbc_flag[iswap][0],
-                              pbc[iswap][0]);
-      } else if (ghost_velocity) {
-        n = avec->pack_comm_vel(sendnum[iswap][0],sendlist[iswap][0],
-                                buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_comm_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
-      } else {
-        n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
-                            buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+      if (sendself[iswap]) {
+        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                        buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                          buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                            &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
      }
    }
  }
}
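
The restructured loop above follows a fixed pattern per swap: post all receives from other procs, send to other procs, copy self data without MPI, then wait and unpack. A minimal MPI sketch of that pattern, assuming an illustrative neighbor list (each rank exchanging one double with rank+1 and rank-1), not the LAMMPS data structures:

#include <mpi.h>
#include <vector>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int me, nprocs;
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  std::vector<int> partners;
  if (nprocs > 1) {
    partners.push_back((me + 1) % nprocs);
    partners.push_back((me - 1 + nprocs) % nprocs);
  }
  int nother = (int) partners.size();

  std::vector<double> recvbuf(nother);
  std::vector<MPI_Request> requests(nother);
  for (int i = 0; i < nother; i++)                 // post recvs first
    MPI_Irecv(&recvbuf[i], 1, MPI_DOUBLE, partners[i], 0,
              MPI_COMM_WORLD, &requests[i]);

  double sendval = 100.0 + me;
  for (int i = 0; i < nother; i++)                 // then send to others
    MPI_Send(&sendval, 1, MPI_DOUBLE, partners[i], 0, MPI_COMM_WORLD);

  double selfcopy = sendval;                       // self data needs no MPI
  MPI_Waitall(nother, requests.data(), MPI_STATUSES_IGNORE);

  printf("rank %d: self %g, received %d messages\n", me, selfcopy, nother);
  MPI_Finalize();
  return 0;
}
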
@@ -207,57 +453,70 @@ void CommTiled::forward_comm(int dummy)

void CommTiled::reverse_comm()
{
-  int i,irecv,n;
+  int i,irecv,n,nsend,nrecv;
  MPI_Request request;
  MPI_Status status;
  AtomVec *avec = atom->avec;
  double **f = atom->f;

  // exchange data with another set of procs in each swap
-  // if first proc in set is self, then is just self across PBC, just copy
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
  // if comm_f_only set, exchange or copy directly from f, don't pack

  for (int iswap = nswap-1; iswap >= 0; iswap--) {
-    if (sendproc[iswap][0] != me) {
-      if (comm_f_only) {
-        for (i = 0; i < nsendproc[iswap]; i++)
+    if (comm_f_only) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++)
          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
                    size_reverse_recv[iswap][i],MPI_DOUBLE,
                    sendproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nrecvproc[iswap]; i++)
+        for (i = 0; i < nrecv; i++)
          MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
                   MPI_DOUBLE,recvproc[iswap][i],0,world);
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
-          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
-                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
-        }
+      }

-      } else {
-        for (i = 0; i < nsendproc[iswap]; i++)
-          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
-                    size_reverse_recv[iswap][i],MPI_DOUBLE,
-                    sendproc[iswap][i],0,world,&requests[i]);
-        for (i = 0; i < nrecvproc[iswap]; i++) {
-          n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
-                                 buf_send);
-          MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
-        }
-        for (i = 0; i < nsendproc[iswap]; i++) {
-          MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
+      if (sendself[iswap]) {
+        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                             f[firstrecv[iswap][nrecv]]);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,&status);
          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
        }
      }

    } else {
-      if (comm_f_only) {
-        if (sendnum[iswap][0])
-          avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],
-                               f[firstrecv[iswap][0]]);
-      } else {
-        n = avec->pack_reverse(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
-        avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],buf_send);
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++)
+          MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+                    size_reverse_recv[iswap][i],MPI_DOUBLE,
+                    sendproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nrecv; i++) {
+          n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
+                                 buf_send);
+          MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
+        }
+      }
+
+      if (sendself[iswap]) {
+        avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                           buf_send);
+        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                             buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,&status);
+          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
+                               &buf_recv[reverse_recv_offset[iswap][irecv]]);
+        }
      }
    }
  }
}
@@ -298,7 +557,7 @@ void CommTiled::exchange()

void CommTiled::borders()
{
-  int i,n,irecv,ngroup,nlast,nsend,rmaxswap;
+  int i,n,irecv,ngroup,nlast,nsend,nrecv,ncount,rmaxswap;
  double xlo,xhi,ylo,yhi,zlo,zhi;
  double *bbox;
  double **x;

@@ -333,36 +592,40 @@ void CommTiled::borders()
    if (iswap < 2) nlast = atom->nlocal;
    else nlast = atom->nlocal + atom->nghost;

-    nsend = 0;
+    ncount = 0;
    for (i = 0; i < ngroup; i++)
      if (x[i][0] >= xlo && x[i][0] <= xhi &&
          x[i][1] >= ylo && x[i][1] <= yhi &&
          x[i][2] >= zlo && x[i][2] <= zhi) {
-        if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
-        sendlist[iswap][i][nsend++] = i;
+        if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
+        sendlist[iswap][i][ncount++] = i;
      }
    for (i = atom->nlocal; i < nlast; i++)
      if (x[i][0] >= xlo && x[i][0] <= xhi &&
          x[i][1] >= ylo && x[i][1] <= yhi &&
          x[i][2] >= zlo && x[i][2] <= zhi) {
-        if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
-        sendlist[iswap][i][nsend++] = i;
+        if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
+        sendlist[iswap][i][ncount++] = i;
      }
-    sendnum[iswap][i] = nsend;
-    smax = MAX(smax,nsend);
+    sendnum[iswap][i] = ncount;
+    smax = MAX(smax,ncount);
    }

-    // send sendnum counts to procs who recv from me
+    // send sendnum counts to procs who recv from me except self
+    // copy data to self if sendself is set

-    if (sendproc[iswap][0] != me) {
-      for (i = 0; i < nrecvproc[iswap]; i++)
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (sendother[iswap]) {
+      for (i = 0; i < nrecv; i++)
        MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
                  recvproc[iswap][i],0,world,&requests[i]);
-      for (i = 0; i < nsendproc[iswap]; i++)
+      for (i = 0; i < nsend; i++)
        MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
-      MPI_Waitall(nrecvproc[iswap],requests,statuses);
-    } else recvnum[iswap][0] = sendnum[iswap][0];
+    }
+    if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
+    if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);

    // setup other per swap/proc values from sendnum and recvnum

@@ -390,54 +653,64 @@ void CommTiled::borders()

    // swap atoms with other procs using pack_border(), unpack_border()

-    if (sendproc[iswap][0] != me) {
-      for (i = 0; i < nsendproc[iswap]; i++) {
-        if (ghost_velocity) {
-          for (i = 0; i < nrecvproc[iswap]; i++)
-            MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
-                      recvnum[iswap][i]*size_border,
-                      MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-          for (i = 0; i < nsendproc[iswap]; i++) {
-            n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
-                                      buf_send,pbc_flag[iswap][i],
-                                      pbc[iswap][i]);
-            MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
-                     sendproc[iswap][i],0,world);
-          }
-          for (i = 0; i < nrecvproc[iswap]; i++) {
-            MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-            avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv],
-                                &buf_recv[forward_recv_offset[iswap][irecv]]);
-          }
-
-        } else {
-          for (i = 0; i < nrecvproc[iswap]; i++)
-            MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
-                      recvnum[iswap][i]*size_border,
-                      MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
-          for (i = 0; i < nsendproc[iswap]; i++) {
-            n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
-                                  buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
-            MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
-                     sendproc[iswap][i],0,world);
-          }
-          for (i = 0; i < nrecvproc[iswap]; i++) {
-            MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
-            avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv],
-                                &buf_recv[forward_recv_offset[iswap][irecv]]);
-          }
-        }
-
-    } else {
-      if (ghost_velocity) {
-        n = avec->pack_border_vel(sendnum[iswap][0],sendlist[iswap][0],
-                                  buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_border_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
-      } else {
-        n = avec->pack_border(sendnum[iswap][0],sendlist[iswap][0],
-                              buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
-        avec->unpack_border(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+    if (ghost_velocity) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
+          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                    recvnum[iswap][i]*size_border,
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsend; i++) {
+          n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
+                                    buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+                   sendproc[iswap][i],0,world);
+        }
+      }
+
+      if (sendself[iswap]) {
+        n = avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                                  buf_send,pbc_flag[iswap][nsend],
+                                  pbc[iswap][nsend]);
+        avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+                                buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                              &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
+      }
+
+    } else {
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++)
+          MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
+                    recvnum[iswap][i]*size_border,
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        for (i = 0; i < nsend; i++) {
+          n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
+                                buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+                   sendproc[iswap][i],0,world);
+        }
+      }
+
+      if (sendself[iswap]) {
+        n = avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend],
+                              buf_send,pbc_flag[iswap][nsend],
+                              pbc[iswap][nsend]);
+        avec->unpack_border(recvnum[iswap][nsend],firstrecv[iswap][nsend],
+                            buf_send);
+      }
+
+      if (sendother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,&status);
+          avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                              &buf_recv[forward_recv_offset[iswap][irecv]]);
+        }
      }
    }
  }

@@ -786,6 +1059,64 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
  return nrecv;
}

+/* ----------------------------------------------------------------------
+   determine overlap list of Noverlap procs the lo/hi box overlaps
+   overlap = non-zero area in common between box and proc sub-domain
+   box is owned by me and extends in dim
+------------------------------------------------------------------------- */
+
+void CommTiled::box_drop_uniform(int dim, double *lo, double *hi,
+                                 int &noverlap, int *overlap, int &indexme)
+{
+
+}
+
+/* ----------------------------------------------------------------------
+   determine overlap list of Noverlap procs the lo/hi box overlaps
+   overlap = non-zero area in common between box and proc sub-domain
+------------------------------------------------------------------------- */
+
+void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi,
+                                    int &noverlap, int *overlap, int &indexme)
+{
+}
+
+/* ----------------------------------------------------------------------
+   determine overlap list of Noverlap procs the lo/hi box overlaps
+   overlap = non-zero area in common between box and proc sub-domain
+   recursive routine for traversing an RCB tree of cuts
+------------------------------------------------------------------------- */
+
+void CommTiled::box_drop_tiled(double *lo, double *hi,
+                               int proclower, int procupper,
+                               int &noverlap, int *overlap, int &indexme)
+{
+  // end recursion when partition is a single proc
+  // add proc to overlap list
+
+  if (proclower == procupper) {
+    if (proclower == me) indexme = noverlap;
+    overlap[noverlap++] = proclower;
+    return;
+  }
+
+  // drop box on each side of cut it extends beyond
+  // use > and < criteria to not include procs it only touches
+  // procmid = 1st processor in upper half of partition
+  //         = location in tree that stores this cut
+  // dim = 0,1,2 dimension of cut
+  // cut = position of cut
+
+  int procmid = proclower + (procupper - proclower) / 2 + 1;
+  double cut = tree[procmid].cut;
+  int dim = tree[procmid].dim;
+
+  if (lo[dim] < cut)
+    box_drop_tiled(lo,hi,proclower,procmid-1,noverlap,overlap,indexme);
+  if (hi[dim] > cut)
+    box_drop_tiled(lo,hi,procmid,procupper,noverlap,overlap,indexme);
+}

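
box_drop_tiled() walks the RCB tree of cuts and recurses into whichever half of the processor partition the box extends beyond. A minimal standalone sketch of the same traversal on an assumed 4-proc tree (one x cut, then one y cut per half); the tree values are illustrative, not taken from an actual RCB decomposition:

#include <cstdio>

struct Cut { double cut; int dim; };

// tree[procmid] stores the cut separating [proclower..procmid-1]
// from [procmid..procupper]
static Cut tree[4] = { {0.0,0}, {5.0,1}, {5.0,0}, {5.0,1} };

void box_drop(const double *lo, const double *hi,
              int proclower, int procupper, int &noverlap, int *overlap) {
  if (proclower == procupper) {          // single proc: record overlap
    overlap[noverlap++] = proclower;
    return;
  }
  int procmid = proclower + (procupper - proclower) / 2 + 1;
  double cut = tree[procmid].cut;
  int dim = tree[procmid].dim;
  if (lo[dim] < cut) box_drop(lo, hi, proclower, procmid - 1, noverlap, overlap);
  if (hi[dim] > cut) box_drop(lo, hi, procmid, procupper, noverlap, overlap);
}

int main() {
  double lo[3] = {4.0, 4.0, 0.0}, hi[3] = {6.0, 6.0, 1.0};
  int overlap[4], noverlap = 0;
  box_drop(lo, hi, 0, 3, noverlap, overlap);
  printf("box overlaps %d procs:", noverlap);
  for (int i = 0; i < noverlap; i++) printf(" %d", overlap[i]);
  printf("\n");                          // this box straddles all 4 procs
  return 0;
}
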
/* ----------------------------------------------------------------------
   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
   if flag = 1, realloc

@@ -825,6 +1156,42 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
                   "comm:sendlist[iswap]");
}

+/* ----------------------------------------------------------------------
+   allocation of swap info
+------------------------------------------------------------------------- */
+
+void CommTiled::allocate_swap(int n)
+{
+  memory->create(sendnum,n,"comm:sendnum");
+  memory->create(recvnum,n,"comm:recvnum");
+  memory->create(sendproc,n,"comm:sendproc");
+  memory->create(recvproc,n,"comm:recvproc");
+  memory->create(size_forward_recv,n,"comm:size");
+  memory->create(size_reverse_send,n,"comm:size");
+  memory->create(size_reverse_recv,n,"comm:size");
+  memory->create(firstrecv,n,"comm:firstrecv");
+  memory->create(pbc_flag,n,"comm:pbc_flag");
+  memory->create(pbc,n,6,"comm:pbc");
+}
+
+/* ----------------------------------------------------------------------
+   free memory for swaps
+------------------------------------------------------------------------- */
+
+void CommTiled::free_swap()
+{
+  memory->destroy(sendnum);
+  memory->destroy(recvnum);
+  memory->destroy(sendproc);
+  memory->destroy(recvproc);
+  memory->destroy(size_forward_recv);
+  memory->destroy(size_reverse_send);
+  memory->destroy(size_reverse_recv);
+  memory->destroy(firstrecv);
+  memory->destroy(pbc_flag);
+  memory->destroy(pbc);
+}

/* ----------------------------------------------------------------------
   return # of bytes of allocated memory
------------------------------------------------------------------------- */
@@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommTiled : public Comm {
 public:
  CommTiled(class LAMMPS *);
+  CommTiled(class LAMMPS *, class Comm *);
  virtual ~CommTiled();

  void init();

@@ -46,15 +47,16 @@ class CommTiled : public Comm {
  bigint memory_usage();

 private:
-  int nswap;                    // # of swaps to perform = 2*dim
-  int *nsendproc,*nrecvproc;    // # of procs to send/recv to/from in each swap
  int triclinic;                // 0 if domain is orthog, 1 if triclinic
  int map_style;                // non-0 if global->local mapping is done
  int size_forward;             // # of per-atom datums in forward comm
  int size_reverse;             // # of datums in reverse comm
  int size_border;              // # of datums in forward border comm

+  int nswap;                    // # of swaps to perform = 2*dim
+  int *nsendproc,*nrecvproc;    // # of procs to send/recv to/from in each swap
+  int *sendother;               // 1 if send to any other proc in each swap
+  int *sendself;                // 1 if send to self in each swap
  int **sendnum,**recvnum;      // # of atoms to send/recv per swap/proc
  int **sendproc,**recvproc;    // proc to send/recv to/from per swap/proc
  int **size_forward_recv;      // # of values to recv in each forward swap/proc

@@ -77,6 +79,7 @@ class CommTiled : public Comm {
  int maxsend,maxrecv;          // current size of send/recv buffer
  int maxforward,maxreverse;    // max # of datums in forward/reverse comm
+  int maxexchange;              // max # of datums/atom in exchange comm
  int bufextra;                 // extra space beyond maxsend in send buffer

  MPI_Request *requests;

@@ -84,9 +87,35 @@ class CommTiled : public Comm {

  int comm_x_only,comm_f_only;  // 1 if only exchange x,f in for/rev comm

+  struct Tree {
+    double cut;
+    int dim;
+  };
+
+  Tree *tree;
+
+  // info from RCB decomp
+
+  double rcbcut;
+  int rcbcutdim;
+  double rcblo[3];
+  double rcbhi[3];
+
+  void init_buffers();
+
+  void box_drop_uniform(int, double *, double *, int &, int *, int &);
+  void box_drop_nonuniform(int, double *, double *, int &, int *, int &);
+  void box_drop_tiled(double *, double *, int, int, int &, int *, int &);
+
+  void box_other_uniform(int, double *, double *) {}
+  void box_other_nonuniform(int, double *, double *) {}
+  void box_other_tiled(int, double *, double *) {}
+
  void grow_send(int, int);          // reallocate send buffer
  void grow_recv(int);               // free/allocate recv buffer
  void grow_list(int, int, int);     // reallocate sendlist for one swap/proc
+  void allocate_swap(int);           // allocate swap arrays
+  void free_swap();                  // free swap arrays
};

}
@@ -44,14 +44,15 @@
using namespace LAMMPS_NS;
using namespace MathConst;

+enum{NO_REMAP,X_REMAP,V_REMAP};    // same as fix_deform.cpp
+enum{IGNORE,WARN,ERROR};           // same as thermo.cpp
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
+
#define BIG   1.0e20
#define SMALL 1.0e-4
#define DELTAREGION 4
#define BONDSTRETCH 1.1

-enum{NO_REMAP,X_REMAP,V_REMAP};    // same as fix_deform.cpp
-enum{IGNORE,WARN,ERROR};           // same as thermo.cpp
-
/* ----------------------------------------------------------------------
   default is periodic
------------------------------------------------------------------------- */

@@ -249,42 +250,56 @@ void Domain::set_global_box()
/* ----------------------------------------------------------------------
   set lamda box params
   assumes global box is defined and proc assignment has been made
-   uses comm->xyz_split to define subbox boundaries in consistent manner
+   uses comm->xyz_split or comm->mysplit
+     to define subbox boundaries in consistent manner
------------------------------------------------------------------------- */

void Domain::set_lamda_box()
{
-  int *myloc = comm->myloc;
-  double *xsplit = comm->xsplit;
-  double *ysplit = comm->ysplit;
-  double *zsplit = comm->zsplit;
-
-  sublo_lamda[0] = xsplit[myloc[0]];
-  subhi_lamda[0] = xsplit[myloc[0]+1];
-
-  sublo_lamda[1] = ysplit[myloc[1]];
-  subhi_lamda[1] = ysplit[myloc[1]+1];
-
-  sublo_lamda[2] = zsplit[myloc[2]];
-  subhi_lamda[2] = zsplit[myloc[2]+1];
+  if (comm->layout != LAYOUT_TILED) {
+    int *myloc = comm->myloc;
+    double *xsplit = comm->xsplit;
+    double *ysplit = comm->ysplit;
+    double *zsplit = comm->zsplit;
+
+    sublo_lamda[0] = xsplit[myloc[0]];
+    subhi_lamda[0] = xsplit[myloc[0]+1];
+    sublo_lamda[1] = ysplit[myloc[1]];
+    subhi_lamda[1] = ysplit[myloc[1]+1];
+    sublo_lamda[2] = zsplit[myloc[2]];
+    subhi_lamda[2] = zsplit[myloc[2]+1];
+
+  } else {
+    double (*mysplit)[2] = comm->mysplit;
+
+    sublo_lamda[0] = mysplit[0][0];
+    subhi_lamda[0] = mysplit[0][1];
+    sublo_lamda[1] = mysplit[1][0];
+    subhi_lamda[1] = mysplit[1][1];
+    sublo_lamda[2] = mysplit[2][0];
+    subhi_lamda[2] = mysplit[2][1];
+  }
}

/* ----------------------------------------------------------------------
   set local subbox params for orthogonal boxes
   assumes global box is defined and proc assignment has been made
-   uses comm->xyz_split to define subbox boundaries in consistent manner
+   uses comm->xyz_split or comm->mysplit
+     to define subbox boundaries in consistent manner
   insure subhi[max] = boxhi
------------------------------------------------------------------------- */

void Domain::set_local_box()
{
-  int *myloc = comm->myloc;
-  int *procgrid = comm->procgrid;
-  double *xsplit = comm->xsplit;
-  double *ysplit = comm->ysplit;
-  double *zsplit = comm->zsplit;
+  if (triclinic) return;

-  if (triclinic == 0) {
+  if (comm->layout != LAYOUT_TILED) {
+    int *myloc = comm->myloc;
+    int *procgrid = comm->procgrid;
+    double *xsplit = comm->xsplit;
+    double *ysplit = comm->ysplit;
+    double *zsplit = comm->zsplit;
+
    sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
    if (myloc[0] < procgrid[0]-1)
      subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];

@@ -299,6 +314,21 @@ void Domain::set_local_box()
    if (myloc[2] < procgrid[2]-1)
      subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
    else subhi[2] = boxhi[2];
+
+  } else {
+    double (*mysplit)[2] = comm->mysplit;
+
+    sublo[0] = boxlo[0] + xprd*mysplit[0][0];
+    subhi[0] = boxlo[0] + xprd*mysplit[0][1];
+    if (mysplit[0][1] == 1.0) subhi[0] = boxhi[0];
+
+    sublo[1] = boxlo[1] + yprd*mysplit[1][0];
+    subhi[1] = boxlo[1] + yprd*mysplit[1][1];
+    if (mysplit[1][1] == 1.0) subhi[1] = boxhi[1];
+
+    sublo[2] = boxlo[2] + zprd*mysplit[2][0];
+    subhi[2] = boxlo[2] + zprd*mysplit[2][1];
+    if (mysplit[2][1] == 1.0) subhi[2] = boxhi[2];
  }
}

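
The tiled branch above maps each dimension's fractional mysplit bounds onto box coordinates. A minimal numeric sketch of that mapping, using an assumed box extent and split fractions rather than real Domain/Comm data:

#include <cstdio>

int main() {
  double boxlo = -10.0, boxhi = 30.0;
  double prd = boxhi - boxlo;                // box length in this dimension
  double mysplit[2] = {0.25, 0.75};          // my fractional slice of the box
  double sublo = boxlo + prd*mysplit[0];
  double subhi = boxlo + prd*mysplit[1];
  if (mysplit[1] == 1.0) subhi = boxhi;      // pin the top edge exactly to boxhi
  printf("sub-domain in this dim: [%g, %g]\n", sublo, subhi);   // [0, 20]
  return 0;
}
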
@@ -22,12 +22,14 @@
#include "irregular.h"
#include "force.h"
#include "kspace.h"
+#include "rcb.h"
#include "error.h"

using namespace LAMMPS_NS;
using namespace FixConst;

-enum{SHIFT,RCB};
+enum{SHIFT,BISECTION};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files

/* ---------------------------------------------------------------------- */

@@ -53,7 +55,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
  thresh = force->numeric(FLERR,arg[4]);

  if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
-  else if (strcmp(arg[5],"rcb") == 0) lbstyle = RCB;
+  else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION;
  else error->all(FLERR,"Illegal fix balance command");

  int iarg = 5;

@@ -65,7 +67,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
    stopthresh = force->numeric(FLERR,arg[iarg+3]);
    if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
    iarg += 4;
-  } else if (lbstyle == RCB) {
+  } else if (lbstyle == BISECTION) {
    iarg++;
  }

@@ -97,14 +99,16 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
    }
  }

-  // create instance of Balance class and initialize it with params
-  // NOTE: do I need Balance instance if RCB?
-  // create instance of Irregular class
+  if (lbstyle == BISECTION && comm->style == 0)
+    error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick");
+
+  // create instance of Balance class
+  // if SHIFT, initialize it with params

  balance = new Balance(lmp);

  if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh);
-  if (lbstyle == RCB) error->all(FLERR,"Fix balance rcb is not yet supported");
+
+  // create instance of Irregular class

  irregular = new Irregular(lmp);

@@ -238,32 +242,33 @@ void FixBalance::rebalance()
{
  imbprev = imbnow;

+  // invoke balancer and reset comm->uniform flag
+
+  int *sendproc;
  if (lbstyle == SHIFT) {
    itercount = balance->shift();
-  } else if (lbstyle == RCB) {
+    comm->layout = LAYOUT_NONUNIFORM;
+  } else if (lbstyle == BISECTION) {
+    sendproc = balance->bisection();
+    comm->layout = LAYOUT_TILED;
  }

  // output of final result

  if (fp) balance->dumpout(update->ntimestep,fp);

-  // reset comm->uniform flag
-  // NOTE: this needs to change with RCB
-
-  comm->uniform = 0;
-
  // reset proc sub-domains

  if (domain->triclinic) domain->set_lamda_box();
  domain->set_local_box();

-  // if splits moved further than neighboring processor
  // move atoms to new processors via irregular()
-  // only needed if migrate_check() says an atom moves to far,
+  // only needed if migrate_check() says an atom moves too far
  // else allow caller's comm->exchange() to do it

  if (domain->triclinic) domain->x2lamda(atom->nlocal);
-  if (irregular->migrate_check()) irregular->migrate_atoms();
+  if (lbstyle == BISECTION) irregular->migrate_atoms(0,sendproc);
+  else if (irregular->migrate_check()) irregular->migrate_atoms();
  if (domain->triclinic) domain->lamda2x(atom->nlocal);

  // invoke KSpace setup_grid() to adjust to new proc sub-domains

@@ -303,5 +308,6 @@ double FixBalance::compute_vector(int i)
double FixBalance::memory_usage()
{
  double bytes = irregular->memory_usage();
+  if (balance->rcb) bytes += balance->rcb->memory_usage();
  return bytes;
}
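
With the bisection style the balancer hands back an explicit per-atom processor assignment, and rebalance() passes it straight to the irregular migration instead of relying on the geometric migrate_check() test. A minimal sketch of consuming such an assignment, with illustrative data (procassign, sendto, sendidx are not LAMMPS names):

#include <vector>
#include <cstdio>

int main() {
  int me = 0;
  int nlocal = 5;
  std::vector<int> procassign = {0, 2, 0, 1, 2};   // from the balancer
  std::vector<int> sendidx;                        // which local atoms leave
  std::vector<int> sendto;                         // destination proc per atom
  for (int i = 0; i < nlocal; i++) {
    if (procassign[i] == me) continue;             // stays on this proc
    sendidx.push_back(i);
    sendto.push_back(procassign[i]);
  }
  printf("%zu of %d atoms migrate\n", sendidx.size(), nlocal);
  return 0;
}
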
@@ -987,3 +987,14 @@ void FixDeform::options(int narg, char **arg)
    } else error->all(FLERR,"Illegal fix deform command");
  }
}
+
+/* ----------------------------------------------------------------------
+   memory usage of Irregular
+------------------------------------------------------------------------- */
+
+double FixDeform::memory_usage()
+{
+  double bytes = 0.0;
+  if (irregular) bytes += irregular->memory_usage();
+  return bytes;
+}

@@ -35,6 +35,7 @@ class FixDeform : public Fix {
  void init();
  void pre_exchange();
  void end_of_step();
+  double memory_usage();

 private:
  int triclinic,scaleflag,flipflag;

@@ -2271,3 +2271,14 @@ void FixNH::pre_exchange()
    domain->lamda2x(atom->nlocal);
  }
}
+
+/* ----------------------------------------------------------------------
+   memory usage of Irregular
+------------------------------------------------------------------------- */
+
+double FixNH::memory_usage()
+{
+  double bytes = 0.0;
+  if (irregular) bytes += irregular->memory_usage();
+  return bytes;
+}

@@ -39,6 +39,7 @@ class FixNH : public Fix {
  void reset_target(double);
  void reset_dt();
  virtual void *extract(const char*,int &);
+  double memory_usage();

 protected:
  int dimension,which;
@@ -1159,19 +1159,15 @@ void Input::comm_style()
{
  if (narg < 1) error->all(FLERR,"Illegal comm_style command");
  if (strcmp(arg[0],"brick") == 0) {
-    if (comm->layout)
-      error->all(FLERR,
-                 "Cannot switch to comm style brick from "
-                 "irregular tiling of proc domains");
-    comm = new CommBrick(lmp);
-    // NOTE: this will lose load balancing info in old CommBrick
-    if (domain->box_exist) {
-      comm->set_proc_grid();
-      domain->set_local_box();
-    }
+    if (comm->style == 0) return;
+    Comm *oldcomm = comm;
+    comm = new CommBrick(lmp,oldcomm);
+    delete oldcomm;
  } else if (strcmp(arg[0],"tiled") == 0) {
-    error->all(FLERR,"Comm_style tiled not yet supported");
-    comm = new CommTiled(lmp);
+    if (comm->style == 1) return;
+    Comm *oldcomm = comm;
+    comm = new CommTiled(lmp,oldcomm);
+    delete oldcomm;
  } else error->all(FLERR,"Illegal comm_style command");
}

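
The switch above constructs the replacement Comm object from the existing one so accumulated settings carry over before the old instance is deleted. A minimal sketch of that pattern with illustrative stand-in classes (CommA, CommB are not the LAMMPS classes):

#include <cstdio>

struct CommBase {
  int style, layout;
  virtual ~CommBase() {}
};
struct CommA : CommBase {
  CommA() { style = 0; layout = 0; }
  explicit CommA(const CommBase &old) { style = 0; layout = old.layout; }
};
struct CommB : CommBase {
  explicit CommB(const CommBase &old) { style = 1; layout = old.layout; }
};

int main() {
  CommBase *comm = new CommA();
  comm->layout = 2;                   // some accumulated state
  CommBase *oldcomm = comm;
  comm = new CommB(*oldcomm);         // carry settings into the new style
  delete oldcomm;
  printf("style %d layout %d\n", comm->style, comm->layout);   // 1 2
  delete comm;
  return 0;
}
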
@@ -30,6 +30,8 @@ using namespace LAMMPS_NS;
int *Irregular::proc_recv_copy;
int compare_standalone(const void *, const void *);

+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
+
#define BUFFACTOR 1.5
#define BUFMIN 1000
#define BUFEXTRA 1000

@@ -43,13 +45,26 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)

  triclinic = domain->triclinic;
  map_style = atom->map_style;
-  procgrid = comm->procgrid;
-  grid2proc = comm->grid2proc;

-  aplan = NULL;
-  dplan = NULL;
+  // migrate work vectors
+
+  maxlocal = 0;
+  mproclist = NULL;
+  msizes = NULL;
+
+  // send buffers
+
+  maxdbuf = 0;
+  dbuf = NULL;
+  maxbuf = 0;
+  buf = NULL;
+
+  // universal work vectors
+
+  memory->create(work1,nprocs,"irregular:work1");
+  memory->create(work2,nprocs,"irregular:work2");

-  // initialize buffers for atom comm, not used for datum comm
+  // initialize buffers for migrate atoms, not used for datum comm
  // these can persist for multiple irregular operations

  maxsend = BUFMIN;

@@ -62,9 +77,12 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)

Irregular::~Irregular()
{
-  if (aplan) destroy_atom();
-  if (dplan) destroy_data();
+  memory->destroy(mproclist);
+  memory->destroy(msizes);
+  memory->destroy(dbuf);
+  memory->destroy(buf);
+  memory->destroy(work1);
+  memory->destroy(work2);
  memory->destroy(buf_send);
  memory->destroy(buf_recv);
}
@@ -74,11 +92,13 @@ Irregular::~Irregular()
   can be used in place of comm->exchange()
   unlike exchange(), allows atoms to have moved arbitrarily long distances
   sets up irregular plan, invokes it, destroys it
+   sortflag = flag for sorting order of received messages by proc ID
+   procassign = non-NULL if already know procs atoms are assigned to (from RCB)
   atoms MUST be remapped to be inside simulation box before this is called
   for triclinic: atoms must be in lamda coords (0-1) before this is called
------------------------------------------------------------------------- */

-void Irregular::migrate_atoms(int sortflag)
+void Irregular::migrate_atoms(int sortflag, int *procassign)
{
  // clear global->local map since atoms move to new procs
  // clear old ghosts so map_set() at end will operate only on local atoms

@@ -101,13 +121,16 @@ void Irregular::migrate_atoms(int sortflag)
    subhi = domain->subhi_lamda;
  }

-  uniform = comm->uniform;
+  layout = comm->layout;
  xsplit = comm->xsplit;
  ysplit = comm->ysplit;
  zsplit = comm->zsplit;
  boxlo = domain->boxlo;
  prd = domain->prd;

+  procgrid = comm->procgrid;
+  grid2proc = comm->grid2proc;
+
  // loop over atoms, flag any that are not in my sub-box
  // fill buffer with atoms leaving my box, using < and >=
  // assign which proc it belongs to via coord2proc()

@@ -119,41 +142,63 @@ void Irregular::migrate_atoms(int sortflag)
  double **x = atom->x;
  int nlocal = atom->nlocal;

+  if (nlocal > maxlocal) {
+    maxlocal = nlocal;
+    memory->destroy(mproclist);
+    memory->destroy(msizes);
+    memory->create(mproclist,maxlocal,"irregular:mproclist");
+    memory->create(msizes,maxlocal,"irregular:msizes");
+  }
+
+  int igx,igy,igz;
  int nsend = 0;
  int nsendatom = 0;
-  int *sizes = new int[nlocal];
-  int *proclist = new int[nlocal];
-  int igx,igy,igz;

  int i = 0;
-  while (i < nlocal) {
-    if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
-        x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
-        x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
-      proclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
-      if (proclist[nsendatom] != me) {
-        if (nsend > maxsend) grow_send(nsend,1);
-        sizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
-        nsend += sizes[nsendatom];
-        nsendatom++;
-        avec->copy(nlocal-1,i,1);
-        nlocal--;
-      } else i++;
-    } else i++;
-  }
+  if (procassign) {
+    while (i < nlocal) {
+      if (procassign[i] == me) i++;
+      else {
+        mproclist[nsendatom] = procassign[i];
+        if (nsend > maxsend) grow_send(nsend,1);
+        msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
+        nsend += msizes[nsendatom];
+        nsendatom++;
+        avec->copy(nlocal-1,i,1);
+        procassign[i] = procassign[nlocal-1];
+        nlocal--;
+      }
+    }
+
+  } else {
+    while (i < nlocal) {
+      if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
+          x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
+          x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
+        mproclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
+        if (mproclist[nsendatom] == me) i++;
+        else {
+          if (nsend > maxsend) grow_send(nsend,1);
+          msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
+          nsend += msizes[nsendatom];
+          nsendatom++;
+          avec->copy(nlocal-1,i,1);
+          nlocal--;
+        }
+      } else i++;
+    }
+  }

  atom->nlocal = nlocal;

  // create irregular communication plan, perform comm, destroy plan
  // returned nrecv = size of buffer needed for incoming atoms

-  int nrecv = create_atom(nsendatom,sizes,proclist,sortflag);
+  int nrecv = create_atom(nsendatom,msizes,mproclist,sortflag);
  if (nrecv > maxrecv) grow_recv(nrecv);
-  exchange_atom(buf_send,sizes,buf_recv);
+  exchange_atom(buf_send,msizes,buf_recv);
  destroy_atom();

-  delete [] sizes;
-  delete [] proclist;
-
  // add received atoms to my list

  int m = 0;
|
|||||||
|
|
||||||
int Irregular::migrate_check()
|
int Irregular::migrate_check()
|
||||||
{
|
{
|
||||||
|
// migrate required if comm layout is tiled
|
||||||
|
// cannot use myloc[] logic below
|
||||||
|
|
||||||
|
if (comm->layout == LAYOUT_TILED) return 1;
|
||||||
|
|
||||||
// subbox bounds for orthogonal or triclinic box
|
// subbox bounds for orthogonal or triclinic box
|
||||||
// other comm/domain data used by coord2proc()
|
// other comm/domain data used by coord2proc()
|
||||||
|
|
||||||
@ -186,13 +236,16 @@ int Irregular::migrate_check()
|
|||||||
subhi = domain->subhi_lamda;
|
subhi = domain->subhi_lamda;
|
||||||
}
|
}
|
||||||
|
|
||||||
uniform = comm->uniform;
|
layout = comm->layout;
|
||||||
xsplit = comm->xsplit;
|
xsplit = comm->xsplit;
|
||||||
ysplit = comm->ysplit;
|
ysplit = comm->ysplit;
|
||||||
zsplit = comm->zsplit;
|
zsplit = comm->zsplit;
|
||||||
boxlo = domain->boxlo;
|
boxlo = domain->boxlo;
|
||||||
prd = domain->prd;
|
prd = domain->prd;
|
||||||
|
|
||||||
|
procgrid = comm->procgrid;
|
||||||
|
grid2proc = comm->grid2proc;
|
||||||
|
|
||||||
// loop over atoms, check for any that are not in my sub-box
|
// loop over atoms, check for any that are not in my sub-box
|
||||||
// assign which proc it belongs to via coord2proc()
|
// assign which proc it belongs to via coord2proc()
|
||||||
// if logical igx,igy,igz of newproc > one away from myloc, set flag = 1
|
// if logical igx,igy,igz of newproc > one away from myloc, set flag = 1
|
||||||
@ -250,6 +303,7 @@ int Irregular::migrate_check()
|
|||||||
n = # of atoms to send
|
n = # of atoms to send
|
||||||
sizes = # of doubles for each atom
|
sizes = # of doubles for each atom
|
||||||
proclist = proc to send each atom to (not including self)
|
proclist = proc to send each atom to (not including self)
|
||||||
|
sortflag = flag for sorting order of received messages by proc ID
|
||||||
return total # of doubles I will recv (not including self)
|
return total # of doubles I will recv (not including self)
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
@@ -257,51 +311,60 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
 {
   int i;
 
-  // allocate plan and work vectors
+  // setup for collective comm
+  // work1 = 1 for procs I send a message to, not including self
-  if (aplan) destroy_atom();
+  // work2 = 1 for all procs, used for ReduceScatter
-  aplan = (PlanAtom *) memory->smalloc(sizeof(PlanAtom),"irregular:aplan");
-  int *list = new int[nprocs];
-  int *count = new int[nprocs];
 
-  // nrecv = # of messages I receive
 
   for (i = 0; i < nprocs; i++) {
-    list[i] = 0;
+    work1[i] = 0;
-    count[i] = 1;
+    work2[i] = 1;
   }
-  for (i = 0; i < n; i++) list[proclist[i]] = 1;
+  for (i = 0; i < n; i++) work1[proclist[i]] = 1;
+  work1[me] = 0;
 
-  int nrecv;
+  // nrecv_proc = # of procs I receive messages from, not including self
-  MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
+  // options for performing ReduceScatter operation
+  // some are more efficient on some machines at big sizes
 
+#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
+  MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
+  nrecv_proc = work1[me];
+#else
+#ifdef LAMMPS_RS_ALLREDUCE
+  MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
+  nrecv_proc = work2[me];
+#else
+  MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
+#endif
+#endif
 
   // allocate receive arrays
 
-  int *proc_recv = new int[nrecv];
+  proc_recv = new int[nrecv_proc];
-  int *length_recv = new int[nrecv];
+  length_recv = new int[nrecv_proc];
-  MPI_Request *request = new MPI_Request[nrecv];
+  request = new MPI_Request[nrecv_proc];
-  MPI_Status *status = new MPI_Status[nrecv];
+  status = new MPI_Status[nrecv_proc];
 
-  // nsend = # of messages I send
+  // nsend_proc = # of messages I send
 
-  for (i = 0; i < nprocs; i++) list[i] = 0;
+  for (i = 0; i < nprocs; i++) work1[i] = 0;
-  for (i = 0; i < n; i++) list[proclist[i]] += sizes[i];
+  for (i = 0; i < n; i++) work1[proclist[i]] += sizes[i];
 
-  int nsend = 0;
+  nsend_proc = 0;
   for (i = 0; i < nprocs; i++)
-    if (list[i]) nsend++;
+    if (work1[i]) nsend_proc++;
 
   // allocate send arrays
 
-  int *proc_send = new int[nsend];
+  proc_send = new int[nsend_proc];
-  int *length_send = new int[nsend];
+  length_send = new int[nsend_proc];
-  int *num_send = new int[nsend];
+  num_send = new int[nsend_proc];
-  int *index_send = new int[n];
+  index_send = new int[n];
-  int *offset_send = new int[n];
+  offset_send = new int[n];
 
   // list still stores size of message for procs I send to
   // proc_send = procs I send to
-  // length_send = total size of message I send to each proc
+  // length_send = # of doubles I send to each proc
   // to balance pattern of send messages:
   //   each proc begins with iproc > me, continues until iproc = me
   // reset list to store which send message each proc corresponds to
@@ -311,81 +374,81 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
   for (i = 0; i < nprocs; i++) {
     iproc++;
     if (iproc == nprocs) iproc = 0;
-    if (list[iproc] > 0) {
+    if (work1[iproc] > 0) {
       proc_send[isend] = iproc;
-      length_send[isend] = list[iproc];
+      length_send[isend] = work1[iproc];
-      list[iproc] = isend;
+      work1[iproc] = isend;
       isend++;
     }
   }
 
   // num_send = # of atoms I send to each proc
 
-  for (i = 0; i < nsend; i++) num_send[i] = 0;
+  for (i = 0; i < nsend_proc; i++) num_send[i] = 0;
   for (i = 0; i < n; i++) {
-    isend = list[proclist[i]];
+    isend = work1[proclist[i]];
     num_send[isend]++;
   }
 
-  // count = offsets into index_send for each proc I send to
+  // work2 = offsets into index_send for each proc I send to
   // index_send = list of which atoms to send to each proc
   //   1st N1 values are atom indices for 1st proc,
   //   next N2 values are atom indices for 2nd proc, etc
   // offset_send = where each atom starts in send buffer
 
-  count[0] = 0;
+  work2[0] = 0;
-  for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
+  for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
 
   for (i = 0; i < n; i++) {
-    isend = list[proclist[i]];
+    isend = work1[proclist[i]];
-    index_send[count[isend]++] = i;
+    index_send[work2[isend]++] = i;
     if (i) offset_send[i] = offset_send[i-1] + sizes[i-1];
     else offset_send[i] = 0;
   }
 
   // tell receivers how much data I send
-  // sendmax = largest # of doubles I send in a single message
+  // sendmax_proc = # of doubles I send in largest single message
 
-  int sendmax = 0;
+  sendmax_proc = 0;
-  for (i = 0; i < nsend; i++) {
+  for (i = 0; i < nsend_proc; i++) {
     MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world);
-    sendmax = MAX(sendmax,length_send[i]);
+    sendmax_proc = MAX(sendmax_proc,length_send[i]);
   }
 
   // receive incoming messages
   // proc_recv = procs I recv from
-  // length_recv = total size of message each proc sends me
+  // length_recv = # of doubles each proc sends me
-  // nrecvsize = total size of data I recv
+  // nrecvsize = total size of atom data I recv
 
   int nrecvsize = 0;
-  for (i = 0; i < nrecv; i++) {
+  for (i = 0; i < nrecv_proc; i++) {
     MPI_Recv(&length_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
     proc_recv[i] = status->MPI_SOURCE;
     nrecvsize += length_recv[i];
   }
 
-  // sort proc_recv and num_recv by proc ID if requested
+  // sort proc_recv and length_recv by proc ID if requested
   // useful for debugging to insure reproducible ordering of received atoms
   // invoke by adding final arg = 1 to create_atom() call in migrate_atoms()
 
   if (sortflag) {
-    int *order = new int[nrecv];
+    int *order = new int[nrecv_proc];
-    int *proc_recv_ordered = new int[nrecv];
+    int *proc_recv_ordered = new int[nrecv_proc];
-    int *length_recv_ordered = new int[nrecv];
+    int *length_recv_ordered = new int[nrecv_proc];
 
-    for (i = 0; i < nrecv; i++) order[i] = i;
+    for (i = 0; i < nrecv_proc; i++) order[i] = i;
     proc_recv_copy = proc_recv;
-    qsort(order,nrecv,sizeof(int),compare_standalone);
+    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 
     int j;
-    for (i = 0; i < nrecv; i++) {
+    for (i = 0; i < nrecv_proc; i++) {
       j = order[i];
       proc_recv_ordered[i] = proc_recv[j];
       length_recv_ordered[i] = length_recv[j];
     }
 
-    memcpy(proc_recv,proc_recv_ordered,nrecv*sizeof(int));
+    memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
-    memcpy(length_recv,length_recv_ordered,nrecv*sizeof(int));
+    memcpy(length_recv,length_recv_ordered,nrecv_proc*sizeof(int));
     delete [] order;
     delete [] proc_recv_ordered;
     delete [] length_recv_ordered;
@@ -396,27 +459,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
 
   MPI_Barrier(world);
 
-  // free work vectors
+  // return size of atom data I will receive
 
-  delete [] count;
-  delete [] list;
 
-  // initialize plan
 
-  aplan->nsend = nsend;
-  aplan->nrecv = nrecv;
-  aplan->sendmax = sendmax;
 
-  aplan->proc_send = proc_send;
-  aplan->length_send = length_send;
-  aplan->num_send = num_send;
-  aplan->index_send = index_send;
-  aplan->offset_send = offset_send;
-  aplan->proc_recv = proc_recv;
-  aplan->length_recv = length_recv;
 
-  aplan->request = request;
-  aplan->status = status;
 
   return nrecvsize;
 }
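The work1/work2 loops above replace the per-call list/count allocations of the old plan: every proc flags the ranks it will message, and a ReduceScatter (or one of the Allreduce fallbacks selected by the LAMMPS_RS_ALLREDUCE* defines) tells each rank how many incoming messages to expect. A minimal standalone sketch of that counting idiom follows; the name count_incoming is illustrative only and is not part of the class.

#include <mpi.h>
#include <vector>

// Count how many ranks will send me a message, given the ranks I send to
// (self excluded), using the same flag-and-ReduceScatter idiom as above.
int count_incoming(const std::vector<int> &sendto, MPI_Comm comm)
{
  int me,nprocs;
  MPI_Comm_rank(comm,&me);
  MPI_Comm_size(comm,&nprocs);

  std::vector<int> work1(nprocs,0);   // 1 for every rank I will message
  std::vector<int> work2(nprocs,1);   // recvcounts: one summed value per rank
  for (int p : sendto)
    if (p != me) work1[p] = 1;

  int nrecv_proc = 0;                 // # of ranks whose flag lands on me
  MPI_Reduce_scatter(work1.data(),&nrecv_proc,work2.data(),
                     MPI_INT,MPI_SUM,comm);
  return nrecv_proc;
}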
@@ -445,217 +488,226 @@ int compare_standalone(const void *iptr, const void *jptr)
 
 void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
 {
-  int i,m,n,offset,num_send;
+  int i,m,n,offset,count;
 
   // post all receives
 
   offset = 0;
-  for (int irecv = 0; irecv < aplan->nrecv; irecv++) {
+  for (int irecv = 0; irecv < nrecv_proc; irecv++) {
-    MPI_Irecv(&recvbuf[offset],aplan->length_recv[irecv],MPI_DOUBLE,
+    MPI_Irecv(&recvbuf[offset],length_recv[irecv],MPI_DOUBLE,
-              aplan->proc_recv[irecv],0,world,&aplan->request[irecv]);
+              proc_recv[irecv],0,world,&request[irecv]);
-    offset += aplan->length_recv[irecv];
+    offset += length_recv[irecv];
   }
 
-  // allocate buf for largest send
+  // reallocate buf for largest send if necessary
 
-  double *buf;
+  if (sendmax_proc > maxdbuf) {
-  memory->create(buf,aplan->sendmax,"irregular:buf");
+    memory->destroy(dbuf);
+    maxdbuf = sendmax_proc;
+    memory->create(dbuf,maxdbuf,"irregular:dbuf");
+  }
 
   // send each message
   // pack buf with list of atoms
   // m = index of atom in sendbuf
 
-  int *index_send = aplan->index_send;
-  int nsend = aplan->nsend;
   n = 0;
+  for (int isend = 0; isend < nsend_proc; isend++) {
-  for (int isend = 0; isend < nsend; isend++) {
     offset = 0;
-    num_send = aplan->num_send[isend];
+    count = num_send[isend];
-    for (i = 0; i < num_send; i++) {
+    for (i = 0; i < count; i++) {
       m = index_send[n++];
-      memcpy(&buf[offset],&sendbuf[aplan->offset_send[m]],
+      memcpy(&dbuf[offset],&sendbuf[offset_send[m]],sizes[m]*sizeof(double));
-             sizes[m]*sizeof(double));
       offset += sizes[m];
     }
-    MPI_Send(buf,aplan->length_send[isend],MPI_DOUBLE,
+    MPI_Send(dbuf,length_send[isend],MPI_DOUBLE,proc_send[isend],0,world);
-             aplan->proc_send[isend],0,world);
   }
 
-  // free temporary send buffer
 
-  memory->destroy(buf);
 
   // wait on all incoming messages
 
-  if (aplan->nrecv) MPI_Waitall(aplan->nrecv,aplan->request,aplan->status);
+  if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
 }
 
 /* ----------------------------------------------------------------------
-   destroy communication plan for atoms
+   destroy vectors in communication plan for atoms
 ------------------------------------------------------------------------- */
 
 void Irregular::destroy_atom()
 {
-  delete [] aplan->proc_send;
+  delete [] proc_send;
-  delete [] aplan->length_send;
+  delete [] length_send;
-  delete [] aplan->num_send;
+  delete [] num_send;
-  delete [] aplan->index_send;
+  delete [] index_send;
-  delete [] aplan->offset_send;
+  delete [] offset_send;
-  delete [] aplan->proc_recv;
+  delete [] proc_recv;
-  delete [] aplan->length_recv;
+  delete [] length_recv;
-  delete [] aplan->request;
+  delete [] request;
-  delete [] aplan->status;
+  delete [] status;
-  memory->sfree(aplan);
-  aplan = NULL;
 }
 
 /* ----------------------------------------------------------------------
-   create a communication plan for datums
+   create communication plan based on list of datums of uniform size
    n = # of datums to send
-   proclist = proc to send each datum to (including self)
+   proclist = proc to send each datum to, can include self
-   return total # of datums I will recv (including self)
+   sortflag = flag for sorting order of received messages by proc ID
+   return total # of datums I will recv, including any to self
 ------------------------------------------------------------------------- */
 
-int Irregular::create_data(int n, int *proclist)
+int Irregular::create_data(int n, int *proclist, int sortflag)
 {
   int i,m;
 
-  // allocate plan and work vectors
+  // setup for collective comm
+  // work1 = 1 for procs I send a message to, not including self
-  dplan = (PlanData *) memory->smalloc(sizeof(PlanData),"irregular:dplan");
+  // work2 = 1 for all procs, used for ReduceScatter
-  int *list = new int[nprocs];
-  int *count = new int[nprocs];
 
-  // nrecv = # of messages I receive
 
   for (i = 0; i < nprocs; i++) {
-    list[i] = 0;
+    work1[i] = 0;
-    count[i] = 1;
+    work2[i] = 1;
   }
-  for (i = 0; i < n; i++) list[proclist[i]] = 1;
+  for (i = 0; i < n; i++) work1[proclist[i]] = 1;
+  work1[me] = 0;
 
-  int nrecv;
+  // nrecv_proc = # of procs I receive messages from, not including self
-  MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
+  // options for performing ReduceScatter operation
-  if (list[me]) nrecv--;
+  // some are more efficient on some machines at big sizes
 
+#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
+  MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
+  nrecv_proc = work1[me];
+#else
+#ifdef LAMMPS_RS_ALLREDUCE
+  MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
+  nrecv_proc = work2[me];
+#else
+  MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
+#endif
+#endif
 
   // allocate receive arrays
 
-  int *proc_recv = new int[nrecv];
+  proc_recv = new int[nrecv_proc];
-  int *num_recv = new int[nrecv];
+  num_recv = new int[nrecv_proc];
-  MPI_Request *request = new MPI_Request[nrecv];
+  request = new MPI_Request[nrecv_proc];
-  MPI_Status *status = new MPI_Status[nrecv];
+  status = new MPI_Status[nrecv_proc];
 
-  // nsend = # of messages I send
+  // work1 = # of datums I send to each proc, including self
+  // nsend_proc = # of procs I send messages to, not including self
 
-  for (i = 0; i < nprocs; i++) list[i] = 0;
+  for (i = 0; i < nprocs; i++) work1[i] = 0;
-  for (i = 0; i < n; i++) list[proclist[i]]++;
+  for (i = 0; i < n; i++) work1[proclist[i]]++;
 
-  int nsend = 0;
+  nsend_proc = 0;
   for (i = 0; i < nprocs; i++)
-    if (list[i]) nsend++;
+    if (work1[i]) nsend_proc++;
-  if (list[me]) nsend--;
+  if (work1[me]) nsend_proc--;
 
   // allocate send and self arrays
 
-  int *proc_send = new int[nsend];
+  proc_send = new int[nsend_proc];
-  int *num_send = new int[nsend];
+  num_send = new int[nsend_proc];
-  int *index_send = new int[n-list[me]];
+  index_send = new int[n-work1[me]];
-  int *index_self = new int[list[me]];
+  index_self = new int[work1[me]];
 
   // proc_send = procs I send to
   // num_send = # of datums I send to each proc
   // num_self = # of datums I copy to self
   // to balance pattern of send messages:
   //   each proc begins with iproc > me, continues until iproc = me
-  // reset list to store which send message each proc corresponds to
+  // reset work1 to store which send message each proc corresponds to
 
-  int num_self;
 
   int iproc = me;
   int isend = 0;
   for (i = 0; i < nprocs; i++) {
     iproc++;
     if (iproc == nprocs) iproc = 0;
-    if (iproc == me) num_self = list[iproc];
+    if (iproc == me) {
-    else if (list[iproc] > 0) {
+      num_self = work1[iproc];
+      work1[iproc] = 0;
+    } else if (work1[iproc] > 0) {
       proc_send[isend] = iproc;
-      num_send[isend] = list[iproc];
+      num_send[isend] = work1[iproc];
-      list[iproc] = isend;
+      work1[iproc] = isend;
       isend++;
     }
   }
-  list[me] = 0;
 
-  // count = offsets into index_send for each proc I send to
+  // work2 = offsets into index_send for each proc I send to
   // m = ptr into index_self
   // index_send = list of which datums to send to each proc
   //   1st N1 values are datum indices for 1st proc,
   //   next N2 values are datum indices for 2nd proc, etc
+  // index_self = list of which datums to copy to self
 
-  count[0] = 0;
+  work2[0] = 0;
-  for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
+  for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
 
   m = 0;
   for (i = 0; i < n; i++) {
     iproc = proclist[i];
     if (iproc == me) index_self[m++] = i;
     else {
-      isend = list[iproc];
+      isend = work1[iproc];
-      index_send[count[isend]++] = i;
+      index_send[work2[isend]++] = i;
     }
   }
 
   // tell receivers how much data I send
-  // sendmax = largest # of datums I send in a single message
+  // sendmax_proc = largest # of datums I send in a single message
 
-  int sendmax = 0;
+  sendmax_proc = 0;
-  for (i = 0; i < nsend; i++) {
+  for (i = 0; i < nsend_proc; i++) {
     MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world);
-    sendmax = MAX(sendmax,num_send[i]);
+    sendmax_proc = MAX(sendmax_proc,num_send[i]);
   }
 
   // receive incoming messages
   // proc_recv = procs I recv from
   // num_recv = total size of message each proc sends me
-  // nrecvsize = total size of data I recv
+  // nrecvdatum = total size of data I recv
 
-  int nrecvsize = 0;
+  int nrecvdatum = 0;
-  for (i = 0; i < nrecv; i++) {
+  for (i = 0; i < nrecv_proc; i++) {
     MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
     proc_recv[i] = status->MPI_SOURCE;
-    nrecvsize += num_recv[i];
+    nrecvdatum += num_recv[i];
+  }
+  nrecvdatum += num_self;
 
+  // sort proc_recv and num_recv by proc ID if requested
+  // useful for debugging to insure reproducible ordering of received datums
 
+  if (sortflag) {
+    int *order = new int[nrecv_proc];
+    int *proc_recv_ordered = new int[nrecv_proc];
+    int *num_recv_ordered = new int[nrecv_proc];
 
+    for (i = 0; i < nrecv_proc; i++) order[i] = i;
+    proc_recv_copy = proc_recv;
+    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 
+    int j;
+    for (i = 0; i < nrecv_proc; i++) {
+      j = order[i];
+      proc_recv_ordered[i] = proc_recv[j];
+      num_recv_ordered[i] = num_recv[j];
+    }
 
+    memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
+    memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
+    delete [] order;
+    delete [] proc_recv_ordered;
+    delete [] num_recv_ordered;
   }
-  nrecvsize += num_self;
 
   // barrier to insure all MPI_ANY_SOURCE messages are received
   // else another proc could proceed to exchange_data() and send to me
 
   MPI_Barrier(world);
 
-  // free work vectors
+  // return # of datums I will receive
 
-  delete [] count;
+  return nrecvdatum;
-  delete [] list;
 
-  // initialize plan and return it
 
-  dplan->nsend = nsend;
-  dplan->nrecv = nrecv;
-  dplan->sendmax = sendmax;
 
-  dplan->proc_send = proc_send;
-  dplan->num_send = num_send;
-  dplan->index_send = index_send;
-  dplan->proc_recv = proc_recv;
-  dplan->num_recv = num_recv;
-  dplan->num_self = num_self;
-  dplan->index_self = index_self;
 
-  dplan->request = request;
-  dplan->status = status;
 
-  return nrecvsize;
 }
 
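create_data()/exchange_data()/destroy_data() form the fixed-size-datum counterpart of the atom plan; RCB::invert() in the new src/rcb.cpp below drives them in exactly this order. A hedged sketch of that caller pattern follows; the Datum struct and forward() wrapper are illustrative, not LAMMPS code.

#include "irregular.h"
#include "memory.h"
using namespace LAMMPS_NS;

struct Datum { int rindex,sproc,sindex; };   // illustrative fixed-size payload

// move nsend datums to the ranks listed in proclist (self allowed) and
// return the received copies; mirrors the usage in RCB::invert()
Datum *forward(Irregular *irregular, Memory *memory,
               int nsend, int *proclist, Datum *sendbuf, int &nrecv)
{
  nrecv = irregular->create_data(nsend,proclist);        // build the plan
  Datum *recvbuf =
    (Datum *) memory->smalloc(nrecv*sizeof(Datum),"caller:recvbuf");
  irregular->exchange_data((char *) sendbuf,sizeof(Datum),
                           (char *) recvbuf);            // ship the datums
  irregular->destroy_data();                             // free plan vectors
  return recvbuf;                                        // caller sfree()s it
}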
 /* ----------------------------------------------------------------------
@@ -667,49 +719,41 @@ int Irregular::create_data(int n, int *proclist)
 
 void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
 {
-  int i,m,n,offset,num_send;
+  int i,m,n,offset,count;
 
   // post all receives, starting after self copies
 
-  offset = dplan->num_self*nbytes;
+  offset = num_self*nbytes;
-  for (int irecv = 0; irecv < dplan->nrecv; irecv++) {
+  for (int irecv = 0; irecv < nrecv_proc; irecv++) {
-    MPI_Irecv(&recvbuf[offset],dplan->num_recv[irecv]*nbytes,MPI_CHAR,
+    MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
-              dplan->proc_recv[irecv],0,world,&dplan->request[irecv]);
+              proc_recv[irecv],0,world,&request[irecv]);
-    offset += dplan->num_recv[irecv]*nbytes;
+    offset += num_recv[irecv]*nbytes;
   }
 
-  // allocate buf for largest send
+  // reallocate buf for largest send if necessary
 
-  char *buf;
+  if (sendmax_proc*nbytes > maxbuf) {
-  memory->create(buf,dplan->sendmax*nbytes,"irregular:buf");
+    memory->destroy(buf);
+    maxbuf = sendmax_proc*nbytes;
+    memory->create(buf,maxbuf,"irregular:buf");
+  }
 
   // send each message
   // pack buf with list of datums
   // m = index of datum in sendbuf
 
-  int *index_send = dplan->index_send;
-  int nsend = dplan->nsend;
   n = 0;
+  for (int isend = 0; isend < nsend_proc; isend++) {
-  for (int isend = 0; isend < nsend; isend++) {
+    count = num_send[isend];
-    num_send = dplan->num_send[isend];
+    for (i = 0; i < count; i++) {
-    for (i = 0; i < num_send; i++) {
      m = index_send[n++];
      memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
    }
-    MPI_Send(buf,dplan->num_send[isend]*nbytes,MPI_CHAR,
+    MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
-             dplan->proc_send[isend],0,world);
   }
 
-  // free temporary send buffer
 
-  memory->destroy(buf);
 
   // copy datums to self, put at beginning of recvbuf
 
-  int *index_self = dplan->index_self;
-  int num_self = dplan->num_self;
 
   for (i = 0; i < num_self; i++) {
     m = index_self[i];
     memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
@@ -717,39 +761,37 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
 
   // wait on all incoming messages
 
-  if (dplan->nrecv) MPI_Waitall(dplan->nrecv,dplan->request,dplan->status);
+  if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
 }
 
 /* ----------------------------------------------------------------------
-   destroy communication plan for datums
+   destroy vectors in communication plan for datums
 ------------------------------------------------------------------------- */
 
 void Irregular::destroy_data()
 {
-  delete [] dplan->proc_send;
+  delete [] proc_send;
-  delete [] dplan->num_send;
+  delete [] num_send;
-  delete [] dplan->index_send;
+  delete [] index_send;
-  delete [] dplan->proc_recv;
+  delete [] proc_recv;
-  delete [] dplan->num_recv;
+  delete [] num_recv;
-  delete [] dplan->index_self;
+  delete [] index_self;
-  delete [] dplan->request;
+  delete [] request;
-  delete [] dplan->status;
+  delete [] status;
-  memory->sfree(dplan);
-  dplan = NULL;
 }
 
 /* ----------------------------------------------------------------------
    determine which proc owns atom with coord x[3]
    x will be in box (orthogonal) or lamda coords (triclinic)
-   for uniform = 1, directly calculate owning proc
+   if layout = UNIFORM, calculate owning proc directly
-   for non-uniform, iteratively find owning proc via binary search
+   else layout = NONUNIFORM, iteratively find owning proc via binary search
    return owning proc ID via grid2proc
    return igx,igy,igz = logical grid loc of owing proc within 3d grid of procs
 ------------------------------------------------------------------------- */
 
 int Irregular::coord2proc(double *x, int &igx, int &igy, int &igz)
 {
-  if (uniform) {
+  if (layout == 0) {
     if (triclinic == 0) {
       igx = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
       igy = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
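For the NONUNIFORM branch this hunk introduces, the owning slice along each dimension comes from a binary search over the comm xsplit/ysplit/zsplit arrays via the binary() method declared in irregular.h. A sketch of what such a lookup does, assuming n+1 monotonically increasing split points spanning 0.0 to 1.0 in lamda coordinates; it is not the verbatim Irregular::binary().

// return index i such that split[i] <= x < split[i+1], with split[0] = 0.0
// and split[n] = 1.0; the i-th slice of the nonuniform decomposition owns x
int owning_slice(double x, int n, const double *split)
{
  int lo = 0, hi = n;            // owning slice index lies in [lo,hi)
  while (hi - lo > 1) {
    int mid = (lo + hi)/2;
    if (x < split[mid]) hi = mid;
    else lo = mid;
  }
  return lo;
}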
@@ -846,7 +888,12 @@ void Irregular::grow_recv(int n)
 
 bigint Irregular::memory_usage()
 {
-  bigint bytes = memory->usage(buf_send,maxsend);
+  bigint bytes = 0;
-  bytes += memory->usage(buf_recv,maxrecv);
+  bytes += maxsend*sizeof(double);      // buf_send
+  bytes += maxrecv*sizeof(double);      // buf_recv
+  bytes += maxdbuf*sizeof(double);      // dbuf
+  bytes += maxbuf;                      // buf
+  bytes += 2*maxlocal*sizeof(int);      // mproclist,msizes
+  bytes += 2*nprocs*sizeof(int);        // work1,work2
   return bytes;
 }

src/irregular.h
@@ -27,9 +27,9 @@ class Irregular : protected Pointers {
 
   Irregular(class LAMMPS *);
   ~Irregular();
-  void migrate_atoms(int sortflag = 0);
+  void migrate_atoms(int sortflag = 0, int *procassign = NULL);
   int migrate_check();
-  int create_data(int, int *);
+  int create_data(int, int *, int sortflag = 0);
   void exchange_data(char *, int, char *);
   void destroy_data();
   bigint memory_usage();
@@ -38,58 +38,58 @@ class Irregular : protected Pointers {
   int me,nprocs;
   int triclinic;
   int map_style;
-  int uniform;
+  int layout;
   double *xsplit,*ysplit,*zsplit;    // ptrs to comm
   int *procgrid;                     // ptr to comm
   int ***grid2proc;                  // ptr to comm
   double *boxlo;                     // ptr to domain
   double *prd;                       // ptr to domain
 
-  int maxsend,maxrecv;               // size of buffers in # of doubles
+  int maxsend,maxrecv;               // size of buf send/recv in # of doubles
-  double *buf_send,*buf_recv;
+  double *buf_send,*buf_recv;        // bufs used in migrate_atoms
+  int maxdbuf;                       // size of double buf in bytes
+  double *dbuf;                      // double buf for largest single atom send
+  int maxbuf;                        // size of char buf in bytes
+  char *buf;                         // char buf for largest single data send
 
-  // plan for irregular communication of atoms
+  int *mproclist,*msizes;            // persistent vectors in migrate_atoms
+  int maxlocal;                      // allocated size of mproclist and msizes
 
+  int *work1,*work2;                 // work vectors
 
+  // plan params for irregular communication of atoms or datums
+  // no params refer to atoms/data copied to self
 
+  int nsend_proc;                    // # of messages to send
+  int nrecv_proc;                    // # of messages to recv
+  int sendmax_proc;                  // # of doubles/datums in largest send message
+  int *proc_send;                    // list of procs to send to
+  int *num_send;                     // # of atoms/datums to send to each proc
+  int *index_send;                   // list of which atoms/datums to send to each proc
+  int *proc_recv;                    // list of procs to recv from
+  MPI_Request *request;              // MPI requests for posted recvs
+  MPI_Status *status;                // MPI statuses for WaitAll
 
+  // extra plan params plan for irregular communication of atoms
   // no params refer to atoms copied to self
 
-  struct PlanAtom {
+  int *length_send;                  // # of doubles to send to each proc
-    int nsend;                       // # of messages to send
+  int *length_recv;                  // # of doubles to recv from each proc
-    int nrecv;                       // # of messages to recv
+  int *offset_send;                  // where each atom starts in send buffer
-    int sendmax;                     // # of doubles in largest send message
-    int *proc_send;                  // procs to send to
-    int *length_send;                // # of doubles to send to each proc
-    int *num_send;                   // # of atoms to send to each proc
-    int *index_send;                 // list of which atoms to send to each proc
-    int *offset_send;                // where each atom starts in send buffer
-    int *proc_recv;                  // procs to recv from
-    int *length_recv;                // # of doubles to recv from each proc
-    MPI_Request *request;            // MPI requests for posted recvs
-    MPI_Status *status;              // MPI statuses for WaitAll
-  };
 
-  // plan for irregular communication of datums
+  // extra plan params plan for irregular communication of datums
-  //   only 2 self params refer to atoms copied to self
+  //   2 self params refer to data copied to self
 
-  struct PlanData {    // plan for irregular communication of data
+  int *num_recv;                     // # of datums to recv from each proc
-    int nsend;                       // # of messages to send
+  int num_self;                      // # of datums to copy to self
-    int nrecv;                       // # of messages to recv
+  int *index_self;                   // list of which datums to copy to self
-    int sendmax;                     // # of datums in largest send message
-    int *proc_send;                  // procs to send to
-    int *num_send;                   // # of datums to send to each proc
-    int *index_send;                 // list of which datums to send to each proc
-    int *proc_recv;                  // procs to recv from
-    int *num_recv;                   // # of datums to recv from each proc
-    int num_self;                    // # of datums to copy to self
-    int *index_self;                 // list of which datums to copy to self
-    MPI_Request *request;            // MPI requests for posted recvs
-    MPI_Status *status;              // MPI statuses for WaitAll
-  };
 
-  PlanAtom *aplan;
+  // private methods
-  PlanData *dplan;
 
   int create_atom(int, int *, int *, int);
   void exchange_atom(double *, int *, double *);
   void destroy_atom();
 
   int coord2proc(double *, int &, int &, int &);
   int binary(double, int, double *);
 
|
|||||||
926
src/rcb.cpp
Normal file
926
src/rcb.cpp
Normal file
@ -0,0 +1,926 @@
|
|||||||
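src/rcb.cpp below adds a parallel recursive coordinate bisection (RCB) partitioner: the proc set and the dot set are cut at a weighted median along the longest dimension of the current bounding box, and the recursion continues inside each half until every partition is a single proc. As orientation only, a small serial sketch of the same idea; the names and the unweighted median split are assumptions, not the code in this file.

#include <algorithm>
#include <vector>

struct Pt { double x[3]; };

// label each point listed in idx with a partition id in [first,first+nparts)
// by recursively cutting the longest dimension of the current set at its median
void rcb_serial(const std::vector<Pt> &pts, std::vector<int> idx,
                int nparts, int first, std::vector<int> &part)
{
  if (nparts == 1) {
    for (int i : idx) part[i] = first;
    return;
  }

  // bounding box of the current point set -> dimension with largest extent
  double lo[3] = {1e30,1e30,1e30}, hi[3] = {-1e30,-1e30,-1e30};
  for (int i : idx)
    for (int d = 0; d < 3; d++) {
      lo[d] = std::min(lo[d],pts[i].x[d]);
      hi[d] = std::max(hi[d],pts[i].x[d]);
    }
  int dim = 0;
  if (hi[1]-lo[1] > hi[dim]-lo[dim]) dim = 1;
  if (hi[2]-lo[2] > hi[dim]-lo[dim]) dim = 2;

  // lower half gets the extra partition when nparts is odd, as in RCB::compute()
  int nlower = nparts/2 + nparts%2;
  size_t mid = idx.size()*nlower/nparts;
  std::nth_element(idx.begin(),idx.begin()+mid,idx.end(),
                   [&](int a, int b) { return pts[a].x[dim] < pts[b].x[dim]; });

  std::vector<int> lower(idx.begin(),idx.begin()+mid);
  std::vector<int> upper(idx.begin()+mid,idx.end());
  rcb_serial(pts,lower,nlower,first,part);
  rcb_serial(pts,upper,nparts-nlower,first+nlower,part);
}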
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "string.h"
|
||||||
|
#include "rcb.h"
|
||||||
|
#include "irregular.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
#define MYHUGE 1.0e30
|
||||||
|
#define TINY 1.0e-6
|
||||||
|
|
||||||
|
// set this to bigger number after debugging
|
||||||
|
|
||||||
|
#define DELTA 10
|
||||||
|
|
||||||
|
// prototypes for non-class functions
|
||||||
|
|
||||||
|
void box_merge(void *, void *, int *, MPI_Datatype *);
|
||||||
|
void median_merge(void *, void *, int *, MPI_Datatype *);
|
||||||
|
|
||||||
|
// NOTE: if want to have reuse flag, need to sum Tree across procs
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
|
||||||
|
{
|
||||||
|
MPI_Comm_rank(world,&me);
|
||||||
|
MPI_Comm_size(world,&nprocs);
|
||||||
|
|
||||||
|
ndot = maxdot = 0;
|
||||||
|
dots = NULL;
|
||||||
|
|
||||||
|
nlist = maxlist = 0;
|
||||||
|
dotlist = dotmark = NULL;
|
||||||
|
|
||||||
|
maxbuf = 0;
|
||||||
|
buf = NULL;
|
||||||
|
|
||||||
|
maxrecv = maxsend = 0;
|
||||||
|
recvproc = recvindex = sendproc = sendindex = NULL;
|
||||||
|
|
||||||
|
tree = NULL;
|
||||||
|
irregular = NULL;
|
||||||
|
|
||||||
|
// create MPI data and function types for box and median AllReduce ops
|
||||||
|
|
||||||
|
MPI_Type_contiguous(6,MPI_DOUBLE,&box_type);
|
||||||
|
MPI_Type_commit(&box_type);
|
||||||
|
MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type);
|
||||||
|
MPI_Type_commit(&med_type);
|
||||||
|
|
||||||
|
MPI_Op_create(box_merge,1,&box_op);
|
||||||
|
MPI_Op_create(median_merge,1,&med_op);
|
||||||
|
|
||||||
|
reuse = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
RCB::~RCB()
|
||||||
|
{
|
||||||
|
memory->sfree(dots);
|
||||||
|
memory->destroy(dotlist);
|
||||||
|
memory->destroy(dotmark);
|
||||||
|
memory->sfree(buf);
|
||||||
|
|
||||||
|
memory->destroy(recvproc);
|
||||||
|
memory->destroy(recvindex);
|
||||||
|
memory->destroy(sendproc);
|
||||||
|
memory->destroy(sendindex);
|
||||||
|
|
||||||
|
memory->sfree(tree);
|
||||||
|
delete irregular;
|
||||||
|
|
||||||
|
MPI_Type_free(&med_type);
|
||||||
|
MPI_Type_free(&box_type);
|
||||||
|
MPI_Op_free(&box_op);
|
||||||
|
MPI_Op_free(&med_op);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
perform RCB balancing of N particles at coords X in bounding box LO/HI
|
||||||
|
if wt = NULL, ignore per-particle weights
|
||||||
|
if wt defined, per-particle weights > 0.0
|
||||||
|
dimension = 2 or 3
|
||||||
|
as documented in rcb.h:
|
||||||
|
sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
|
||||||
|
all proc particles will be inside or on surface of 3-d box
|
||||||
|
defined by final lo/hi
|
||||||
|
// NOTE: worry about re-use of data structs for fix balance
|
||||||
|
// NOTE: should I get rid of wt all together, will it be used?
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void RCB::compute(int dimension, int n, double **x, double *wt,
|
||||||
|
double *bboxlo, double *bboxhi)
|
||||||
|
{
|
||||||
|
int i,j,k;
|
||||||
|
int keep,outgoing,incoming,incoming2;
|
||||||
|
int dim,markactive;
|
||||||
|
int indexlo,indexhi;
|
||||||
|
int first_iteration,breakflag;
|
||||||
|
double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
|
||||||
|
double targetlo,targethi;
|
||||||
|
double valuemin,valuemax,valuehalf;
|
||||||
|
double tolerance;
|
||||||
|
MPI_Comm comm,comm_half;
|
||||||
|
MPI_Request request,request2;
|
||||||
|
MPI_Status status;
|
||||||
|
Median med,medme;
|
||||||
|
|
||||||
|
// create list of my Dots
|
||||||
|
|
||||||
|
ndot = nkeep = noriginal = n;
|
||||||
|
|
||||||
|
if (ndot > maxdot) {
|
||||||
|
maxdot = ndot;
|
||||||
|
memory->sfree(dots);
|
||||||
|
dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ndot; i++) {
|
||||||
|
dots[i].x[0] = x[i][0];
|
||||||
|
dots[i].x[1] = x[i][1];
|
||||||
|
dots[i].x[2] = x[i][2];
|
||||||
|
dots[i].proc = me;
|
||||||
|
dots[i].index = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wt)
|
||||||
|
for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
|
||||||
|
else
|
||||||
|
for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
|
||||||
|
|
||||||
|
// initial bounding box = simulation box
|
||||||
|
// includes periodic or shrink-wrapped boundaries
|
||||||
|
|
||||||
|
lo = bbox.lo;
|
||||||
|
hi = bbox.hi;
|
||||||
|
|
||||||
|
lo[0] = bboxlo[0];
|
||||||
|
lo[1] = bboxlo[1];
|
||||||
|
lo[2] = bboxlo[2];
|
||||||
|
hi[0] = bboxhi[0];
|
||||||
|
hi[1] = bboxhi[1];
|
||||||
|
hi[2] = bboxhi[2];
|
||||||
|
|
||||||
|
// initialize counters
|
||||||
|
|
||||||
|
counters[0] = 0;
|
||||||
|
counters[1] = 0;
|
||||||
|
counters[2] = 0;
|
||||||
|
counters[3] = ndot;
|
||||||
|
counters[4] = maxdot;
|
||||||
|
counters[5] = 0;
|
||||||
|
counters[6] = 0;
|
||||||
|
|
||||||
|
// create communicator for use in recursion
|
||||||
|
|
||||||
|
MPI_Comm_dup(world,&comm);
|
||||||
|
|
||||||
|
// recurse until partition is a single proc = me
|
||||||
|
// proclower,procupper = lower,upper procs in partition
|
||||||
|
// procmid = 1st proc in upper half of partition
|
||||||
|
|
||||||
|
int procpartner,procpartner2;
|
||||||
|
int readnumber;
|
||||||
|
|
||||||
|
int procmid;
|
||||||
|
int proclower = 0;
|
||||||
|
int procupper = nprocs - 1;
|
||||||
|
|
||||||
|
while (proclower != procupper) {
|
||||||
|
|
||||||
|
// if odd # of procs, lower partition gets extra one
|
||||||
|
|
||||||
|
procmid = proclower + (procupper - proclower) / 2 + 1;
|
||||||
|
|
||||||
|
// determine communication partner(s)
|
||||||
|
// readnumber = # of proc partners to read from
|
||||||
|
|
||||||
|
if (me < procmid)
|
||||||
|
procpartner = me + (procmid - proclower);
|
||||||
|
else
|
||||||
|
procpartner = me - (procmid - proclower);
|
||||||
|
|
||||||
|
int readnumber = 1;
|
||||||
|
if (procpartner > procupper) {
|
||||||
|
readnumber = 0;
|
||||||
|
procpartner--;
|
||||||
|
}
|
||||||
|
if (me == procupper && procpartner != procmid - 1) {
|
||||||
|
readnumber = 2;
|
||||||
|
procpartner2 = procpartner + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// wttot = summed weight of entire partition
|
||||||
|
// search tolerance = largest single weight (plus epsilon)
|
||||||
|
// targetlo = desired weight in lower half of partition
|
||||||
|
// targethi = desired weight in upper half of partition
|
||||||
|
|
||||||
|
wtmax = wtsum = 0.0;
|
||||||
|
|
||||||
|
if (wt) {
|
||||||
|
for (i = 0; i < ndot; i++) {
|
||||||
|
wtsum += dots[i].wt;
|
||||||
|
if (dots[i].wt > wtmax) wtmax = dots[i].wt;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
|
||||||
|
wtmax = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
|
||||||
|
if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
|
||||||
|
else tolerance = 1.0;
|
||||||
|
|
||||||
|
tolerance *= 1.0 + TINY;
|
||||||
|
targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
|
||||||
|
targethi = wttot - targetlo;
|
||||||
|
|
||||||
|
// dim = dimension to bisect on
|
||||||
|
// do not allow choice of z dimension for 2d system
|
||||||
|
|
||||||
|
dim = 0;
|
||||||
|
if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1;
|
||||||
|
if (dimension == 3) {
|
||||||
|
if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2;
|
||||||
|
if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// create active list and mark array for dots
|
||||||
|
// initialize active list to all dots
|
||||||
|
|
||||||
|
if (ndot > maxlist) {
|
||||||
|
memory->destroy(dotlist);
|
||||||
|
memory->destroy(dotmark);
|
||||||
|
maxlist = maxdot;
|
||||||
|
memory->create(dotlist,maxlist,"RCB:dotlist");
|
||||||
|
memory->create(dotmark,maxlist,"RCB:dotmark");
|
||||||
|
}
|
||||||
|
|
||||||
|
nlist = ndot;
|
||||||
|
for (i = 0; i < nlist; i++) dotlist[i] = i;
|
||||||
|
|
||||||
|
// median iteration
|
||||||
|
// zoom in on bisector until correct # of dots in each half of partition
|
||||||
|
// as each iteration of median-loop begins, require:
|
||||||
|
// all non-active dots are marked with 0/1 in dotmark
|
||||||
|
// valuemin <= every active dot <= valuemax
|
||||||
|
// wtlo, wthi = total wt of non-active dots
|
||||||
|
// when leave median-loop, require only:
|
||||||
|
// valuehalf = correct cut position
|
||||||
|
// all dots <= valuehalf are marked with 0 in dotmark
|
||||||
|
// all dots >= valuehalf are marked with 1 in dotmark
|
||||||
|
// markactive = which side of cut is active = 0/1
|
||||||
|
// indexlo,indexhi = indices of dot closest to median
|
||||||
|
|
||||||
|
wtlo = wthi = 0.0;
|
||||||
|
valuemin = lo[dim];
|
||||||
|
valuemax = hi[dim];
|
||||||
|
first_iteration = 1;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
|
||||||
|
// choose bisector value
|
||||||
|
// use old value on 1st iteration if old cut dimension is the same
|
||||||
|
// on 2nd option: could push valuehalf towards geometric center
|
||||||
|
// with "1.0-factor" to force overshoot
|
||||||
|
|
||||||
|
if (first_iteration && reuse && dim == tree[procmid].dim) {
|
||||||
|
counters[5]++;
|
||||||
|
valuehalf = tree[procmid].cut;
|
||||||
|
if (valuehalf < valuemin || valuehalf > valuemax)
|
||||||
|
valuehalf = 0.5 * (valuemin + valuemax);
|
||||||
|
} else if (wt)
|
||||||
|
valuehalf = valuemin + (targetlo - wtlo) /
|
||||||
|
(wttot - wtlo - wthi) * (valuemax - valuemin);
|
||||||
|
else
|
||||||
|
valuehalf = 0.5 * (valuemin + valuemax);
|
||||||
|
|
||||||
|
first_iteration = 0;
|
||||||
|
|
||||||
|
// initialize local median data structure
|
||||||
|
|
||||||
|
medme.totallo = medme.totalhi = 0.0;
|
||||||
|
medme.valuelo = -MYHUGE;
|
||||||
|
medme.valuehi = MYHUGE;
|
||||||
|
medme.wtlo = medme.wthi = 0.0;
|
||||||
|
medme.countlo = medme.counthi = 0;
|
||||||
|
medme.proclo = medme.prochi = me;
|
||||||
|
|
||||||
|
// mark all active dots on one side or other of bisector
|
||||||
|
// also set all fields in median data struct
|
||||||
|
// save indices of closest dots on either side
|
||||||
|
|
||||||
|
for (j = 0; j < nlist; j++) {
|
||||||
|
i = dotlist[j];
|
||||||
|
if (dots[i].x[dim] <= valuehalf) { // in lower part
|
||||||
|
medme.totallo += dots[i].wt;
|
||||||
|
dotmark[i] = 0;
|
||||||
|
if (dots[i].x[dim] > medme.valuelo) { // my closest dot
|
||||||
|
medme.valuelo = dots[i].x[dim];
|
||||||
|
medme.wtlo = dots[i].wt;
|
||||||
|
medme.countlo = 1;
|
||||||
|
indexlo = i;
|
||||||
|
} else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
|
||||||
|
medme.wtlo += dots[i].wt;
|
||||||
|
medme.countlo++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // in upper part
|
||||||
|
medme.totalhi += dots[i].wt;
|
||||||
|
dotmark[i] = 1;
|
||||||
|
if (dots[i].x[dim] < medme.valuehi) { // my closest dot
|
||||||
|
medme.valuehi = dots[i].x[dim];
|
||||||
|
medme.wthi = dots[i].wt;
|
||||||
|
medme.counthi = 1;
|
||||||
|
indexhi = i;
|
||||||
|
} else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
|
||||||
|
medme.wthi += dots[i].wt;
|
||||||
|
medme.counthi++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// combine median data struct across current subset of procs
|
||||||
|
|
||||||
|
counters[0]++;
|
||||||
|
MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
|
||||||
|
|
||||||
|
// test median guess for convergence
|
||||||
|
// move additional dots that are next to cut across it
|
||||||
|
|
||||||
|
if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
|
||||||
|
|
||||||
|
wtlo += med.totallo;
|
||||||
|
valuehalf = med.valuehi;
|
||||||
|
|
||||||
|
if (med.counthi == 1) { // only one dot to move
|
||||||
|
if (wtlo + med.wthi < targetlo) { // move it, keep iterating
|
||||||
|
if (me == med.prochi) dotmark[indexhi] = 0;
|
||||||
|
}
|
||||||
|
else { // only move if beneficial
|
||||||
|
if (wtlo + med.wthi - targetlo < targetlo - wtlo)
|
||||||
|
if (me == med.prochi) dotmark[indexhi] = 0;
|
||||||
|
break; // all done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // multiple dots to move
|
||||||
|
breakflag = 0;
|
||||||
|
wtok = 0.0;
|
||||||
|
if (medme.valuehi == med.valuehi) wtok = medme.wthi;
|
||||||
|
if (wtlo + med.wthi >= targetlo) { // all done
|
||||||
|
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
|
||||||
|
wtmax = targetlo - wtlo;
|
||||||
|
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
|
||||||
|
breakflag = 1;
|
||||||
|
} // wtok = most I can move
|
||||||
|
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
|
||||||
|
i = dotlist[j];
|
||||||
|
if (dots[i].x[dim] == med.valuehi) { // only move if better
|
||||||
|
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
|
||||||
|
dotmark[i] = 0;
|
||||||
|
wtsum += dots[i].wt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (breakflag) break; // done if moved enough
|
||||||
|
}
|
||||||
|
|
||||||
|
wtlo += med.wthi;
|
||||||
|
if (targetlo-wtlo <= tolerance) break; // close enough
|
||||||
|
|
||||||
|
valuemin = med.valuehi; // iterate again
|
||||||
|
markactive = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
|
||||||
|
|
||||||
|
wthi += med.totalhi;
|
||||||
|
valuehalf = med.valuelo;
|
||||||
|
|
||||||
|
if (med.countlo == 1) { // only one dot to move
|
||||||
|
if (wthi + med.wtlo < targethi) { // move it, keep iterating
|
||||||
|
if (me == med.proclo) dotmark[indexlo] = 1;
|
||||||
|
}
|
||||||
|
else { // only move if beneficial
|
||||||
|
if (wthi + med.wtlo - targethi < targethi - wthi)
|
||||||
|
if (me == med.proclo) dotmark[indexlo] = 1;
|
||||||
|
break; // all done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // multiple dots to move
|
||||||
|
breakflag = 0;
|
||||||
|
wtok = 0.0;
|
||||||
|
if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
|
||||||
|
if (wthi + med.wtlo >= targethi) { // all done
|
||||||
|
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
|
||||||
|
wtmax = targethi - wthi;
|
||||||
|
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
|
||||||
|
breakflag = 1;
|
||||||
|
} // wtok = most I can move
|
||||||
|
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
|
||||||
|
i = dotlist[j];
|
||||||
|
if (dots[i].x[dim] == med.valuelo) { // only move if better
|
||||||
|
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
|
||||||
|
dotmark[i] = 1;
|
||||||
|
wtsum += dots[i].wt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (breakflag) break; // done if moved enough
|
||||||
|
}
|
||||||
|
|
||||||
|
wthi += med.wtlo;
|
||||||
|
if (targethi-wthi <= tolerance) break; // close enough
|
||||||
|
|
||||||
|
valuemax = med.valuelo; // iterate again
|
||||||
|
markactive = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
else // Goldilocks result: both partitions just right
|
||||||
|
break;
|
||||||
|
|
||||||
|
// shrink the active list
|
||||||
|
|
||||||
|
k = 0;
|
||||||
|
for (j = 0; j < nlist; j++) {
|
||||||
|
i = dotlist[j];
|
||||||
|
if (dotmark[i] == markactive) dotlist[k++] = i;
|
||||||
|
}
|
||||||
|
nlist = k;
|
||||||
|
}
|
||||||
|
|
||||||
|
// found median
|
||||||
|
// store cut info only if I am procmid
|
||||||
|
|
||||||
|
if (me == procmid) {
|
||||||
|
cut = valuehalf;
|
||||||
|
cutdim = dim;
|
||||||
|
}
|
||||||
|
|
||||||
|
// use cut to shrink my RCB bounding box
|
||||||
|
|
||||||
|
if (me < procmid) hi[dim] = valuehalf;
|
||||||
|
else lo[dim] = valuehalf;
|
||||||
|
|
||||||
|
// outgoing = number of dots to ship to partner
|
||||||
|
// nkeep = number of dots that have never migrated
|
||||||
|
|
||||||
|
markactive = (me < procpartner);
|
||||||
|
for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
|
||||||
|
if (dotmark[i] == markactive) outgoing++;
|
||||||
|
else if (i < nkeep) keep++;
|
||||||
|
nkeep = keep;
|
||||||
|
|
||||||
|
// alert partner how many dots I'll send, read how many I'll recv
|
||||||
|
|
||||||
|
MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
|
||||||
|
incoming = 0;
|
||||||
|
if (readnumber) {
|
||||||
|
MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,&status);
|
||||||
|
if (readnumber == 2) {
|
||||||
|
MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,&status);
|
||||||
|
incoming += incoming2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if need to alloc more space
|
||||||
|
|
||||||
|
int ndotnew = ndot - outgoing + incoming;
|
||||||
|
if (ndotnew > maxdot) {
|
||||||
|
while (maxdot < ndotnew) maxdot += DELTA;
|
||||||
|
dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
|
||||||
|
counters[6]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
counters[1] += outgoing;
|
||||||
|
counters[2] += incoming;
|
||||||
|
if (ndotnew > counters[3]) counters[3] = ndotnew;
|
||||||
|
if (maxdot > counters[4]) counters[4] = maxdot;
|
||||||
|
|
||||||
|
// malloc comm send buffer
|
||||||
|
|
||||||
|
if (outgoing > maxbuf) {
|
||||||
|
memory->sfree(buf);
|
||||||
|
maxbuf = outgoing;
|
||||||
|
buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
|
||||||
|
}
|
||||||
|
|
||||||
|
// fill buffer with dots that are marked for sending
|
||||||
|
// pack down the unmarked ones
|
||||||
|
|
||||||
|
keep = outgoing = 0;
|
||||||
|
for (i = 0; i < ndot; i++) {
|
||||||
|
if (dotmark[i] == markactive)
|
||||||
|
memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
|
||||||
|
else
|
||||||
|
memcpy(&dots[keep++],&dots[i],sizeof(Dot));
|
||||||
|
}
|
||||||
|
|
||||||
|
// post receives for dots
|
||||||
|
|
||||||
|
if (readnumber > 0) {
|
||||||
|
MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
|
||||||
|
procpartner,1,world,&request);
|
||||||
|
if (readnumber == 2) {
|
||||||
|
keep += incoming - incoming2;
|
||||||
|
MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
|
||||||
|
procpartner2,1,world,&request2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// handshake before sending dots to insure recvs have been posted
|
||||||
|
|
||||||
|
if (readnumber > 0) {
|
||||||
|
MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
|
||||||
|
if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
|
||||||
|
}
|
||||||
|
MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,&status);
|
||||||
|
|
||||||
|
// send dots to partner
|
||||||
|
|
||||||
|
MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
|
||||||
|
|
||||||
|
// wait until all dots are received
|
||||||
|
|
||||||
|
if (readnumber > 0) {
|
||||||
|
MPI_Wait(&request,&status);
|
||||||
|
if (readnumber == 2) MPI_Wait(&request2,&status);
|
||||||
|
}
|
||||||
|
|
||||||
|
ndot = ndotnew;
|
||||||
|
|
||||||
|
// cut partition in half, create new communicators of 1/2 size
|
||||||
|
|
||||||
|
int split;
|
||||||
|
if (me < procmid) {
|
||||||
|
procupper = procmid - 1;
|
||||||
|
split = 0;
|
||||||
|
} else {
|
||||||
|
proclower = procmid;
|
||||||
|
split = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Comm_split(comm,split,me,&comm_half);
|
||||||
|
MPI_Comm_free(&comm);
|
||||||
|
comm = comm_half;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clean up
|
||||||
|
|
||||||
|
MPI_Comm_free(&comm);
|
||||||
|
|
||||||
|
// set public variables with results of rebalance
|
||||||
|
|
||||||
|
nfinal = ndot;
|
||||||
|
|
||||||
|
if (nfinal > maxrecv) {
|
||||||
|
memory->destroy(recvproc);
|
||||||
|
memory->destroy(recvindex);
|
||||||
|
maxrecv = nfinal;
|
||||||
|
memory->create(recvproc,maxrecv,"RCB:recvproc");
|
||||||
|
memory->create(recvindex,maxrecv,"RCB:recvindex");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nfinal; i++) {
|
||||||
|
recvproc[i] = dots[i].proc;
|
||||||
|
recvindex[i] = dots[i].index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
custom MPI reduce operation
|
||||||
|
merge of each component of an RCB bounding box
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
|
||||||
|
|
||||||
|
{
|
||||||
|
RCB::BBox *box1 = (RCB::BBox *) in;
|
||||||
|
RCB::BBox *box2 = (RCB::BBox *) inout;
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i];
|
||||||
|
if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
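box_merge() above is registered as the commutative box_op in the RCB constructor, together with the 6-double box_type. One way such an operator is typically applied (a hedged sketch; this call is not copied from rcb.cpp): an Allreduce that turns every proc's local bounding box into the global union.

#include <mpi.h>

struct BBox { double lo[3],hi[3]; };   // same layout as RCB::BBox / box_type

// reduce per-rank boxes to their union: box_merge keeps the elementwise
// min of lo[] and max of hi[]
BBox union_of_boxes(BBox mine, MPI_Datatype box_type, MPI_Op box_op,
                    MPI_Comm comm)
{
  MPI_Allreduce(MPI_IN_PLACE,&mine,1,box_type,box_op,comm);
  return mine;
}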
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
custom MPI reduce operation
|
||||||
|
merge median data structure
|
||||||
|
on input:
|
||||||
|
in,inout->totallo, totalhi = weight in both partitions on this proc
|
||||||
|
valuelo, valuehi = pos of nearest dot(s) to cut on this proc
|
||||||
|
wtlo, wthi = total wt of dot(s) at that pos on this proc
|
||||||
|
countlo, counthi = # of dot(s) nearest to cut on this proc
|
||||||
|
proclo, prochi = not used
|
||||||
|
on exit:
|
||||||
|
inout-> totallo, totalhi = total # of active dots in both partitions
|
||||||
|
valuelo, valuehi = pos of nearest dot(s) to cut
|
||||||
|
wtlo, wthi = total wt of dot(s) at that position
|
||||||
|
countlo, counthi = total # of dot(s) nearest to cut
|
||||||
|
proclo, prochi = one unique proc who owns a nearest dot
|
||||||
|
all procs must get same proclo,prochi
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
|
||||||
|
|
||||||
|
{
|
||||||
|
RCB::Median *med1 = (RCB::Median *) in;
|
||||||
|
RCB::Median *med2 = (RCB::Median *) inout;
|
||||||
|
|
||||||
|
med2->totallo += med1->totallo;
|
||||||
|
if (med1->valuelo > med2->valuelo) {
|
||||||
|
med2->valuelo = med1->valuelo;
|
||||||
|
med2->wtlo = med1->wtlo;
|
||||||
|
med2->countlo = med1->countlo;
|
||||||
|
med2->proclo = med1->proclo;
|
||||||
|
}
|
||||||
|
else if (med1->valuelo == med2->valuelo) {
|
||||||
|
med2->wtlo += med1->wtlo;
|
||||||
|
med2->countlo += med1->countlo;
|
||||||
|
if (med1->proclo < med2->proclo) med2->proclo = med1->proclo;
|
||||||
|
}
|
||||||
|
|
||||||
|
med2->totalhi += med1->totalhi;
|
||||||
|
if (med1->valuehi < med2->valuehi) {
|
||||||
|
med2->valuehi = med1->valuehi;
|
||||||
|
med2->wthi = med1->wthi;
|
||||||
|
med2->counthi = med1->counthi;
|
||||||
|
med2->prochi = med1->prochi;
|
||||||
|
}
|
||||||
|
else if (med1->valuehi == med2->valuehi) {
|
||||||
|
med2->wthi += med1->wthi;
|
||||||
|
med2->counthi += med1->counthi;
|
||||||
|
if (med1->prochi < med2->prochi) med2->prochi = med1->prochi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
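box_merge() and median_merge() above follow the MPI_User_function signature, so they can be installed as user-defined reduction operators; the RCB class keeps the corresponding handles (box_op/med_op with matching derived datatypes, see src/rcb.h below). The short sketch that follows only illustrates that registration pattern. It is not part of this commit, and the names bbox_union and reduce_bbox are hypothetical.

// Illustrative sketch (not from this commit): register a callback with the
// MPI_User_function signature as a custom reduction and apply it with
// MPI_Allreduce, the same pattern the RCB merge callbacks are written for.
#include <mpi.h>

struct BBox { double lo[3], hi[3]; };   // same layout as RCB::BBox

// component-wise union of bounding boxes, as in box_merge() above
static void bbox_union(void *in, void *inout, int *len, MPI_Datatype *)
{
  BBox *a = (BBox *) in;
  BBox *b = (BBox *) inout;
  for (int n = 0; n < *len; n++)
    for (int i = 0; i < 3; i++) {
      if (a[n].lo[i] < b[n].lo[i]) b[n].lo[i] = a[n].lo[i];
      if (a[n].hi[i] > b[n].hi[i]) b[n].hi[i] = a[n].hi[i];
    }
}

void reduce_bbox(BBox &mybox, MPI_Comm comm)
{
  MPI_Datatype box_type;
  MPI_Op box_op;
  MPI_Type_contiguous(6, MPI_DOUBLE, &box_type);   // 3 lo + 3 hi doubles
  MPI_Type_commit(&box_type);
  MPI_Op_create(bbox_union, 1, &box_op);           // 1 = commutative

  BBox global = mybox;
  MPI_Allreduce(&mybox, &global, 1, box_type, box_op, comm);
  mybox = global;                                  // union over all ranks

  MPI_Op_free(&box_op);
  MPI_Type_free(&box_type);
}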
/* ----------------------------------------------------------------------
   invert the RCB rebalance result to convert receive info into send info
   sortflag = flag for sorting order of received messages by proc ID
------------------------------------------------------------------------- */

void RCB::invert(int sortflag)
{
  Invert *sbuf,*rbuf;

  // only create Irregular if not previously created
  // allows Irregular to persist for multiple RCB calls by fix balance

  if (!irregular) irregular = new Irregular(lmp);

  // nsend = # of dots to request from other procs

  int nsend = nfinal-nkeep;

  int *proclist;
  memory->create(proclist,nsend,"RCB:proclist");

  Invert *sinvert =
    (Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert");

  int m = 0;
  for (int i = nkeep; i < nfinal; i++) {
    proclist[m] = recvproc[i];
    sinvert[m].rindex = recvindex[i];
    sinvert[m].sproc = me;
    sinvert[m].sindex = i;
    m++;
  }

  // perform inversion via irregular comm
  // nrecv = # of my dots to send to other procs

  int nrecv = irregular->create_data(nsend,proclist,sortflag);
  Invert *rinvert =
    (Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert");
  irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert);
  irregular->destroy_data();

  // set public variables from requests to send my dots

  if (noriginal > maxsend) {
    memory->destroy(sendproc);
    memory->destroy(sendindex);
    maxsend = noriginal;
    memory->create(sendproc,maxsend,"RCB:sendproc");
    memory->create(sendindex,maxsend,"RCB:sendindex");
  }

  for (int i = 0; i < nkeep; i++) {
    sendproc[recvindex[i]] = me;
    sendindex[recvindex[i]] = i;
  }

  for (int i = 0; i < nrecv; i++) {
    m = rinvert[i].rindex;
    sendproc[m] = rinvert[i].sproc;
    sendindex[m] = rinvert[i].sindex;
  }

  // clean-up

  memory->destroy(proclist);
  memory->destroy(sinvert);
  memory->destroy(rinvert);
}

/* ----------------------------------------------------------------------
   memory use of Irregular
------------------------------------------------------------------------- */

bigint RCB::memory_usage()
{
  bigint bytes = 0;
  if (irregular) bytes += irregular->memory_usage();
  return bytes;
}

// -----------------------------------------------------------------------
// DEBUG methods
// -----------------------------------------------------------------------
/*
// consistency checks on RCB results

void RCB::check()
{
  int i,iflag,total1,total2;
  double weight,wtmax,wtmin,wtone,tolerance;

  // check that total # of dots remained the same

  MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world);
  if (total1 != total2) {
    if (me == 0)
      printf("ERROR: Points before RCB = %d, Points after RCB = %d\n",
             total1,total2);
  }

  // check that result is load-balanced within log2(P)*max-wt

  weight = wtone = 0.0;
  for (i = 0; i < ndot; i++) {
    weight += dots[i].wt;
    if (dots[i].wt > wtone) wtone = dots[i].wt;
  }

  MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
  MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
  MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world);

  // i = smallest power-of-2 >= nprocs
  // tolerance = largest-single-weight*log2(nprocs)

  for (i = 0; (nprocs >> i) != 0; i++);
  tolerance = tolerance * i * (1.0 + TINY);

  if (wtmax - wtmin > tolerance) {
    if (me == 0)
      printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance);
    MPI_Barrier(world);
    if (weight == wtmin) printf("  Proc %d has weight = %g\n",me,weight);
    if (weight == wtmax) printf("  Proc %d has weight = %g\n",me,weight);
  }

  MPI_Barrier(world);

  // check that final set of points is inside RCB box of each proc

  iflag = 0;
  for (i = 0; i < ndot; i++) {
    if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] ||
        dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] ||
        dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2])
      iflag++;
  }
  if (iflag > 0)
    printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me);
}

// stats for RCB decomposition

void RCB::stats(int flag)
{
  int i,iflag,sum,min,max;
  double ave,rsum,rmin,rmax;
  double weight,wttot,wtmin,wtmax;

  if (me == 0) printf("RCB Statistics:\n");

  // distribution info

  for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt;
  MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world);
  MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
  MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);

  if (me == 0) {
    printf(" Total weight of dots = %g\n",wttot);
    printf(" Weight on each proc: ave = %g, max = %g, min = %g\n",
           wttot/nprocs,wtmax,wtmin);
  }
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d has weight = %g\n",me,weight);
  }

  for (i = 0, weight = 0.0; i < ndot; i++)
    if (dots[i].wt > weight) weight = dots[i].wt;
  MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);

  if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d max weight = %g\n",me,weight);
  }

  // counter info

  MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d median count = %d\n",me,counters[0]);
  }

  MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d send count = %d\n",me,counters[1]);
  }

  MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d recv count = %d\n",me,counters[2]);
  }

  MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d max dots = %d\n",me,counters[3]);
  }

  MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d max memory = %d\n",me,counters[4]);
  }

  if (reuse) {
    MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world);
    MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world);
    MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world);
    ave = ((double) sum)/nprocs;
    if (me == 0)
      printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max);
    if (flag) {
      MPI_Barrier(world);
      printf("    Proc %d # of Reuse = %d\n",me,counters[5]);
    }
  }

  MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world);
  MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world);
  MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world);
  ave = ((double) sum)/nprocs;
  if (me == 0)
    printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max);
  if (flag) {
    MPI_Barrier(world);
    printf("    Proc %d # of OverAlloc = %d\n",me,counters[6]);
  }

  // RCB boxes for each proc

  if (flag) {
    if (me == 0) printf(" RCB sub-domain boxes:\n");
    for (i = 0; i < 3; i++) {
      MPI_Barrier(world);
      if (me == 0) printf("    Dimension %d\n",i+1);
      MPI_Barrier(world);
      printf("    Proc = %d: Box = %g %g\n",me,lo[i],hi[i]);
    }
  }
}
*/
131 src/rcb.h Normal file
@ -0,0 +1,131 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifndef LAMMPS_RCB_H
#define LAMMPS_RCB_H

#include "mpi.h"
#include "pointers.h"

namespace LAMMPS_NS {

class RCB : protected Pointers {
 public:
  // set by compute()

  int noriginal;          // # of dots I own before balancing
  int nfinal;             // # of dots I own after balancing
  int nkeep;              // how many dots of noriginal I still own
                          //   will be first nkept of nfinal list
  int *recvproc;          // proc IDs of nfinal dots
  int *recvindex;         // index of nfinal dots on owning procs
                          //   based on input list for compute()
  double *lo,*hi;         // final bounding box of my RCB sub-domain
  double cut;             // single cut (in Tree) owned by this proc
  int cutdim;             // dimension (0,1,2) of the cut

  // set by invert()

  int *sendproc;          // proc to send each of my noriginal dots to
  int *sendindex;         // index of dot in receiver's nfinal list

  RCB(class LAMMPS *);
  ~RCB();
  void compute(int, int, double **, double *, double *, double *);
  void invert(int sortflag = 0);
  bigint memory_usage();

  // DEBUG methods
  //void check();
  //void stats(int);

  // RCB cut info

  struct Median {
    double totallo,totalhi;   // weight in each half of active partition
    double valuelo,valuehi;   // position of dot(s) nearest to cut
    double wtlo,wthi;         // total weight of dot(s) at that position
    int countlo,counthi;      // # of dots at that position
    int proclo,prochi;        // unique proc who owns a nearest dot
  };

  struct BBox {
    double lo[3],hi[3];       // corner points of a bounding box
  };

 private:
  int me,nprocs;

  // point to balance on

  struct Dot {
    double x[3];          // coord of point
    double wt;            // weight of point
    int proc;             // owning proc
    int index;            // index on owning proc
  };

  // tree of RCB cuts

  struct Tree {
    double cut;           // position of cut
    int dim;              // dimension = 0/1/2 of cut
  };

  // inversion message

  struct Invert {
    int rindex;           // index on receiving proc
    int sproc;            // sending proc
    int sindex;           // index on sending proc
  };

  Dot *dots;        // dots on this proc
  int ndot;         // # of dots on this proc
  int maxdot;       // allocated size of dots
  int ndotorig;

  int nlist;
  int maxlist;
  int *dotlist;
  int *dotmark;

  int maxbuf;
  Dot *buf;

  int maxrecv,maxsend;

  BBox bbox;
  class Irregular *irregular;

  MPI_Op box_op,med_op;
  MPI_Datatype box_type,med_type;

  int reuse;          // 1/0 to use/not use previous cuts
  int dottop;         // dots >= this index are new
  double bboxlo[3];   // bounding box of final RCB sub-domain
  double bboxhi[3];
  Tree *tree;         // tree of RCB cuts, used by reuse()
  int counters[7];    // diagnostic counts
                      // 0 = # of median iterations
                      // 1 = # of points sent
                      // 2 = # of points received
                      // 3 = most points this proc ever owns
                      // 4 = most point memory this proc ever allocs
                      // 5 = # of times a previous cut is re-used
                      // 6 = # of reallocs of point vector
};

}

#endif
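Based only on the public interface declared above, a caller such as a load-balancing fix would drive RCB roughly as in the hedged sketch below: compute() builds the new decomposition, nfinal/recvproc/recvindex describe which original dots this proc now owns, and invert() fills sendproc/sendindex with the destination of each original dot. The meaning of compute()'s unnamed parameters (dimension, dot count, coordinates, weights, enclosing bounding box) is inferred from the member comments and may not match the implementation exactly; rebalance_sketch is a hypothetical helper, not code from this commit.

// Hypothetical usage sketch of the RCB class declared above.
#include "rcb.h"
using namespace LAMMPS_NS;

void rebalance_sketch(LAMMPS *lmp, int dimension, int n,
                      double **x, double *wt,
                      double *bboxlo, double *bboxhi)
{
  RCB rcb(lmp);

  // cut the bounding box recursively so each proc gets ~equal total weight
  rcb.compute(dimension, n, x, wt, bboxlo, bboxhi);

  // receive view: which original dot (proc,index) ends up on this proc
  for (int i = 0; i < rcb.nfinal; i++) {
    int owner = rcb.recvproc[i];     // proc that owned dot i before balancing
    int index = rcb.recvindex[i];    // its index on that proc
    (void) owner; (void) index;
  }

  // send view: where each of my original dots must go
  rcb.invert();
  for (int i = 0; i < rcb.noriginal; i++) {
    int dest = rcb.sendproc[i];      // destination proc of my dot i
    int slot = rcb.sendindex[i];     // its slot in the receiver's nfinal list
    (void) dest; (void) slot;
  }
}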
@ -29,6 +29,8 @@ using namespace LAMMPS_NS;
#define LB_FACTOR 1.1
#define EPSILON   1.0e-6

enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files

/* ---------------------------------------------------------------------- */

Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {}
@ -220,17 +222,33 @@ void Replicate::command(int narg, char **arg)
    sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
  }

  if (comm->layout != LAYOUT_TILED) {
    if (domain->xperiodic) {
      if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
      if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
    }
    if (domain->yperiodic) {
      if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
      if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
    }
    if (domain->zperiodic) {
      if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
      if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
    }

  } else {
    if (domain->xperiodic) {
      if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
      if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
    }
    if (domain->yperiodic) {
      if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
      if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
    }
    if (domain->zperiodic) {
      if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
      if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
    }
  }

  // loop over all procs
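The hunk above widens a proc's sub-domain by epsilon only where it touches a periodic global boundary: with the brick layout that condition is detected from the proc's grid coordinates (myloc against procgrid), with the tiled layout from its fractional split bounds (mysplit equal to 0.0 or 1.0). A minimal standalone sketch of that per-dimension test follows; the names are assumed and the snippet has no LAMMPS dependencies.

// Standalone sketch (assumed names) of the boundary test performed above.
struct ProcDomain {
  bool tiled;               // false = brick layout, true = tiled layout
  int myloc, nprocs;        // my position and # of procs along this dimension
  double splitlo, splithi;  // my fractional bounds in [0,1] (tiled layout)
};

void widen_if_on_boundary(const ProcDomain &p, bool periodic,
                          double eps, double &sublo, double &subhi)
{
  if (!periodic) return;
  // a proc on the lower/upper global boundary extends its bounds by eps
  bool at_lower = p.tiled ? (p.splitlo == 0.0) : (p.myloc == 0);
  bool at_upper = p.tiled ? (p.splithi == 1.0) : (p.myloc == p.nprocs - 1);
  if (at_lower) sublo -= eps;
  if (at_upper) subhi += eps;
}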