git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7362 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -66,7 +66,7 @@ void PairTersoffZBL::read_file(char *file)
|
|||||||
int params_per_line = 21;
|
int params_per_line = 21;
|
||||||
char **words = new char*[params_per_line+1];
|
char **words = new char*[params_per_line+1];
|
||||||
|
|
||||||
delete [] params;
|
memory->sfree(params);
|
||||||
params = NULL;
|
params = NULL;
|
||||||
nparams = 0;
|
nparams = 0;
|
||||||
|
|
||||||
|
|||||||
@ -282,7 +282,8 @@ void PairLJCharmmCoulCharmm::coeff(int narg, char **arg)
|
|||||||
void PairLJCharmmCoulCharmm::init_style()
|
void PairLJCharmmCoulCharmm::init_style()
|
||||||
{
|
{
|
||||||
if (!atom->q_flag)
|
if (!atom->q_flag)
|
||||||
error->all(FLERR,"Pair style lj/charmm/coul/charmm requires atom attribute q");
|
error->all(FLERR,
|
||||||
|
"Pair style lj/charmm/coul/charmm requires atom attribute q");
|
||||||
|
|
||||||
neighbor->request(this);
|
neighbor->request(this);
|
||||||
|
|
||||||
|
|||||||
@ -132,33 +132,39 @@ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
|
|
||||||
if (universe->me == 0) {
|
if (universe->me == 0) {
|
||||||
if (universe->uscreen) {
|
if (universe->uscreen) {
|
||||||
fprintf(universe->uscreen,"Rspace/Kspace procs in each block:\n");
|
fprintf(universe->uscreen,
|
||||||
|
"Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
|
||||||
int m = 0;
|
int m = 0;
|
||||||
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
||||||
fprintf(universe->uscreen," block %d:",i);
|
fprintf(universe->uscreen," block %d:",i);
|
||||||
int kspace_proc = bmapall[m];
|
int kspace_proc = bmapall[m];
|
||||||
for (int j = 1; j <= ratio; j++)
|
for (int j = 1; j <= ratio; j++)
|
||||||
fprintf(universe->uscreen," %d",bmapall[m+j]);
|
fprintf(universe->uscreen," %d",bmapall[m+j]);
|
||||||
fprintf(universe->uscreen," %d\n",kspace_proc);
|
fprintf(universe->uscreen," %d (",kspace_proc);
|
||||||
/*
|
|
||||||
kspace_proc = bmapall[m];
|
kspace_proc = bmapall[m];
|
||||||
for (int j = 1; j <= ratio; j++)
|
for (int j = 1; j <= ratio; j++)
|
||||||
fprintf(universe->uscreen," %d",
|
fprintf(universe->uscreen," %d",
|
||||||
universe->proc2original[bmapall[m+j]]);
|
universe->uni2orig[bmapall[m+j]]);
|
||||||
fprintf(universe->uscreen," %d\n",universe->proc2original[kspace_proc]);
|
fprintf(universe->uscreen," %d)",universe->uni2orig[kspace_proc]);
|
||||||
*/
|
|
||||||
m += ratio + 1;
|
m += ratio + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (universe->ulogfile) {
|
if (universe->ulogfile) {
|
||||||
fprintf(universe->ulogfile,"Rspace/Kspace procs in each block:\n");
|
fprintf(universe->ulogfile,
|
||||||
|
"Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
|
||||||
int m = 0;
|
int m = 0;
|
||||||
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
|
||||||
fprintf(universe->ulogfile," block %d:",i);
|
fprintf(universe->ulogfile," block %d:",i);
|
||||||
int kspace_proc = bmapall[m++];
|
int kspace_proc = bmapall[m];
|
||||||
for (int j = 1; j <= ratio; j++)
|
for (int j = 1; j <= ratio; j++)
|
||||||
fprintf(universe->ulogfile," %d",bmapall[m++]);
|
fprintf(universe->ulogfile," %d",bmapall[m+j]);
|
||||||
fprintf(universe->ulogfile," %d\n",kspace_proc);
|
fprintf(universe->ulogfile," %d (",kspace_proc);
|
||||||
|
kspace_proc = bmapall[m];
|
||||||
|
for (int j = 1; j <= ratio; j++)
|
||||||
|
fprintf(universe->ulogfile," %d",
|
||||||
|
universe->uni2orig[bmapall[m+j]]);
|
||||||
|
fprintf(universe->ulogfile," %d)",universe->uni2orig[kspace_proc]);
|
||||||
|
m += ratio + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
18
src/comm.cpp
18
src/comm.cpp
@ -164,24 +164,18 @@ void Comm::set_proc_grid()
|
|||||||
ProcMap *pmap = new ProcMap(lmp);
|
ProcMap *pmap = new ProcMap(lmp);
|
||||||
|
|
||||||
// create 3d grid of processors, produces procgrid
|
// create 3d grid of processors, produces procgrid
|
||||||
// can fail (on one partition) if constrained by other partition
|
|
||||||
// if numa_grid() fails, try onelevel_grid()
|
|
||||||
|
|
||||||
int flag;
|
|
||||||
if (gridflag == ONELEVEL) {
|
if (gridflag == ONELEVEL) {
|
||||||
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
|
pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
|
||||||
otherflag,other_style,other_procgrid);
|
otherflag,other_style,other_procgrid);
|
||||||
if (!flag) error->all(FLERR,"Could not create grid of processors");
|
|
||||||
|
|
||||||
} else if (gridflag == TWOLEVEL) {
|
} else if (gridflag == TWOLEVEL) {
|
||||||
flag = pmap->twolevel_grid(nprocs,user_procgrid,procgrid,
|
pmap->twolevel_grid(nprocs,user_procgrid,procgrid,
|
||||||
ncores,user_coregrid,coregrid,
|
ncores,user_coregrid,coregrid,
|
||||||
otherflag,other_style,other_procgrid);
|
otherflag,other_style,other_procgrid);
|
||||||
if (!flag) error->all(FLERR,"Could not create grid of processors");
|
|
||||||
|
|
||||||
} else if (gridflag == NUMA) {
|
} else if (gridflag == NUMA) {
|
||||||
flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
|
pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
|
||||||
if (!flag) error->all(FLERR,"Could not create grid of processors");
|
|
||||||
|
|
||||||
} else if (gridflag == CUSTOM) {
|
} else if (gridflag == CUSTOM) {
|
||||||
pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid);
|
pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid);
|
||||||
|
|||||||
482
src/procmap.cpp
482
src/procmap.cpp
@ -36,50 +36,116 @@ enum{MULTIPLE}; // same as in Comm
|
|||||||
ProcMap::ProcMap(LAMMPS *lmp) : Pointers(lmp) {}
|
ProcMap::ProcMap(LAMMPS *lmp) : Pointers(lmp) {}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create a one-level 3d grid of procs via procs2box()
|
create a one-level 3d grid of procs
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
int ProcMap::onelevel_grid(int nprocs, int *user_procgrid, int *procgrid,
|
void ProcMap::onelevel_grid(int nprocs, int *user_procgrid, int *procgrid,
|
||||||
int otherflag, int other_style_caller,
|
int otherflag, int other_style,
|
||||||
int *other_procgrid_caller)
|
int *other_procgrid)
|
||||||
{
|
{
|
||||||
other_style = other_style_caller;
|
int **factors;
|
||||||
other_procgrid[0] = other_procgrid_caller[0];
|
|
||||||
other_procgrid[1] = other_procgrid_caller[1];
|
|
||||||
other_procgrid[2] = other_procgrid_caller[2];
|
|
||||||
|
|
||||||
int flag = procs2box(nprocs,user_procgrid,procgrid,1,1,1,otherflag);
|
// factors = list of all possible 3 factors of processor count
|
||||||
return flag;
|
|
||||||
|
int npossible = factor(nprocs,NULL);
|
||||||
|
memory->create(factors,npossible,3,"procmap:factors");
|
||||||
|
npossible = factor(nprocs,factors);
|
||||||
|
|
||||||
|
// constrain by 2d, user request, other partition
|
||||||
|
|
||||||
|
if (domain->dimension == 2) npossible = cull_2d(npossible,factors,3);
|
||||||
|
npossible = cull_user(npossible,factors,3,user_procgrid);
|
||||||
|
if (otherflag) npossible = cull_other(npossible,factors,3,
|
||||||
|
other_style,other_procgrid);
|
||||||
|
|
||||||
|
// user/other constraints make failure possible
|
||||||
|
|
||||||
|
if (npossible == 0)
|
||||||
|
error->all(FLERR,"Could not create 3d grid of processors");
|
||||||
|
|
||||||
|
// select best set of 3 factors based on surface area of proc sub-domains
|
||||||
|
|
||||||
|
best_factors(npossible,factors,procgrid,1,1,1);
|
||||||
|
|
||||||
|
// clean-up
|
||||||
|
|
||||||
|
memory->destroy(factors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create a two-level 3d grid of procs and cores via procs2box()
|
create a two-level 3d grid of procs
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
int ProcMap::twolevel_grid(int nprocs, int *user_procgrid, int *procgrid,
|
void ProcMap::twolevel_grid(int nprocs, int *user_procgrid, int *procgrid,
|
||||||
int ncores, int *user_coregrid, int *coregrid,
|
int ncores, int *user_coregrid, int *coregrid,
|
||||||
int otherflag, int other_style_caller,
|
int otherflag, int other_style,
|
||||||
int *other_procgrid_caller)
|
int *other_procgrid)
|
||||||
{
|
{
|
||||||
|
int **nfactors,**cfactors,**factors;
|
||||||
|
|
||||||
if (nprocs % ncores)
|
if (nprocs % ncores)
|
||||||
error->all(FLERR,"Processors twogrid requres proc count "
|
error->all(FLERR,"Processors twogrid requires proc count "
|
||||||
"be a multiple of core count");
|
"be a multiple of core count");
|
||||||
|
|
||||||
|
// nfactors = list of all possible 3 factors of node count
|
||||||
|
// constrain by 2d
|
||||||
|
|
||||||
|
int nnpossible = factor(nprocs/ncores,NULL);
|
||||||
|
memory->create(nfactors,nnpossible,3,"procmap:nfactors");
|
||||||
|
nnpossible = factor(nprocs/ncores,nfactors);
|
||||||
|
|
||||||
error->all(FLERR,
|
if (domain->dimension == 2) nnpossible = cull_2d(nnpossible,nfactors,3);
|
||||||
"The twolevel option is not yet supported, but will be soon");
|
|
||||||
return 1;
|
// cfactors = list of all possible 3 factors of core count
|
||||||
|
// constrain by 2d
|
||||||
|
|
||||||
|
int ncpossible = factor(ncores,NULL);
|
||||||
|
memory->create(cfactors,ncpossible,3,"procmap:cfactors");
|
||||||
|
ncpossible = factor(ncores,cfactors);
|
||||||
|
|
||||||
|
if (domain->dimension == 2) ncpossible = cull_2d(ncpossible,cfactors,3);
|
||||||
|
ncpossible = cull_user(ncpossible,cfactors,3,user_coregrid);
|
||||||
|
|
||||||
|
// factors = all combinations of nfactors and cfactors
|
||||||
|
// factors stores additional index pointing to corresponding cfactors
|
||||||
|
// constrain by user request, other partition
|
||||||
|
|
||||||
|
int npossible = nnpossible * ncpossible;
|
||||||
|
memory->create(factors,npossible,4,"procmap:factors");
|
||||||
|
npossible = combine_factors(nnpossible,nfactors,ncpossible,cfactors,factors);
|
||||||
|
|
||||||
|
npossible = cull_user(npossible,factors,4,user_procgrid);
|
||||||
|
if (otherflag) npossible = cull_other(npossible,factors,4,
|
||||||
|
other_style,other_procgrid);
|
||||||
|
|
||||||
|
// user/other constraints make failure possible
|
||||||
|
|
||||||
|
if (npossible == 0)
|
||||||
|
error->all(FLERR,"Could not create twolevel 3d grid of processors");
|
||||||
|
|
||||||
|
// select best set of 3 factors based on surface area of proc sub-domains
|
||||||
|
// index points to corresponding core factorization
|
||||||
|
|
||||||
|
int index = best_factors(npossible,factors,procgrid,1,1,1);
|
||||||
|
|
||||||
|
coregrid[0] = cfactors[factors[index][3]][0];
|
||||||
|
coregrid[1] = cfactors[factors[index][3]][1];
|
||||||
|
coregrid[2] = cfactors[factors[index][3]][2];
|
||||||
|
|
||||||
|
// clean-up
|
||||||
|
|
||||||
|
memory->destroy(nfactors);
|
||||||
|
memory->destroy(cfactors);
|
||||||
|
memory->destroy(factors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create a 3d grid of procs that does a 2-level hierarchy within a node
|
create a 3d grid of procs that does a 2-level hierarchy within a node
|
||||||
auto-detects NUMA sockets within a multi-core node
|
auto-detects NUMA sockets within a multi-core node
|
||||||
return 1 if successful, 0 if not
|
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
|
void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
|
||||||
int *numagrid)
|
int *numagrid)
|
||||||
{
|
{
|
||||||
// hardwire this for now
|
// hardwire this for now
|
||||||
|
|
||||||
@ -117,16 +183,16 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
|
|||||||
user_procgrid[0] > 1 || // user specified grid > 1 in any dim
|
user_procgrid[0] > 1 || // user specified grid > 1 in any dim
|
||||||
user_procgrid[1] > 1 ||
|
user_procgrid[1] > 1 ||
|
||||||
user_procgrid[2] > 1)
|
user_procgrid[2] > 1)
|
||||||
return 0;
|
error->all(FLERR,"Could not create numa 3d grid of processors");
|
||||||
|
|
||||||
|
|
||||||
// user settings for the factorization per numa node
|
// user settings for the factorization per numa node
|
||||||
// currently not user settable
|
// currently not user settable
|
||||||
|
// if user specifies 1 for a proc grid dimension,
|
||||||
|
// also use 1 for the numa grid dimension
|
||||||
|
|
||||||
int user_numagrid[3];
|
int user_numagrid[3];
|
||||||
user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0;
|
user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0;
|
||||||
|
|
||||||
// if user specifies 1 for a proc grid dimension,
|
|
||||||
// also use 1 for the numa grid dimension
|
|
||||||
|
|
||||||
if (user_procgrid[0] == 1) user_numagrid[0] = 1;
|
if (user_procgrid[0] == 1) user_numagrid[0] = 1;
|
||||||
if (user_procgrid[1] == 1) user_numagrid[1] = 1;
|
if (user_procgrid[1] == 1) user_numagrid[1] = 1;
|
||||||
@ -134,23 +200,52 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
|
|||||||
|
|
||||||
// initial factorization within NUMA node
|
// initial factorization within NUMA node
|
||||||
|
|
||||||
procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0);
|
int **numafactors;
|
||||||
if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa)
|
int numapossible = factor(procs_per_numa,NULL);
|
||||||
error->all(FLERR,"Bad grid of processors");
|
memory->create(numafactors,numapossible,3,"procmap:numafactors");
|
||||||
|
numapossible = factor(procs_per_numa,numafactors);
|
||||||
|
|
||||||
|
if (domain->dimension == 2)
|
||||||
|
numapossible = cull_2d(numapossible,numafactors,3);
|
||||||
|
numapossible = cull_user(numapossible,numafactors,3,user_numagrid);
|
||||||
|
|
||||||
|
if (numapossible == 0)
|
||||||
|
error->all(FLERR,"Could not create numa grid of processors");
|
||||||
|
|
||||||
|
best_factors(numapossible,numafactors,numagrid,1,1,1);
|
||||||
|
|
||||||
|
// user_nodegrid = implied user contraints on nodes
|
||||||
|
|
||||||
|
int user_nodegrid[3];
|
||||||
|
user_nodegrid[0] = user_procgrid[0] / numagrid[0];
|
||||||
|
user_nodegrid[1] = user_procgrid[1] / numagrid[1];
|
||||||
|
user_nodegrid[2] = user_procgrid[2] / numagrid[2];
|
||||||
|
|
||||||
// factorization for the grid of NUMA nodes
|
// factorization for the grid of NUMA nodes
|
||||||
|
|
||||||
int node_count = nprocs / procs_per_numa;
|
int node_count = nprocs / procs_per_numa;
|
||||||
procs2box(node_count,user_procgrid,nodegrid,
|
|
||||||
numagrid[0],numagrid[1],numagrid[2],0);
|
int **nodefactors;
|
||||||
if (procgrid[0]*procgrid[1]*procgrid[2] != node_count)
|
int nodepossible = factor(node_count,NULL);
|
||||||
error->all(FLERR,"Bad grid of processors");
|
memory->create(nodefactors,nodepossible,3,"procmap:nodefactors");
|
||||||
|
nodepossible = factor(node_count,nodefactors);
|
||||||
|
|
||||||
|
if (domain->dimension == 2)
|
||||||
|
nodepossible = cull_2d(nodepossible,nodefactors,3);
|
||||||
|
nodepossible = cull_user(nodepossible,nodefactors,3,user_nodegrid);
|
||||||
|
|
||||||
|
if (nodepossible == 0)
|
||||||
|
error->all(FLERR,"Could not create numa grid of processors");
|
||||||
|
|
||||||
|
best_factors(nodepossible,nodefactors,nodegrid,
|
||||||
|
numagrid[0],numagrid[1],numagrid[2]);
|
||||||
|
|
||||||
// repeat NUMA node factorization using subdomain sizes
|
// repeat NUMA node factorization using subdomain sizes
|
||||||
// refines the factorization if the user specified the node layout
|
// refines the factorization if the user specified the node layout
|
||||||
|
// NOTE: this will not re-enforce user-procgrid constraint will it?
|
||||||
|
|
||||||
procs2box(procs_per_numa,user_numagrid,numagrid,
|
best_factors(numapossible,numafactors,numagrid,
|
||||||
procgrid[0],procgrid[1],procgrid[2],0);
|
nodegrid[0],nodegrid[1],nodegrid[2]);
|
||||||
|
|
||||||
// assign a unique id to each node
|
// assign a unique id to each node
|
||||||
|
|
||||||
@ -166,8 +261,6 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
|
|||||||
procgrid[0] = nodegrid[0] * numagrid[0];
|
procgrid[0] = nodegrid[0] * numagrid[0];
|
||||||
procgrid[1] = nodegrid[1] * numagrid[1];
|
procgrid[1] = nodegrid[1] * numagrid[1];
|
||||||
procgrid[2] = nodegrid[2] * numagrid[2];
|
procgrid[2] = nodegrid[2] * numagrid[2];
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -245,136 +338,6 @@ void ProcMap::custom_grid(char *cfile, int nprocs,
|
|||||||
if (flag) error->all(FLERR,"Processors custom grid file is invalid");
|
if (flag) error->all(FLERR,"Processors custom grid file is invalid");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
assign nprocs to 3d box so as to minimize surface area
|
|
||||||
area = surface area of each of 3 faces of simulation box divided by sx,sy,sz
|
|
||||||
for triclinic, area = cross product of 2 edge vectors stored in h matrix
|
|
||||||
valid assignment will be factorization of nprocs = Px by Py by Pz
|
|
||||||
user_factors = if non-zero, factors are specified by user
|
|
||||||
sx,sy,sz = scale box xyz dimension by dividing by sx,sy,sz
|
|
||||||
other = 1 to enforce compatability with other partition's layout
|
|
||||||
return factors = # of procs assigned to each dimension
|
|
||||||
return 1 if factor successfully, 0 if not
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
int ProcMap::procs2box(int nprocs, int *user_factors, int *factors,
|
|
||||||
const int sx, const int sy, const int sz, int other)
|
|
||||||
{
|
|
||||||
factors[0] = user_factors[0];
|
|
||||||
factors[1] = user_factors[1];
|
|
||||||
factors[2] = user_factors[2];
|
|
||||||
|
|
||||||
// all 3 proc counts are specified
|
|
||||||
|
|
||||||
if (factors[0] && factors[1] && factors[2]) return 1;
|
|
||||||
|
|
||||||
// 2 out of 3 proc counts are specified
|
|
||||||
|
|
||||||
if (factors[0] > 0 && factors[1] > 0) {
|
|
||||||
factors[2] = nprocs/(factors[0]*factors[1]);
|
|
||||||
return 1;
|
|
||||||
} else if (factors[0] > 0 && factors[2] > 0) {
|
|
||||||
factors[1] = nprocs/(factors[0]*factors[2]);
|
|
||||||
return 1;
|
|
||||||
} else if (factors[1] > 0 && factors[2] > 0) {
|
|
||||||
factors[0] = nprocs/(factors[1]*factors[2]);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// determine cross-sectional areas for orthogonal and triclinic boxes
|
|
||||||
// area[0] = xy, area[1] = xz, area[2] = yz
|
|
||||||
|
|
||||||
double area[3];
|
|
||||||
if (domain->triclinic == 0) {
|
|
||||||
area[0] = domain->xprd * domain->yprd / (sx * sy);
|
|
||||||
area[1] = domain->xprd * domain->zprd / (sx * sz);
|
|
||||||
area[2] = domain->yprd * domain->zprd / (sy * sz);
|
|
||||||
} else {
|
|
||||||
double *h = domain->h;
|
|
||||||
double a[3],b[3],c[3];
|
|
||||||
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
|
||||||
b[0] = h[5]; b[1] = h[1]; b[2] = 0.0;
|
|
||||||
MathExtra::cross3(a,b,c);
|
|
||||||
area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy);
|
|
||||||
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
|
||||||
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
|
||||||
MathExtra::cross3(a,b,c);
|
|
||||||
area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz);
|
|
||||||
a[0] = h[5]; a[1] = h[1]; a[2] = 0.0;
|
|
||||||
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
|
||||||
MathExtra::cross3(a,b,c);
|
|
||||||
area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz);
|
|
||||||
}
|
|
||||||
|
|
||||||
double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
|
|
||||||
|
|
||||||
// loop thru all possible factorizations of nprocs
|
|
||||||
// only consider valid cases that match procgrid settings
|
|
||||||
// surf = surface area of a proc sub-domain
|
|
||||||
// only consider cases that match user_factors & other_procgrid settings
|
|
||||||
// success = 1 if valid factoriztion is found
|
|
||||||
// may not be if other constraint is enforced
|
|
||||||
|
|
||||||
int ipx,ipy,ipz,valid;
|
|
||||||
double surf;
|
|
||||||
|
|
||||||
int success = 0;
|
|
||||||
ipx = 1;
|
|
||||||
while (ipx <= nprocs) {
|
|
||||||
valid = 1;
|
|
||||||
if (user_factors[0] && ipx != user_factors[0]) valid = 0;
|
|
||||||
if (other) {
|
|
||||||
if (other_style == MULTIPLE && other_procgrid[0] % ipx) valid = 0;
|
|
||||||
}
|
|
||||||
if (nprocs % ipx) valid = 0;
|
|
||||||
|
|
||||||
if (!valid) {
|
|
||||||
ipx++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ipy = 1;
|
|
||||||
while (ipy <= nprocs/ipx) {
|
|
||||||
valid = 1;
|
|
||||||
if (user_factors[1] && ipy != user_factors[1]) valid = 0;
|
|
||||||
if (other) {
|
|
||||||
if (other_style == MULTIPLE && other_procgrid[1] % ipy) valid = 0;
|
|
||||||
}
|
|
||||||
if ((nprocs/ipx) % ipy) valid = 0;
|
|
||||||
if (!valid) {
|
|
||||||
ipy++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ipz = nprocs/ipx/ipy;
|
|
||||||
valid = 1;
|
|
||||||
if (user_factors[2] && ipz != user_factors[2]) valid = 0;
|
|
||||||
if (other) {
|
|
||||||
if (other_style == MULTIPLE && other_procgrid[2] % ipz) valid = 0;
|
|
||||||
}
|
|
||||||
if (domain->dimension == 2 && ipz != 1) valid = 0;
|
|
||||||
if (!valid) {
|
|
||||||
ipy++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
|
|
||||||
if (surf < bestsurf) {
|
|
||||||
success = 1;
|
|
||||||
bestsurf = surf;
|
|
||||||
factors[0] = ipx;
|
|
||||||
factors[1] = ipy;
|
|
||||||
factors[2] = ipz;
|
|
||||||
}
|
|
||||||
ipy++;
|
|
||||||
}
|
|
||||||
|
|
||||||
ipx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return success;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
map processors to 3d grid via MPI_Cart routines
|
map processors to 3d grid via MPI_Cart routines
|
||||||
MPI may do layout in machine-optimized fashion
|
MPI may do layout in machine-optimized fashion
|
||||||
@ -471,6 +434,44 @@ void ProcMap::xyz_map(char *xyz, int *procgrid,
|
|||||||
void ProcMap::xyz_map(char *xyz, int *procgrid, int *coregrid,
|
void ProcMap::xyz_map(char *xyz, int *procgrid, int *coregrid,
|
||||||
int *myloc, int procneigh[3][2], int ***grid2proc)
|
int *myloc, int procneigh[3][2], int ***grid2proc)
|
||||||
{
|
{
|
||||||
|
int me;
|
||||||
|
MPI_Comm_rank(world,&me);
|
||||||
|
|
||||||
|
int i,j,k;
|
||||||
|
for (i = 0; i < procgrid[0]; i++)
|
||||||
|
for (j = 0; j < procgrid[1]; j++)
|
||||||
|
for (k = 0; k < procgrid[2]; k++) {
|
||||||
|
grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
|
||||||
|
if (xyz[0] == 'x' && xyz[1] == 'y' && xyz[2] == 'z')
|
||||||
|
grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
|
||||||
|
else if (xyz[0] == 'x' && xyz[1] == 'z' && xyz[2] == 'y')
|
||||||
|
grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i;
|
||||||
|
else if (xyz[0] == 'y' && xyz[1] == 'x' && xyz[2] == 'z')
|
||||||
|
grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j;
|
||||||
|
else if (xyz[0] == 'y' && xyz[1] == 'z' && xyz[2] == 'x')
|
||||||
|
grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j;
|
||||||
|
else if (xyz[0] == 'z' && xyz[1] == 'x' && xyz[2] == 'y')
|
||||||
|
grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k;
|
||||||
|
else if (xyz[0] == 'z' && xyz[1] == 'y' && xyz[2] == 'x')
|
||||||
|
grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k;
|
||||||
|
|
||||||
|
if (grid2proc[i][j][k] == me) {
|
||||||
|
myloc[0] = i; myloc[1] = j, myloc[2] = k;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int minus,plus;
|
||||||
|
grid_shift(myloc[0],procgrid[0],minus,plus);
|
||||||
|
procneigh[0][0] = grid2proc[minus][myloc[1]][myloc[2]];
|
||||||
|
procneigh[0][1] = grid2proc[plus][myloc[1]][myloc[2]];
|
||||||
|
|
||||||
|
grid_shift(myloc[1],procgrid[1],minus,plus);
|
||||||
|
procneigh[1][0] = grid2proc[myloc[0]][minus][myloc[2]];
|
||||||
|
procneigh[1][1] = grid2proc[myloc[0]][plus][myloc[2]];
|
||||||
|
|
||||||
|
grid_shift(myloc[2],procgrid[2],minus,plus);
|
||||||
|
procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][minus];
|
||||||
|
procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][plus];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -596,18 +597,6 @@ void ProcMap::custom_map(int *procgrid,
|
|||||||
memory->destroy(cmap);
|
memory->destroy(cmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
minus,plus = indices of neighboring processors in a dimension
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void ProcMap::grid_shift(int myloc, int nprocs, int &minus, int &plus)
|
|
||||||
{
|
|
||||||
minus = myloc - 1;
|
|
||||||
if (minus < 0) minus = nprocs - 1;
|
|
||||||
plus = myloc + 1;
|
|
||||||
if (plus == nprocs) plus = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
output mapping of processors to 3d grid to file
|
output mapping of processors to 3d grid to file
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
@ -679,3 +668,136 @@ void ProcMap::output(char *file, int *procgrid, int ***grid2proc)
|
|||||||
|
|
||||||
if (me == 0) fclose(fp);
|
if (me == 0) fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
generate all possible 3-integer factorizations of N
|
||||||
|
store them in factors if non-NULL
|
||||||
|
return # of factorizations
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::factor(int n, int **factors)
|
||||||
|
{
|
||||||
|
int i,j,nyz;
|
||||||
|
|
||||||
|
int m = 0;
|
||||||
|
for (i = 1; i <= n; i++) {
|
||||||
|
if (n % i) continue;
|
||||||
|
nyz = n/i;
|
||||||
|
for (j = 1; j <= nyz; j++) {
|
||||||
|
if (nyz % j) continue;
|
||||||
|
if (factors) {
|
||||||
|
factors[m][0] = i;
|
||||||
|
factors[m][1] = j;
|
||||||
|
factors[m][2] = nyz/j;
|
||||||
|
}
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::combine_factors(int n1, int **factors1, int n2, int **factors2,
|
||||||
|
int **factors)
|
||||||
|
{
|
||||||
|
int m = 0;
|
||||||
|
|
||||||
|
return n1*n2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::cull_2d(int n, int **factors, int m)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::cull_user(int n, int **factors, int m, int *user_factors)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::cull_other(int n, int **factors, int m,
|
||||||
|
int other_style, int *other_grid)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
choose best factors from list of Npossible factors
|
||||||
|
best = minimal surface area of sub-domain
|
||||||
|
return best = 3 factors
|
||||||
|
return index of best factors in factors
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int ProcMap::best_factors(int npossible, int **factors, int *best,
|
||||||
|
const int sx, const int sy, const int sz)
|
||||||
|
{
|
||||||
|
// determine cross-sectional areas for orthogonal and triclinic boxes
|
||||||
|
// for triclinic, area = cross product of 2 edge vectors stored in h matrix
|
||||||
|
// area[3] = surface area 3 box faces divided by sx,sy,sz
|
||||||
|
// area[0] = xy, area[1] = xz, area[2] = yz
|
||||||
|
|
||||||
|
double area[3];
|
||||||
|
if (domain->triclinic == 0) {
|
||||||
|
area[0] = domain->xprd * domain->yprd / (sx*sy);
|
||||||
|
area[1] = domain->xprd * domain->zprd / (sx*sz);
|
||||||
|
area[2] = domain->yprd * domain->zprd / (sy*sz);
|
||||||
|
} else {
|
||||||
|
double *h = domain->h;
|
||||||
|
double a[3],b[3],c[3];
|
||||||
|
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
||||||
|
b[0] = h[5]; b[1] = h[1]; b[2] = 0.0;
|
||||||
|
MathExtra::cross3(a,b,c);
|
||||||
|
area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx*sy);
|
||||||
|
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
||||||
|
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
||||||
|
MathExtra::cross3(a,b,c);
|
||||||
|
area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx*sz);
|
||||||
|
a[0] = h[5]; a[1] = h[1]; a[2] = 0.0;
|
||||||
|
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
||||||
|
MathExtra::cross3(a,b,c);
|
||||||
|
area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy*sz);
|
||||||
|
}
|
||||||
|
|
||||||
|
int index;
|
||||||
|
double surf;
|
||||||
|
double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
|
||||||
|
|
||||||
|
for (int m = 0; m < npossible; m++) {
|
||||||
|
surf = area[0]/factors[m][0]/factors[m][1] +
|
||||||
|
area[1]/factors[m][0]/factors[m][2] +
|
||||||
|
area[2]/factors[m][1]/factors[m][2];
|
||||||
|
if (surf < bestsurf) {
|
||||||
|
best[0] = factors[m][0];
|
||||||
|
best[1] = factors[m][1];
|
||||||
|
best[2] = factors[m][2];
|
||||||
|
index = m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
minus,plus = indices of neighboring processors in a dimension
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void ProcMap::grid_shift(int myloc, int nprocs, int &minus, int &plus)
|
||||||
|
{
|
||||||
|
minus = myloc - 1;
|
||||||
|
if (minus < 0) minus = nprocs - 1;
|
||||||
|
plus = myloc + 1;
|
||||||
|
if (plus == nprocs) plus = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@ -22,9 +22,9 @@ class ProcMap : protected Pointers {
|
|||||||
public:
|
public:
|
||||||
ProcMap(class LAMMPS *);
|
ProcMap(class LAMMPS *);
|
||||||
~ProcMap() {}
|
~ProcMap() {}
|
||||||
int onelevel_grid(int, int *, int *, int, int, int *);
|
void onelevel_grid(int, int *, int *, int, int, int *);
|
||||||
int twolevel_grid(int, int *, int *, int, int *, int *, int, int, int *);
|
void twolevel_grid(int, int *, int *, int, int *, int *, int, int, int *);
|
||||||
int numa_grid(int, int *, int *, int *);
|
void numa_grid(int, int *, int *, int *);
|
||||||
void custom_grid(char *, int, int *, int *);
|
void custom_grid(char *, int, int *, int *);
|
||||||
void cart_map(int, int *, int *, int [3][2], int ***);
|
void cart_map(int, int *, int *, int [3][2], int ***);
|
||||||
void cart_map(int, int *, int *, int *, int [3][2], int ***);
|
void cart_map(int, int *, int *, int *, int [3][2], int ***);
|
||||||
@ -35,15 +35,19 @@ class ProcMap : protected Pointers {
|
|||||||
void output(char *, int *, int ***);
|
void output(char *, int *, int ***);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int other_style;
|
int nodegrid[3]; // NUMA params
|
||||||
int other_procgrid[3];
|
|
||||||
int nodegrid[3];
|
|
||||||
int node_id;
|
int node_id;
|
||||||
int procs_per_node;
|
int procs_per_node;
|
||||||
int procs_per_numa;
|
int procs_per_numa;
|
||||||
int **cmap;
|
|
||||||
|
|
||||||
int procs2box(int, int *, int *, const int, const int, const int, int);
|
int **cmap; // info in custom grid file
|
||||||
|
|
||||||
|
int factor(int, int **);
|
||||||
|
int combine_factors(int, int **, int, int **, int **);
|
||||||
|
int cull_2d(int, int **, int);
|
||||||
|
int cull_user(int, int **, int, int *);
|
||||||
|
int cull_other(int, int **, int, int, int *);
|
||||||
|
int best_factors(int, int **, int *, int, int, int);
|
||||||
void grid_shift(int, int, int &, int &);
|
void grid_shift(int, int, int &, int &);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user