git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7734 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
728
src/balance.cpp
Normal file
728
src/balance.cpp
Normal file
@ -0,0 +1,728 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "math.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "balance.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "irregular.h"
|
||||
#include "domain.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NONE,UNIFORM,USER,DYNAMIC};
|
||||
enum{X,Y,Z};
|
||||
enum{EXPAND,CONTRACT};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
|
||||
{
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
memory->create(pcount,nprocs,"balance:pcount");
|
||||
memory->create(allcount,nprocs,"balance:allcount");
|
||||
|
||||
user_xsplit = user_ysplit = user_zsplit = NULL;
|
||||
dflag = 0;
|
||||
|
||||
fp = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Balance::~Balance()
|
||||
{
|
||||
memory->destroy(pcount);
|
||||
memory->destroy(allcount);
|
||||
|
||||
delete [] user_xsplit;
|
||||
delete [] user_ysplit;
|
||||
delete [] user_zsplit;
|
||||
|
||||
if (dflag) {
|
||||
delete [] bstr;
|
||||
delete [] ops;
|
||||
delete [] counts[0];
|
||||
delete [] counts[1];
|
||||
delete [] counts[2];
|
||||
delete [] cuts;
|
||||
delete [] onecount;
|
||||
//MPI_Comm_free(&commslice[0]);
|
||||
//MPI_Comm_free(&commslice[1]);
|
||||
//MPI_Comm_free(&commslice[2]);
|
||||
}
|
||||
|
||||
if (fp) fclose(fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
called as balance command in input script
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::command(int narg, char **arg)
|
||||
{
|
||||
if (domain->box_exist == 0)
|
||||
error->all(FLERR,"Balance command before simulation box is defined");
|
||||
|
||||
if (comm->me == 0 && screen) fprintf(screen,"Balancing ...\n");
|
||||
|
||||
// parse arguments
|
||||
|
||||
int dimension = domain->dimension;
|
||||
int *procgrid = comm->procgrid;
|
||||
xflag = yflag = zflag = NONE;
|
||||
dflag = 0;
|
||||
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"x") == 0) {
|
||||
if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||
xflag = UNIFORM;
|
||||
iarg += 2;
|
||||
} else {
|
||||
if (1 + procgrid[0]-1 > narg)
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
xflag = USER;
|
||||
delete [] user_xsplit;
|
||||
user_xsplit = new double[procgrid[0]+1];
|
||||
user_xsplit[0] = 0.0;
|
||||
iarg++;
|
||||
for (int i = 1; i < procgrid[0]; i++)
|
||||
user_xsplit[i] = force->numeric(arg[iarg++]);
|
||||
user_xsplit[procgrid[0]] = 1.0;
|
||||
}
|
||||
} else if (strcmp(arg[iarg],"y") == 0) {
|
||||
if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||
yflag = UNIFORM;
|
||||
iarg += 2;
|
||||
} else {
|
||||
if (1 + procgrid[1]-1 > narg)
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
yflag = USER;
|
||||
delete [] user_ysplit;
|
||||
user_ysplit = new double[procgrid[1]+1];
|
||||
user_ysplit[0] = 0.0;
|
||||
iarg++;
|
||||
for (int i = 1; i < procgrid[1]; i++)
|
||||
user_ysplit[i] = force->numeric(arg[iarg++]);
|
||||
user_ysplit[procgrid[1]] = 1.0;
|
||||
}
|
||||
} else if (strcmp(arg[iarg],"z") == 0) {
|
||||
if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||
zflag = UNIFORM;
|
||||
iarg += 2;
|
||||
} else {
|
||||
if (1 + procgrid[2]-1 > narg)
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
zflag = USER;
|
||||
delete [] user_zsplit;
|
||||
user_zsplit = new double[procgrid[2]+1];
|
||||
user_zsplit[0] = 0.0;
|
||||
iarg++;
|
||||
for (int i = 1; i < procgrid[2]; i++)
|
||||
user_zsplit[i] = force->numeric(arg[iarg++]);
|
||||
user_zsplit[procgrid[2]] = 1.0;
|
||||
}
|
||||
|
||||
} else if (strcmp(arg[iarg],"dynamic") == 0) {
|
||||
if (xflag != NONE || yflag != NONE || zflag != NONE)
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
if (iarg+5 > narg) error->all(FLERR,"Illegal balance command");
|
||||
dflag = 1;
|
||||
xflag = yflag = DYNAMIC;
|
||||
if (dimension == 3) zflag = DYNAMIC;
|
||||
nrepeat = atoi(arg[iarg+1]);
|
||||
niter = atoi(arg[iarg+2]);
|
||||
if (nrepeat <= 0 || niter <= 0)
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
int n = strlen(arg[iarg+3]) + 1;
|
||||
bstr = new char[n];
|
||||
strcpy(bstr,arg[iarg+3]);
|
||||
thresh = atof(arg[iarg+4]);
|
||||
if (thresh < 1.0) error->all(FLERR,"Illegal balance command");
|
||||
iarg += 5;
|
||||
|
||||
} else error->all(FLERR,"Illegal balance command");
|
||||
}
|
||||
|
||||
// error check
|
||||
|
||||
if (zflag && dimension == 2)
|
||||
error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
|
||||
|
||||
if (xflag == USER)
|
||||
for (int i = 1; i <= procgrid[0]; i++)
|
||||
if (user_xsplit[i-1] >= user_xsplit[i])
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
if (yflag == USER)
|
||||
for (int i = 1; i <= procgrid[1]; i++)
|
||||
if (user_ysplit[i-1] >= user_ysplit[i])
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
if (zflag == USER)
|
||||
for (int i = 1; i <= procgrid[2]; i++)
|
||||
if (user_zsplit[i-1] >= user_zsplit[i])
|
||||
error->all(FLERR,"Illegal balance command");
|
||||
|
||||
if (dflag) {
|
||||
for (int i = 0; i < strlen(bstr); i++) {
|
||||
if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
|
||||
error->all(FLERR,"Balance dynamic string is invalid");
|
||||
if (bstr[i] == 'z' && dimension == 2)
|
||||
error->all(FLERR,"Balance dynamic string is invalid for 2d simulation");
|
||||
}
|
||||
}
|
||||
|
||||
// insure atoms are in current box & update box via shrink-wrap
|
||||
// no exchange() since doesn't matter if atoms are assigned to correct procs
|
||||
|
||||
if (domain->triclinic) domain->x2lamda(atom->nlocal);
|
||||
domain->pbc();
|
||||
domain->reset_box();
|
||||
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||
|
||||
// imbinit = initial imbalance
|
||||
// use current splits instead of nlocal since atoms may not be in sub-box
|
||||
|
||||
domain->x2lamda(atom->nlocal);
|
||||
int maxinit;
|
||||
double imbinit = imbalance_splits(maxinit);
|
||||
domain->lamda2x(atom->nlocal);
|
||||
|
||||
// debug output of initial state
|
||||
|
||||
//dumpout(update->ntimestep);
|
||||
|
||||
// explicit setting of sub-domain sizes
|
||||
|
||||
if (xflag == UNIFORM) {
|
||||
for (int i = 0; i < procgrid[0]; i++)
|
||||
comm->xsplit[i] = i * 1.0/procgrid[0];
|
||||
comm->xsplit[procgrid[0]] = 1.0;
|
||||
}
|
||||
|
||||
if (yflag == UNIFORM) {
|
||||
for (int i = 0; i < procgrid[1]; i++)
|
||||
comm->ysplit[i] = i * 1.0/procgrid[1];
|
||||
comm->ysplit[procgrid[1]] = 1.0;
|
||||
}
|
||||
|
||||
if (zflag == UNIFORM) {
|
||||
for (int i = 0; i < procgrid[2]; i++)
|
||||
comm->zsplit[i] = i * 1.0/procgrid[2];
|
||||
comm->zsplit[procgrid[2]] = 1.0;
|
||||
}
|
||||
|
||||
if (xflag == USER)
|
||||
for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
|
||||
|
||||
if (yflag == USER)
|
||||
for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
|
||||
|
||||
if (zflag == USER)
|
||||
for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
|
||||
|
||||
// dynamic load-balance of sub-domain sizes
|
||||
|
||||
if (dflag) {
|
||||
dynamic_setup(bstr);
|
||||
dynamic();
|
||||
} else count = 0;
|
||||
|
||||
// debug output of final result
|
||||
|
||||
//dumpout(-1);
|
||||
|
||||
// reset comm->uniform flag if necessary
|
||||
|
||||
if (comm->uniform) {
|
||||
if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0;
|
||||
if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0;
|
||||
if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0;
|
||||
} else {
|
||||
if (dimension == 3) {
|
||||
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
|
||||
comm->uniform = 1;
|
||||
} else {
|
||||
if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// reset proc sub-domains
|
||||
|
||||
if (domain->triclinic) domain->set_lamda_box();
|
||||
domain->set_local_box();
|
||||
|
||||
// move atoms to new processors via irregular()
|
||||
|
||||
if (domain->triclinic) domain->x2lamda(atom->nlocal);
|
||||
Irregular *irregular = new Irregular(lmp);
|
||||
irregular->migrate_atoms();
|
||||
delete irregular;
|
||||
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||
|
||||
// check if any atoms were lost
|
||||
|
||||
bigint natoms;
|
||||
bigint nblocal = atom->nlocal;
|
||||
MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
if (natoms != atom->natoms) {
|
||||
char str[128];
|
||||
sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
|
||||
" current " BIGINT_FORMAT,atom->natoms,natoms);
|
||||
error->all(FLERR,str);
|
||||
}
|
||||
|
||||
// imbfinal = final imbalance based on final nlocal
|
||||
|
||||
int maxfinal;
|
||||
double imbfinal = imbalance_nlocal(maxfinal);
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen," iteration count = %d\n",count);
|
||||
fprintf(screen," initial/final max atoms/proc = %d %d\n",
|
||||
maxinit,maxfinal);
|
||||
fprintf(screen," initial/final imbalance factor = %g %g\n",
|
||||
imbinit,imbfinal);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile," iteration count = %d\n",count);
|
||||
fprintf(logfile," initial/final max atoms/proc = %d %d\n",
|
||||
maxinit,maxfinal);
|
||||
fprintf(logfile," initial/final imbalance factor = %g %g\n",
|
||||
imbinit,imbfinal);
|
||||
}
|
||||
}
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen," x cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[0]; i++)
|
||||
fprintf(screen," %g",comm->xsplit[i]);
|
||||
fprintf(screen,"\n");
|
||||
fprintf(screen," y cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[1]; i++)
|
||||
fprintf(screen," %g",comm->ysplit[i]);
|
||||
fprintf(screen,"\n");
|
||||
fprintf(screen," z cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[2]; i++)
|
||||
fprintf(screen," %g",comm->zsplit[i]);
|
||||
fprintf(screen,"\n");
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile," x cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[0]; i++)
|
||||
fprintf(logfile," %g",comm->xsplit[i]);
|
||||
fprintf(logfile,"\n");
|
||||
fprintf(logfile," y cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[1]; i++)
|
||||
fprintf(logfile," %g",comm->ysplit[i]);
|
||||
fprintf(logfile,"\n");
|
||||
fprintf(logfile," z cuts:");
|
||||
for (int i = 0; i <= comm->procgrid[2]; i++)
|
||||
fprintf(logfile," %g",comm->zsplit[i]);
|
||||
fprintf(logfile,"\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate imbalance based on nlocal
|
||||
return max = max atom per proc
|
||||
return imbalance factor = max atom per proc / ave atom per proc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double Balance::imbalance_nlocal(int &max)
|
||||
{
|
||||
MPI_Allreduce(&atom->nlocal,&max,1,MPI_INT,MPI_MAX,world);
|
||||
double imbalance = max / (1.0 * atom->natoms / nprocs);
|
||||
return imbalance;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate imbalance based on processor splits in 3 dims
|
||||
atoms must be in lamda coords (0-1) before called
|
||||
map atoms to 3d grid of procs
|
||||
return max = max atom per proc
|
||||
return imbalance factor = max atom per proc / ave atom per proc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double Balance::imbalance_splits(int &max)
|
||||
{
|
||||
double *xsplit = comm->xsplit;
|
||||
double *ysplit = comm->ysplit;
|
||||
double *zsplit = comm->zsplit;
|
||||
|
||||
int nx = comm->procgrid[0];
|
||||
int ny = comm->procgrid[1];
|
||||
int nz = comm->procgrid[2];
|
||||
|
||||
for (int i = 0; i < nprocs; i++) pcount[i] = 0;
|
||||
|
||||
double **x = atom->x;
|
||||
int nlocal = atom->nlocal;
|
||||
int ix,iy,iz;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
ix = binary(x[i][0],nx,xsplit);
|
||||
iy = binary(x[i][1],ny,ysplit);
|
||||
iz = binary(x[i][2],nz,zsplit);
|
||||
pcount[iz*nx*ny + iy*nx + ix]++;
|
||||
}
|
||||
|
||||
MPI_Allreduce(pcount,allcount,nprocs,MPI_INT,MPI_SUM,world);
|
||||
max = 0;
|
||||
for (int i = 0; i < nprocs; i++) max = MAX(max,allcount[i]);
|
||||
double imbalance = max / (1.0 * atom->natoms / nprocs);
|
||||
return imbalance;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup dynamic load balance operations
|
||||
called from command & fix balance
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::dynamic_setup(char *str)
|
||||
{
|
||||
nops = strlen(str);
|
||||
ops = new int[nops];
|
||||
|
||||
for (int i = 0; i < strlen(str); i++) {
|
||||
if (str[i] == 'x') ops[i] = X;
|
||||
if (str[i] == 'y') ops[i] = Y;
|
||||
if (str[i] == 'z') ops[i] = Z;
|
||||
}
|
||||
|
||||
splits[0] = comm->xsplit;
|
||||
splits[1] = comm->ysplit;
|
||||
splits[2] = comm->zsplit;
|
||||
|
||||
counts[0] = new bigint[comm->procgrid[0]];
|
||||
counts[1] = new bigint[comm->procgrid[1]];
|
||||
counts[2] = new bigint[comm->procgrid[2]];
|
||||
|
||||
int max = MAX(comm->procgrid[0],comm->procgrid[1]);
|
||||
max = MAX(max,comm->procgrid[2]);
|
||||
cuts = new double[max+1];
|
||||
onecount = new bigint[max];
|
||||
|
||||
//MPI_Comm_split(world,comm->myloc[0],0,&commslice[0]);
|
||||
//MPI_Comm_split(world,comm->myloc[1],0,&commslice[1]);
|
||||
//MPI_Comm_split(world,comm->myloc[2],0,&commslice[2]);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform dynamic load balance by changing xyz split proc boundaries in Comm
|
||||
called from command and fix balance
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::dynamic()
|
||||
{
|
||||
int i,m,max;
|
||||
double imbfactor;
|
||||
|
||||
int *procgrid = comm->procgrid;
|
||||
|
||||
domain->x2lamda(atom->nlocal);
|
||||
|
||||
count = 0;
|
||||
for (int irepeat = 0; irepeat < nrepeat; irepeat++) {
|
||||
for (i = 0; i < nops; i++) {
|
||||
for (m = 0; m < niter; m++) {
|
||||
stats(ops[i],procgrid[ops[i]],splits[ops[i]],counts[ops[i]]);
|
||||
adjust(procgrid[ops[i]],counts[ops[i]],splits[ops[i]]);
|
||||
count++;
|
||||
// debug output of intermediate result
|
||||
//dumpout(-1);
|
||||
}
|
||||
imbfactor = imbalance_splits(max);
|
||||
if (imbfactor <= thresh) break;
|
||||
}
|
||||
if (i < nops) break;
|
||||
}
|
||||
|
||||
domain->lamda2x(atom->nlocal);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
count atoms in each slice
|
||||
current cuts may be very different than original cuts,
|
||||
so use binary search to find which slice each atom is in
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::stats(int dim, int n, double *split, bigint *count)
|
||||
{
|
||||
for (int i = 0; i < n; i++) onecount[i] = 0;
|
||||
|
||||
double **x = atom->x;
|
||||
int nlocal = atom->nlocal;
|
||||
int index;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
index = binary(x[i][dim],n,split);
|
||||
onecount[index]++;
|
||||
}
|
||||
|
||||
MPI_Allreduce(onecount,count,n,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
adjust cuts between N slices in a dim via diffusive method
|
||||
count = atoms per slice
|
||||
split = current N+1 cuts, with 0.0 and 1.0 at end points
|
||||
overwrite split with new cuts
|
||||
diffusion means slices with more atoms than their neighbors
|
||||
"send" them atoms by moving cut closer to sender, further from receiver
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::adjust(int n, bigint *count, double *split)
|
||||
{
|
||||
// damping factor
|
||||
|
||||
double damp = 0.5;
|
||||
|
||||
// loop over slices from 1 to N-2 inclusive (not end slices 0 and N-1)
|
||||
// cut I is between 2 slices (I-1 and I) with counts
|
||||
// cut I+1 is between 2 slices (I and I+1) with counts
|
||||
// for a cut between 2 slices, only slice with larger count adjusts it
|
||||
|
||||
bigint leftcount,mycount,rightcount;
|
||||
double rho,target,targetleft,targetright;
|
||||
|
||||
for (int i = 1; i < n-1; i++) {
|
||||
leftcount = count[i-1];
|
||||
mycount = count[i];
|
||||
rightcount = count[i+1];
|
||||
|
||||
// middle slice is <= both left and right, so do nothing
|
||||
// special case if 2 slices both have count = 0, no change in cut
|
||||
|
||||
if (mycount <= leftcount && mycount <= rightcount) {
|
||||
if (leftcount == 0) cuts[i] = split[i];
|
||||
if (rightcount == 0) cuts[i+1] = split[i+1];
|
||||
continue;
|
||||
}
|
||||
|
||||
// rho = density of atoms in the slice
|
||||
|
||||
rho = mycount / (split[i+1] - split[i]);
|
||||
|
||||
// middle slice has more atoms then left or right slice
|
||||
// if delta from middle to top slice > delta between top and bottom slice
|
||||
// then send atoms both dirs to bring all 3 slices to same count
|
||||
// else bottom slice is very low, so send atoms only in that dir
|
||||
|
||||
if (mycount > leftcount && mycount > rightcount) {
|
||||
if (mycount-MAX(leftcount,rightcount) >= fabs(leftcount-rightcount)) {
|
||||
if (leftcount <= rightcount) {
|
||||
targetleft = damp *
|
||||
(rightcount-leftcount + (mycount-rightcount)/3.0);
|
||||
targetright = damp * (mycount-rightcount)/3.0;
|
||||
cuts[i] = split[i] + targetleft/rho;
|
||||
cuts[i+1] = split[i+1] - targetright/rho;
|
||||
} else {
|
||||
targetleft = damp * (mycount-leftcount)/3.0;
|
||||
targetright = damp *
|
||||
(leftcount-rightcount + (mycount-leftcount)/3.0);
|
||||
cuts[i] = split[i] + targetleft/rho;
|
||||
cuts[i+1] = split[i+1] - targetright/rho;
|
||||
}
|
||||
} else if (leftcount < rightcount) {
|
||||
target = damp * 0.5*(mycount-leftcount);
|
||||
cuts[i] = split[i] + target/rho;
|
||||
cuts[i+1] = split[i+1];
|
||||
} else if (rightcount < leftcount) {
|
||||
target = damp * 0.5*(mycount-rightcount);
|
||||
cuts[i+1] = split[i+1] - target/rho;
|
||||
cuts[i] = split[i];
|
||||
}
|
||||
|
||||
// middle slice has more atoms than only left or right slice
|
||||
// send atoms only in that dir
|
||||
|
||||
} else if (mycount > leftcount) {
|
||||
target = damp * 0.5*(mycount-leftcount);
|
||||
cuts[i] = split[i] + target/rho;
|
||||
} else if (mycount > rightcount) {
|
||||
target = damp * 0.5*(mycount-rightcount);
|
||||
cuts[i+1] = split[i+1] - target/rho;
|
||||
}
|
||||
}
|
||||
|
||||
// overwrite adjustable splits with new cuts
|
||||
|
||||
for (int i = 1; i < n; i++) split[i] = cuts[i];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binary search for value in N-length ascending vec
|
||||
value may be outside range of vec limits
|
||||
always return index from 0 to N-1 inclusive
|
||||
return 0 if value < vec[0]
|
||||
reutrn N-1 if value >= vec[N-1]
|
||||
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Balance::binary(double value, int n, double *vec)
|
||||
{
|
||||
int lo = 0;
|
||||
int hi = n-1;
|
||||
|
||||
if (value < vec[lo]) return lo;
|
||||
if (value >= vec[hi]) return hi;
|
||||
|
||||
// insure vec[lo] <= value < vec[hi] at every iteration
|
||||
// done when lo,hi are adjacent
|
||||
|
||||
int index = (lo+hi)/2;
|
||||
while (lo < hi-1) {
|
||||
if (value < vec[index]) hi = index;
|
||||
else if (value >= vec[index]) lo = index;
|
||||
index = (lo+hi)/2;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create dump file of line segments in Pizza.py mdump mesh format
|
||||
just for debug/viz purposes
|
||||
write to tmp.mdump.*, open first time if necessary
|
||||
write xy lines around each proc's sub-domain for 2d
|
||||
write xyz cubes around each proc's sub-domain for 3d
|
||||
all procs but 0 just return
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Balance::dumpout(bigint tstamp)
|
||||
{
|
||||
if (me) return;
|
||||
|
||||
bigint tstep;
|
||||
if (tstamp >= 0) tstep = tstamp;
|
||||
else tstep = laststep + 1;
|
||||
laststep = tstep;
|
||||
|
||||
if (fp == NULL) {
|
||||
char file[32];
|
||||
sprintf(file,"tmp.mdump.%ld",tstep);
|
||||
fp = fopen(file,"w");
|
||||
if (!fp) error->one(FLERR,"Cannot open balance output file");
|
||||
|
||||
// write out one square/cute per processor for 2d/3d
|
||||
// only write once since topology is static
|
||||
|
||||
fprintf(fp,"ITEM: TIMESTEP\n");
|
||||
fprintf(fp,"%ld\n",tstep);
|
||||
fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
|
||||
fprintf(fp,"%d\n",nprocs);
|
||||
fprintf(fp,"ITEM: SQUARES\n");
|
||||
|
||||
int nx = comm->procgrid[0] + 1;
|
||||
int ny = comm->procgrid[1] + 1;
|
||||
int nz = comm->procgrid[2] + 1;
|
||||
|
||||
if (domain->dimension == 2) {
|
||||
int m = 0;
|
||||
for (int j = 0; j < comm->procgrid[1]; j++)
|
||||
for (int i = 0; i < comm->procgrid[0]; i++) {
|
||||
int c1 = j*nx + i + 1;
|
||||
int c2 = c1 + 1;
|
||||
int c3 = c2 + nx;
|
||||
int c4 = c3 - 1;
|
||||
fprintf(fp,"%d %d %d %d %d %d\n",m+1,m+1,c1,c2,c3,c4);
|
||||
m++;
|
||||
}
|
||||
|
||||
} else {
|
||||
int m = 0;
|
||||
for (int k = 0; k < comm->procgrid[2]; k++)
|
||||
for (int j = 0; j < comm->procgrid[1]; j++)
|
||||
for (int i = 0; i < comm->procgrid[0]; i++) {
|
||||
int c1 = k*ny*nx + j*nx + i + 1;
|
||||
int c2 = c1 + 1;
|
||||
int c3 = c2 + nx;
|
||||
int c4 = c3 - 1;
|
||||
int c5 = c1 + ny*nx;
|
||||
int c6 = c2 + ny*nx;
|
||||
int c7 = c3 + ny*nx;
|
||||
int c8 = c4 + ny*nx;
|
||||
fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
|
||||
m+1,m+1,c1,c2,c3,c4,c5,c6,c7,c8);
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write out nodal coords, can be different every call
|
||||
// scale xsplit,ysplit,zsplit values to full box
|
||||
// only implmented for orthogonal boxes, not triclinic
|
||||
|
||||
int nx = comm->procgrid[0] + 1;
|
||||
int ny = comm->procgrid[1] + 1;
|
||||
int nz = comm->procgrid[2] + 1;
|
||||
|
||||
double *boxlo = domain->boxlo;
|
||||
double *boxhi = domain->boxhi;
|
||||
double *prd = domain->prd;
|
||||
|
||||
fprintf(fp,"ITEM: TIMESTEP\n");
|
||||
fprintf(fp,"%ld\n",tstep);
|
||||
fprintf(fp,"ITEM: NUMBER OF NODES\n");
|
||||
if (domain->dimension == 2)
|
||||
fprintf(fp,"%d\n",nx*ny);
|
||||
else
|
||||
fprintf(fp,"%d\n",nx*ny*nz);
|
||||
fprintf(fp,"ITEM: BOX BOUNDS\n");
|
||||
fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
|
||||
fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
|
||||
fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
|
||||
fprintf(fp,"ITEM: NODES\n");
|
||||
|
||||
if (domain->dimension == 2) {
|
||||
int m = 0;
|
||||
for (int j = 0; j < ny; j++)
|
||||
for (int i = 0; i < nx; i++) {
|
||||
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
|
||||
boxlo[0] + prd[0]*comm->xsplit[i],
|
||||
boxlo[1] + prd[1]*comm->ysplit[j],
|
||||
0.0);
|
||||
m++;
|
||||
}
|
||||
} else {
|
||||
int m = 0;
|
||||
for (int k = 0; k < nz; k++)
|
||||
for (int j = 0; j < ny; j++)
|
||||
for (int i = 0; i < nx; i++) {
|
||||
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
|
||||
boxlo[0] + prd[0]*comm->xsplit[i],
|
||||
boxlo[1] + prd[1]*comm->ysplit[j],
|
||||
boxlo[2] + prd[2]*comm->zsplit[j]);
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}
|
||||
71
src/balance.h
Normal file
71
src/balance.h
Normal file
@ -0,0 +1,71 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef COMMAND_CLASS
|
||||
|
||||
CommandStyle(balance,Balance)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_BALANCE_H
|
||||
#define LMP_BALANCE_H
|
||||
|
||||
#include "mpi.h"
|
||||
#include "stdio.h"
|
||||
#include "pointers.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class Balance : protected Pointers {
|
||||
public:
|
||||
Balance(class LAMMPS *);
|
||||
~Balance();
|
||||
void command(int, char **);
|
||||
|
||||
private:
|
||||
int me,nprocs;
|
||||
int xflag,yflag,zflag,dflag;
|
||||
int nrepeat,niter;
|
||||
double thresh;
|
||||
char *bstr;
|
||||
double *user_xsplit,*user_ysplit,*user_zsplit;
|
||||
|
||||
int *ops;
|
||||
int nops;
|
||||
double *splits[3];
|
||||
bigint *counts[3];
|
||||
double *cuts;
|
||||
bigint *onecount;
|
||||
MPI_Comm commslice[3];
|
||||
|
||||
int count;
|
||||
int *pcount,*allcount;
|
||||
|
||||
FILE *fp; // for debug output
|
||||
bigint laststep;
|
||||
|
||||
double imbalance_splits(int &);
|
||||
double imbalance_nlocal(int &);
|
||||
void dynamic_setup(char *);
|
||||
void dynamic();
|
||||
void stats(int, int, double *, bigint *);
|
||||
void adjust(int, bigint *, double *);
|
||||
int binary(double, int, double *);
|
||||
|
||||
void dumpout(bigint); // for debug output
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -150,13 +150,13 @@ void ChangeBox::command(int narg, char **arg)
|
||||
iarg += 4;
|
||||
|
||||
} else if (strcmp(arg[iarg],"ortho") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
|
||||
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
|
||||
ops[nops].style = ORTHO;
|
||||
nops++;
|
||||
iarg += 1;
|
||||
|
||||
} else if (strcmp(arg[iarg],"triclinic") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
|
||||
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
|
||||
ops[nops].style = TRICLINIC;
|
||||
nops++;
|
||||
iarg += 1;
|
||||
|
||||
308
src/comm.cpp
308
src/comm.cpp
@ -78,6 +78,8 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
|
||||
bordergroup = 0;
|
||||
style = SINGLE;
|
||||
uniform = 1;
|
||||
xsplit = ysplit = zsplit = NULL;
|
||||
multilo = multihi = NULL;
|
||||
cutghostmulti = NULL;
|
||||
cutghostuser = 0.0;
|
||||
@ -124,6 +126,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
|
||||
Comm::~Comm()
|
||||
{
|
||||
memory->destroy(xsplit);
|
||||
memory->destroy(ysplit);
|
||||
memory->destroy(zsplit);
|
||||
|
||||
delete [] customfile;
|
||||
delete [] outfile;
|
||||
|
||||
@ -145,7 +151,7 @@ Comm::~Comm()
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create 3d grid of procs based on Nprocs and box size & shape
|
||||
map processors to grid
|
||||
map processors to grid, setup xyz split for a uniform grid
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Comm::set_proc_grid()
|
||||
@ -252,14 +258,31 @@ void Comm::set_proc_grid()
|
||||
|
||||
if (outfile) pmap->output(outfile,procgrid,grid2proc);
|
||||
|
||||
// set lamda box params after procs are assigned
|
||||
|
||||
if (domain->triclinic) domain->set_lamda_box();
|
||||
|
||||
// free ProcMap class
|
||||
|
||||
delete pmap;
|
||||
|
||||
// set xsplit,ysplit,zsplit for uniform spacings
|
||||
|
||||
memory->destroy(xsplit);
|
||||
memory->destroy(ysplit);
|
||||
memory->destroy(zsplit);
|
||||
|
||||
memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
|
||||
memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
|
||||
memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
|
||||
|
||||
for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
|
||||
for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
|
||||
for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
|
||||
|
||||
xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
|
||||
|
||||
// set lamda box params after procs are assigned
|
||||
// only set once unless load-balancing occurs
|
||||
|
||||
if (domain->triclinic) domain->set_lamda_box();
|
||||
|
||||
// send my 3d proc grid to another partition if requested
|
||||
|
||||
if (send_to_partition >= 0) {
|
||||
@ -399,25 +422,112 @@ void Comm::setup()
|
||||
}
|
||||
}
|
||||
|
||||
// need = # of procs I need atoms from in each dim based on max cutoff
|
||||
// for 2d, don't communicate in z
|
||||
|
||||
need[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
|
||||
need[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
|
||||
need[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
|
||||
if (domain->dimension == 2) need[2] = 0;
|
||||
|
||||
// if non-periodic, do not communicate further than procgrid-1 away
|
||||
// this enables very large cutoffs in non-periodic systems
|
||||
// recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
|
||||
// 0 = from left, 1 = from right
|
||||
// do not cross non-periodic boundaries, need[2] = 0 for 2d
|
||||
// sendneed[idim][0/1] = # of procs away I send atoms to
|
||||
// 0 = to left, 1 = to right
|
||||
// set equal to recvneed[idim][1/0] of neighbor proc
|
||||
// maxneed[idim] = max procs away any proc recvs atoms in either direction
|
||||
// uniform = 1 = uniform sized sub-domains:
|
||||
// maxneed is directly computable from sub-domain size
|
||||
// limit to procgrid-1 for non-PBC
|
||||
// recvneed = maxneed except for procs near non-PBC
|
||||
// sendneed = recvneed of neighbor on each side
|
||||
// uniform = 0 = non-uniform sized sub-domains:
|
||||
// compute recvneed via updown() which accounts for non-PBC
|
||||
// sendneed = recvneed of neighbor on each side
|
||||
// maxneed via Allreduce() of recvneed
|
||||
|
||||
int *periodicity = domain->periodicity;
|
||||
if (periodicity[0] == 0) need[0] = MIN(need[0],procgrid[0]-1);
|
||||
if (periodicity[1] == 0) need[1] = MIN(need[1],procgrid[1]-1);
|
||||
if (periodicity[2] == 0) need[2] = MIN(need[2],procgrid[2]-1);
|
||||
int left,right;
|
||||
|
||||
if (uniform) {
|
||||
maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
|
||||
maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
|
||||
maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
|
||||
if (domain->dimension == 2) maxneed[2] = 0;
|
||||
if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
|
||||
if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
|
||||
if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
|
||||
|
||||
if (!periodicity[0]) {
|
||||
recvneed[0][0] = MIN(maxneed[0],myloc[0]);
|
||||
recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
|
||||
left = myloc[0] - 1;
|
||||
if (left < 0) left = procgrid[0] - 1;
|
||||
sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
|
||||
right = myloc[0] + 1;
|
||||
if (right == procgrid[0]) right = 0;
|
||||
sendneed[0][1] = MIN(maxneed[0],right);
|
||||
} else recvneed[0][0] = recvneed[0][1] =
|
||||
sendneed[0][0] = sendneed[0][1] = maxneed[0];
|
||||
|
||||
if (!periodicity[1]) {
|
||||
recvneed[1][0] = MIN(maxneed[1],myloc[1]);
|
||||
recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
|
||||
left = myloc[1] - 1;
|
||||
if (left < 0) left = procgrid[1] - 1;
|
||||
sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
|
||||
right = myloc[1] + 1;
|
||||
if (right == procgrid[1]) right = 0;
|
||||
sendneed[1][1] = MIN(maxneed[1],right);
|
||||
} else recvneed[1][0] = recvneed[1][1] =
|
||||
sendneed[1][0] = sendneed[1][1] = maxneed[1];
|
||||
|
||||
if (!periodicity[2]) {
|
||||
recvneed[2][0] = MIN(maxneed[2],myloc[2]);
|
||||
recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
|
||||
left = myloc[2] - 1;
|
||||
if (left < 0) left = procgrid[2] - 1;
|
||||
sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
|
||||
right = myloc[2] + 1;
|
||||
if (right == procgrid[2]) right = 0;
|
||||
sendneed[2][1] = MIN(maxneed[2],right);
|
||||
} else recvneed[2][0] = recvneed[2][1] =
|
||||
sendneed[2][0] = sendneed[2][1] = maxneed[2];
|
||||
|
||||
} else {
|
||||
recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
|
||||
recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
|
||||
left = myloc[0] - 1;
|
||||
if (left < 0) left = procgrid[0] - 1;
|
||||
sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
|
||||
right = myloc[0] + 1;
|
||||
if (right == procgrid[0]) right = 0;
|
||||
sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
|
||||
|
||||
recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
|
||||
recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
|
||||
left = myloc[1] - 1;
|
||||
if (left < 0) left = procgrid[1] - 1;
|
||||
sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
|
||||
right = myloc[1] + 1;
|
||||
if (right == procgrid[1]) right = 0;
|
||||
sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
|
||||
|
||||
if (domain->dimension == 3) {
|
||||
recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
|
||||
recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
|
||||
left = myloc[2] - 1;
|
||||
if (left < 0) left = procgrid[2] - 1;
|
||||
sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
|
||||
right = myloc[2] + 1;
|
||||
if (right == procgrid[2]) right = 0;
|
||||
sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
|
||||
} else recvneed[2][0] = recvneed[2][1] =
|
||||
sendneed[2][0] = sendneed[2][1] = 0;
|
||||
|
||||
int all[6];
|
||||
MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
|
||||
maxneed[0] = MAX(all[0],all[1]);
|
||||
maxneed[1] = MAX(all[2],all[3]);
|
||||
maxneed[2] = MAX(all[4],all[5]);
|
||||
}
|
||||
|
||||
// allocate comm memory
|
||||
|
||||
nswap = 2 * (need[0]+need[1]+need[2]);
|
||||
nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
|
||||
if (nswap > maxswap) grow_swap(nswap);
|
||||
|
||||
// setup parameters for each exchange:
|
||||
@ -428,13 +538,11 @@ void Comm::setup()
|
||||
// use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
|
||||
// if round-off, atoms recvd across PBC can be < or > than subbox boundary
|
||||
// note that borders() only loops over subset of atoms during each swap
|
||||
// set slablo > slabhi for swaps across non-periodic boundaries
|
||||
// this insures no atoms are swapped
|
||||
// only for procs owning sub-box at non-periodic end of global box
|
||||
// treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
|
||||
// for style MULTI:
|
||||
// multilo/multihi is same as slablo/slabhi, only for each atom type
|
||||
// multilo/multihi is same, with slablo/slabhi for each atom type
|
||||
// pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
|
||||
// pbc = -1/0/1 for PBC factor in each of 3/6 orthog/triclinic dirs
|
||||
// pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
|
||||
// for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
|
||||
// 1st part of if statement is sending to the west/south/down
|
||||
// 2nd part of if statement is sending to the east/north/up
|
||||
@ -443,7 +551,7 @@ void Comm::setup()
|
||||
|
||||
int iswap = 0;
|
||||
for (dim = 0; dim < 3; dim++) {
|
||||
for (ineed = 0; ineed < 2*need[dim]; ineed++) {
|
||||
for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
|
||||
pbc_flag[iswap] = 0;
|
||||
pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
|
||||
pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
|
||||
@ -463,12 +571,6 @@ void Comm::setup()
|
||||
}
|
||||
}
|
||||
if (myloc[dim] == 0) {
|
||||
if (periodicity[dim] == 0) {
|
||||
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
|
||||
else
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
multihi[iswap][i] = multilo[iswap][i] - 1.0;
|
||||
} else {
|
||||
pbc_flag[iswap] = 1;
|
||||
pbc[iswap][dim] = 1;
|
||||
if (triclinic) {
|
||||
@ -476,7 +578,6 @@ void Comm::setup()
|
||||
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
sendproc[iswap] = procneigh[dim][1];
|
||||
@ -493,12 +594,6 @@ void Comm::setup()
|
||||
}
|
||||
}
|
||||
if (myloc[dim] == procgrid[dim]-1) {
|
||||
if (periodicity[dim] == 0) {
|
||||
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
|
||||
else
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
multihi[iswap][i] = multilo[iswap][i] - 1.0;
|
||||
} else {
|
||||
pbc_flag[iswap] = 1;
|
||||
pbc[iswap][dim] = -1;
|
||||
if (triclinic) {
|
||||
@ -507,13 +602,61 @@ void Comm::setup()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
iswap++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
walk up/down the extent of nearby processors in dim and dir
|
||||
loc = myloc of proc to start at
|
||||
dir = 0/1 = walk to left/right
|
||||
do not cross non-periodic boundaries
|
||||
is not called for z dim in 2d
|
||||
return how many procs away are needed to encompass cutghost away from loc
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Comm::updown(int dim, int dir, int loc,
|
||||
double prd, int periodicity, double *split)
|
||||
{
|
||||
int index,count;
|
||||
double frac,delta;
|
||||
|
||||
if (dir == 0) {
|
||||
frac = cutghost[dim]/prd;
|
||||
index = loc - 1;
|
||||
delta = 0.0;
|
||||
count = 0;
|
||||
while (delta < frac) {
|
||||
if (index < 0) {
|
||||
if (!periodicity) break;
|
||||
index = procgrid[dim] - 1;
|
||||
}
|
||||
count++;
|
||||
delta += split[index+1] - split[index];
|
||||
index--;
|
||||
}
|
||||
|
||||
} else {
|
||||
frac = cutghost[dim]/prd;
|
||||
index = loc + 1;
|
||||
delta = 0.0;
|
||||
count = 0;
|
||||
while (delta < frac) {
|
||||
if (index >= procgrid[dim]) {
|
||||
if (!periodicity) break;
|
||||
index = 0;
|
||||
}
|
||||
count++;
|
||||
delta += split[index+1] - split[index];
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
forward communication of atom coords every timestep
|
||||
other per-atom attributes may also be sent via pack/unpack routines
|
||||
@ -537,27 +680,30 @@ void Comm::forward_comm(int dummy)
|
||||
if (comm_x_only) {
|
||||
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
||||
else buf = NULL;
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
} else if (ghost_velocity) {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
|
||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
|
||||
}
|
||||
|
||||
@ -601,19 +747,22 @@ void Comm::reverse_comm()
|
||||
for (int iswap = nswap-1; iswap >= 0; iswap--) {
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_f_only) {
|
||||
if (size_reverse_recv[iswap])
|
||||
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
||||
sendproc[iswap],0,world,&request);
|
||||
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
|
||||
else buf = NULL;
|
||||
if (size_reverse_send[iswap])
|
||||
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
|
||||
} else {
|
||||
if (size_reverse_recv[iswap])
|
||||
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
||||
sendproc[iswap],0,world,&request);
|
||||
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
|
||||
MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
|
||||
}
|
||||
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
|
||||
|
||||
@ -752,8 +901,8 @@ void Comm::exchange()
|
||||
|
||||
void Comm::borders()
|
||||
{
|
||||
int i,n,itype,iswap,dim,ineed,maxneed,smax,rmax;
|
||||
int nsend,nrecv,nfirst,nlast,ngroup;
|
||||
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
|
||||
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
|
||||
double lo,hi;
|
||||
int *type;
|
||||
double **x;
|
||||
@ -774,8 +923,8 @@ void Comm::borders()
|
||||
|
||||
for (dim = 0; dim < 3; dim++) {
|
||||
nlast = 0;
|
||||
maxneed = 2*need[dim];
|
||||
for (ineed = 0; ineed < maxneed; ineed++) {
|
||||
twoneed = 2*maxneed[dim];
|
||||
for (ineed = 0; ineed < twoneed; ineed++) {
|
||||
|
||||
// find atoms within slab boundaries lo/hi using <= and >=
|
||||
// check atoms between nfirst and nlast
|
||||
@ -799,11 +948,19 @@ void Comm::borders()
|
||||
|
||||
nsend = 0;
|
||||
|
||||
// sendflag = 0 if I do not send on this swap
|
||||
// sendneed test indicates receiver no longer requires data
|
||||
// e.g. due to non-PBC or non-uniform sub-domains
|
||||
|
||||
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
|
||||
else sendflag = 1;
|
||||
|
||||
// find send atoms according to SINGLE vs MULTI
|
||||
// all atoms eligible versus atoms in bordergroup
|
||||
// only need to limit loop to bordergroup for first sends (ineed < 2)
|
||||
// on these sends, break loop in two: owned (in group) and ghost
|
||||
|
||||
if (sendflag) {
|
||||
if (!bordergroup || ineed >= 2) {
|
||||
if (style == SINGLE) {
|
||||
for (i = nfirst; i < nlast; i++)
|
||||
@ -852,6 +1009,7 @@ void Comm::borders()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pack up list of border atoms
|
||||
|
||||
@ -865,18 +1023,18 @@ void Comm::borders()
|
||||
pbc_flag[iswap],pbc[iswap]);
|
||||
|
||||
// swap atoms with other proc
|
||||
// no MPI calls except SendRecv if nsend/nrecv = 0
|
||||
// put incoming ghosts at end of my atom arrays
|
||||
// if swapping with self, simply copy, no messages
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
|
||||
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
|
||||
if (nrecv*size_border > maxrecv)
|
||||
grow_recv(nrecv*size_border);
|
||||
MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
|
||||
if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
|
||||
if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (nrecv) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else {
|
||||
nrecv = nsend;
|
||||
@ -939,10 +1097,12 @@ void Comm::forward_comm_pair(Pair *pair)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (recvnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||
world,&request);
|
||||
if (sendnum[iswap])
|
||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -973,10 +1133,12 @@ void Comm::reverse_comm_pair(Pair *pair)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||
world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1008,10 +1170,12 @@ void Comm::forward_comm_fix(Fix *fix)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (recvnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||
world,&request);
|
||||
if (sendnum[iswap])
|
||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1042,10 +1206,12 @@ void Comm::reverse_comm_fix(Fix *fix)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||
world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1077,10 +1243,12 @@ void Comm::forward_comm_compute(Compute *compute)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (recvnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||
world,&request);
|
||||
if (sendnum[iswap])
|
||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1111,10 +1279,12 @@ void Comm::reverse_comm_compute(Compute *compute)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||
world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1146,10 +1316,12 @@ void Comm::forward_comm_dump(Dump *dump)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (recvnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||
world,&request);
|
||||
if (sendnum[iswap])
|
||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1180,10 +1352,12 @@ void Comm::reverse_comm_dump(Dump *dump)
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||
world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||
buf = buf_recv;
|
||||
} else buf = buf_send;
|
||||
|
||||
@ -1373,6 +1547,10 @@ void Comm::set_processors(int narg, char **arg)
|
||||
if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
|
||||
error->all(FLERR,"Illegal processors command");
|
||||
|
||||
int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
|
||||
if (p && p != nprocs)
|
||||
error->all(FLERR,"Specified processors != physical processors");
|
||||
|
||||
int iarg = 3;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"grid") == 0) {
|
||||
|
||||
12
src/comm.h
12
src/comm.h
@ -24,8 +24,10 @@ class Comm : protected Pointers {
|
||||
int procgrid[3]; // procs assigned in each dim of 3d grid
|
||||
int user_procgrid[3]; // user request for procs in each dim
|
||||
int myloc[3]; // which proc I am in each dim
|
||||
int procneigh[3][2]; // my 6 neighboring procs
|
||||
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
|
||||
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
|
||||
int uniform; // 1 = equal subdomains, 0 = load-balanced
|
||||
double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
|
||||
double cutghost[3]; // cutoffs used for acquiring ghost atoms
|
||||
double cutghostuser; // user-specified ghost cutoff
|
||||
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
|
||||
@ -63,8 +65,10 @@ class Comm : protected Pointers {
|
||||
|
||||
protected:
|
||||
int style; // single vs multi-type comm
|
||||
int nswap; // # of swaps to perform
|
||||
int need[3]; // procs I need atoms from in each dim
|
||||
int nswap; // # of swaps to perform = sum of maxneed
|
||||
int recvneed[3][2]; // # of procs away I recv atoms from
|
||||
int sendneed[3][2]; // # of procs away I send atoms to
|
||||
int maxneed[3]; // max procs away any proc needs, per dim
|
||||
int triclinic; // 0 if domain is orthog, 1 if triclinic
|
||||
int maxswap; // max # of swaps memory is allocated for
|
||||
int size_forward; // # of per-atom datums in forward comm
|
||||
@ -106,6 +110,8 @@ class Comm : protected Pointers {
|
||||
int maxsend,maxrecv; // current size of send/recv buffer
|
||||
int maxforward,maxreverse; // max # of datums in forward/reverse comm
|
||||
|
||||
int updown(int, int, int, double, int, double *);
|
||||
// compare cutoff to procs
|
||||
virtual void grow_send(int,int); // reallocate send buffer
|
||||
virtual void grow_recv(int); // free/allocate recv buffer
|
||||
virtual void grow_list(int, int); // reallocate one sendlist
|
||||
|
||||
@ -215,58 +215,57 @@ void Domain::set_global_box()
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set lamda box params, only need be done one time
|
||||
assumes global box is defined and proc assignment has been made by comm
|
||||
for uppermost proc, insure subhi = 1.0 (in case round-off occurs)
|
||||
set lamda box params
|
||||
assumes global box is defined and proc assignment has been made
|
||||
uses comm->xyz_split to define subbox boundaries in consistent manner
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Domain::set_lamda_box()
|
||||
{
|
||||
int *myloc = comm->myloc;
|
||||
int *procgrid = comm->procgrid;
|
||||
double *xsplit = comm->xsplit;
|
||||
double *ysplit = comm->ysplit;
|
||||
double *zsplit = comm->zsplit;
|
||||
|
||||
sublo_lamda[0] = 1.0*myloc[0] / procgrid[0];
|
||||
sublo_lamda[1] = 1.0*myloc[1] / procgrid[1];
|
||||
sublo_lamda[2] = 1.0*myloc[2] / procgrid[2];
|
||||
sublo_lamda[0] = xsplit[myloc[0]];
|
||||
subhi_lamda[0] = xsplit[myloc[0]+1];
|
||||
|
||||
if (myloc[0] < procgrid[0]-1)
|
||||
subhi_lamda[0] = 1.0*(myloc[0]+1) / procgrid[0];
|
||||
else subhi_lamda[0] = 1.0;
|
||||
sublo_lamda[1] = ysplit[myloc[1]];
|
||||
subhi_lamda[1] = ysplit[myloc[1]+1];
|
||||
|
||||
if (myloc[1] < procgrid[1]-1)
|
||||
subhi_lamda[1] = 1.0*(myloc[1]+1) / procgrid[1];
|
||||
else subhi_lamda[1] = 1.0;
|
||||
|
||||
if (myloc[2] < procgrid[2]-1)
|
||||
subhi_lamda[2] = 1.0*(myloc[2]+1) / procgrid[2];
|
||||
else subhi_lamda[2] = 1.0;
|
||||
sublo_lamda[2] = zsplit[myloc[2]];
|
||||
subhi_lamda[2] = zsplit[myloc[2]+1];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set local subbox params
|
||||
set local subbox params for orthogonal boxes
|
||||
assumes global box is defined and proc assignment has been made
|
||||
for uppermost proc, insure subhi = boxhi (in case round-off occurs)
|
||||
uses comm->xyz_split to define subbox boundaries in consistent manner
|
||||
insure subhi[max] = boxhi
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Domain::set_local_box()
|
||||
{
|
||||
int *myloc = comm->myloc;
|
||||
int *procgrid = comm->procgrid;
|
||||
double *xsplit = comm->xsplit;
|
||||
double *ysplit = comm->ysplit;
|
||||
double *zsplit = comm->zsplit;
|
||||
|
||||
if (triclinic == 0) {
|
||||
sublo[0] = boxlo[0] + myloc[0] * xprd / procgrid[0];
|
||||
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
|
||||
if (myloc[0] < procgrid[0]-1)
|
||||
subhi[0] = boxlo[0] + (myloc[0]+1) * xprd / procgrid[0];
|
||||
subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
|
||||
else subhi[0] = boxhi[0];
|
||||
|
||||
sublo[1] = boxlo[1] + myloc[1] * yprd / procgrid[1];
|
||||
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
|
||||
if (myloc[1] < procgrid[1]-1)
|
||||
subhi[1] = boxlo[1] + (myloc[1]+1) * yprd / procgrid[1];
|
||||
subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
|
||||
else subhi[1] = boxhi[1];
|
||||
|
||||
sublo[2] = boxlo[2] + myloc[2] * zprd / procgrid[2];
|
||||
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
|
||||
if (myloc[2] < procgrid[2]-1)
|
||||
subhi[2] = boxlo[2] + (myloc[2]+1) * zprd / procgrid[2];
|
||||
subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
|
||||
else subhi[2] = boxhi[2];
|
||||
}
|
||||
}
|
||||
@ -274,7 +273,7 @@ void Domain::set_local_box()
|
||||
/* ----------------------------------------------------------------------
|
||||
reset global & local boxes due to global box boundary changes
|
||||
if shrink-wrapped, determine atom extent and reset boxlo/hi
|
||||
if triclinic, perform any shrink-wrap in lamda space
|
||||
for triclinic, atoms must be in lamda coords (0-1) before reset_box is called
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Domain::reset_box()
|
||||
|
||||
@ -83,7 +83,8 @@ void Irregular::migrate_atoms()
|
||||
atom->nghost = 0;
|
||||
atom->avec->clear_bonus();
|
||||
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
// subbox bounds for orthogonal or triclinic box
|
||||
// other comm/domain data used by coord2proc()
|
||||
|
||||
double *sublo,*subhi;
|
||||
if (triclinic == 0) {
|
||||
@ -94,6 +95,13 @@ void Irregular::migrate_atoms()
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
uniform = comm->uniform;
|
||||
xsplit = comm->xsplit;
|
||||
ysplit = comm->ysplit;
|
||||
zsplit = comm->zsplit;
|
||||
boxlo = domain->boxlo;
|
||||
prd = domain->prd;
|
||||
|
||||
// loop over atoms, flag any that are not in my sub-box
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// assign which proc it belongs to via coord2proc()
|
||||
@ -603,26 +611,37 @@ void Irregular::destroy_data()
|
||||
/* ----------------------------------------------------------------------
|
||||
determine which proc owns atom with coord x[3]
|
||||
x will be in box (orthogonal) or lamda coords (triclinic)
|
||||
for uniform = 1, directly calculate owning proc
|
||||
for non-uniform, iteratively find owning proc via binary search
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Irregular::coord2proc(double *x)
|
||||
{
|
||||
int loc[3];
|
||||
|
||||
if (uniform) {
|
||||
if (triclinic == 0) {
|
||||
double *boxlo = domain->boxlo;
|
||||
double *boxhi = domain->boxhi;
|
||||
loc[0] = static_cast<int>
|
||||
(procgrid[0] * (x[0]-boxlo[0]) / (boxhi[0]-boxlo[0]));
|
||||
loc[1] = static_cast<int>
|
||||
(procgrid[1] * (x[1]-boxlo[1]) / (boxhi[1]-boxlo[1]));
|
||||
loc[2] = static_cast<int>
|
||||
(procgrid[2] * (x[2]-boxlo[2]) / (boxhi[2]-boxlo[2]));
|
||||
loc[0] = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
|
||||
loc[1] = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
|
||||
loc[2] = static_cast<int> (procgrid[2] * (x[2]-boxlo[2]) / prd[2]);
|
||||
} else {
|
||||
loc[0] = static_cast<int> (procgrid[0] * x[0]);
|
||||
loc[1] = static_cast<int> (procgrid[1] * x[1]);
|
||||
loc[2] = static_cast<int> (procgrid[2] * x[2]);
|
||||
}
|
||||
|
||||
} else {
|
||||
if (triclinic == 0) {
|
||||
loc[0] = binary((x[0]-boxlo[0])/prd[0],procgrid[0],xsplit);
|
||||
loc[1] = binary((x[1]-boxlo[1])/prd[1],procgrid[1],ysplit);
|
||||
loc[2] = binary((x[2]-boxlo[2])/prd[2],procgrid[2],zsplit);
|
||||
} else {
|
||||
loc[0] = binary(x[0],procgrid[0],xsplit);
|
||||
loc[1] = binary(x[1],procgrid[1],ysplit);
|
||||
loc[2] = binary(x[2],procgrid[2],zsplit);
|
||||
}
|
||||
}
|
||||
|
||||
if (loc[0] < 0) loc[0] = 0;
|
||||
if (loc[0] >= procgrid[0]) loc[0] = procgrid[0] - 1;
|
||||
if (loc[1] < 0) loc[1] = 0;
|
||||
@ -633,6 +652,36 @@ int Irregular::coord2proc(double *x)
|
||||
return grid2proc[loc[0]][loc[1]][loc[2]];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binary search for value in N-length ascending vec
|
||||
value may be outside range of vec limits
|
||||
always return index from 0 to N-1 inclusive
|
||||
return 0 if value < vec[0]
|
||||
reutrn N-1 if value >= vec[N-1]
|
||||
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Irregular::binary(double value, int n, double *vec)
|
||||
{
|
||||
int lo = 0;
|
||||
int hi = n-1;
|
||||
|
||||
if (value < vec[lo]) return lo;
|
||||
if (value >= vec[hi]) return hi;
|
||||
|
||||
// insure vec[lo] <= value < vec[hi] at every iteration
|
||||
// done when lo,hi are adjacent
|
||||
|
||||
int index = (lo+hi)/2;
|
||||
while (lo < hi-1) {
|
||||
if (value < vec[index]) hi = index;
|
||||
else if (value >= vec[index]) lo = index;
|
||||
index = (lo+hi)/2;
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
||||
if flag = 1, realloc
|
||||
|
||||
@ -32,8 +32,12 @@ class Irregular : protected Pointers {
|
||||
int me,nprocs;
|
||||
int triclinic;
|
||||
int map_style;
|
||||
int *procgrid;
|
||||
int ***grid2proc;
|
||||
int uniform;
|
||||
double *xsplit,*ysplit,*zsplit; // ptrs to comm
|
||||
int *procgrid; // ptr to comm
|
||||
int ***grid2proc; // ptr to comm
|
||||
double *boxlo; // ptr to domain
|
||||
double *prd; // ptr to domain
|
||||
|
||||
int maxsend,maxrecv; // size of buffers in # of doubles
|
||||
double *buf_send,*buf_recv;
|
||||
@ -81,6 +85,7 @@ class Irregular : protected Pointers {
|
||||
void exchange_atom(double *, int *, double *);
|
||||
void destroy_atom();
|
||||
int coord2proc(double *);
|
||||
int binary(double, int, double *);
|
||||
|
||||
void grow_send(int,int); // reallocate send buffer
|
||||
void grow_recv(int); // free/allocate recv buffer
|
||||
|
||||
Reference in New Issue
Block a user