git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7734 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2012-02-09 16:56:02 +00:00
parent 2ab478095e
commit d892afbf67
8 changed files with 1235 additions and 199 deletions

728
src/balance.cpp Normal file
View File

@ -0,0 +1,728 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "math.h"
#include "stdlib.h"
#include "string.h"
#include "balance.h"
#include "atom.h"
#include "comm.h"
#include "irregular.h"
#include "domain.h"
#include "force.h"
#include "update.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
enum{NONE,UNIFORM,USER,DYNAMIC};
enum{X,Y,Z};
enum{EXPAND,CONTRACT};
/* ---------------------------------------------------------------------- */
Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
{
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
memory->create(pcount,nprocs,"balance:pcount");
memory->create(allcount,nprocs,"balance:allcount");
user_xsplit = user_ysplit = user_zsplit = NULL;
dflag = 0;
fp = NULL;
}
/* ---------------------------------------------------------------------- */
Balance::~Balance()
{
memory->destroy(pcount);
memory->destroy(allcount);
delete [] user_xsplit;
delete [] user_ysplit;
delete [] user_zsplit;
if (dflag) {
delete [] bstr;
delete [] ops;
delete [] counts[0];
delete [] counts[1];
delete [] counts[2];
delete [] cuts;
delete [] onecount;
//MPI_Comm_free(&commslice[0]);
//MPI_Comm_free(&commslice[1]);
//MPI_Comm_free(&commslice[2]);
}
if (fp) fclose(fp);
}
/* ----------------------------------------------------------------------
called as balance command in input script
------------------------------------------------------------------------- */
void Balance::command(int narg, char **arg)
{
if (domain->box_exist == 0)
error->all(FLERR,"Balance command before simulation box is defined");
if (comm->me == 0 && screen) fprintf(screen,"Balancing ...\n");
// parse arguments
int dimension = domain->dimension;
int *procgrid = comm->procgrid;
xflag = yflag = zflag = NONE;
dflag = 0;
int iarg = 0;
while (iarg < narg) {
if (strcmp(arg[iarg],"x") == 0) {
if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
if (strcmp(arg[iarg+1],"uniform") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
xflag = UNIFORM;
iarg += 2;
} else {
if (1 + procgrid[0]-1 > narg)
error->all(FLERR,"Illegal balance command");
xflag = USER;
delete [] user_xsplit;
user_xsplit = new double[procgrid[0]+1];
user_xsplit[0] = 0.0;
iarg++;
for (int i = 1; i < procgrid[0]; i++)
user_xsplit[i] = force->numeric(arg[iarg++]);
user_xsplit[procgrid[0]] = 1.0;
}
} else if (strcmp(arg[iarg],"y") == 0) {
if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
if (strcmp(arg[iarg+1],"uniform") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
yflag = UNIFORM;
iarg += 2;
} else {
if (1 + procgrid[1]-1 > narg)
error->all(FLERR,"Illegal balance command");
yflag = USER;
delete [] user_ysplit;
user_ysplit = new double[procgrid[1]+1];
user_ysplit[0] = 0.0;
iarg++;
for (int i = 1; i < procgrid[1]; i++)
user_ysplit[i] = force->numeric(arg[iarg++]);
user_ysplit[procgrid[1]] = 1.0;
}
} else if (strcmp(arg[iarg],"z") == 0) {
if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
if (strcmp(arg[iarg+1],"uniform") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
zflag = UNIFORM;
iarg += 2;
} else {
if (1 + procgrid[2]-1 > narg)
error->all(FLERR,"Illegal balance command");
zflag = USER;
delete [] user_zsplit;
user_zsplit = new double[procgrid[2]+1];
user_zsplit[0] = 0.0;
iarg++;
for (int i = 1; i < procgrid[2]; i++)
user_zsplit[i] = force->numeric(arg[iarg++]);
user_zsplit[procgrid[2]] = 1.0;
}
} else if (strcmp(arg[iarg],"dynamic") == 0) {
if (xflag != NONE || yflag != NONE || zflag != NONE)
error->all(FLERR,"Illegal balance command");
if (iarg+5 > narg) error->all(FLERR,"Illegal balance command");
dflag = 1;
xflag = yflag = DYNAMIC;
if (dimension == 3) zflag = DYNAMIC;
nrepeat = atoi(arg[iarg+1]);
niter = atoi(arg[iarg+2]);
if (nrepeat <= 0 || niter <= 0)
error->all(FLERR,"Illegal balance command");
int n = strlen(arg[iarg+3]) + 1;
bstr = new char[n];
strcpy(bstr,arg[iarg+3]);
thresh = atof(arg[iarg+4]);
if (thresh < 1.0) error->all(FLERR,"Illegal balance command");
iarg += 5;
} else error->all(FLERR,"Illegal balance command");
}
// error check
if (zflag && dimension == 2)
error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
if (xflag == USER)
for (int i = 1; i <= procgrid[0]; i++)
if (user_xsplit[i-1] >= user_xsplit[i])
error->all(FLERR,"Illegal balance command");
if (yflag == USER)
for (int i = 1; i <= procgrid[1]; i++)
if (user_ysplit[i-1] >= user_ysplit[i])
error->all(FLERR,"Illegal balance command");
if (zflag == USER)
for (int i = 1; i <= procgrid[2]; i++)
if (user_zsplit[i-1] >= user_zsplit[i])
error->all(FLERR,"Illegal balance command");
if (dflag) {
for (int i = 0; i < strlen(bstr); i++) {
if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
error->all(FLERR,"Balance dynamic string is invalid");
if (bstr[i] == 'z' && dimension == 2)
error->all(FLERR,"Balance dynamic string is invalid for 2d simulation");
}
}
// insure atoms are in current box & update box via shrink-wrap
// no exchange() since doesn't matter if atoms are assigned to correct procs
if (domain->triclinic) domain->x2lamda(atom->nlocal);
domain->pbc();
domain->reset_box();
if (domain->triclinic) domain->lamda2x(atom->nlocal);
// imbinit = initial imbalance
// use current splits instead of nlocal since atoms may not be in sub-box
domain->x2lamda(atom->nlocal);
int maxinit;
double imbinit = imbalance_splits(maxinit);
domain->lamda2x(atom->nlocal);
// debug output of initial state
//dumpout(update->ntimestep);
// explicit setting of sub-domain sizes
if (xflag == UNIFORM) {
for (int i = 0; i < procgrid[0]; i++)
comm->xsplit[i] = i * 1.0/procgrid[0];
comm->xsplit[procgrid[0]] = 1.0;
}
if (yflag == UNIFORM) {
for (int i = 0; i < procgrid[1]; i++)
comm->ysplit[i] = i * 1.0/procgrid[1];
comm->ysplit[procgrid[1]] = 1.0;
}
if (zflag == UNIFORM) {
for (int i = 0; i < procgrid[2]; i++)
comm->zsplit[i] = i * 1.0/procgrid[2];
comm->zsplit[procgrid[2]] = 1.0;
}
if (xflag == USER)
for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
if (yflag == USER)
for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
if (zflag == USER)
for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
// dynamic load-balance of sub-domain sizes
if (dflag) {
dynamic_setup(bstr);
dynamic();
} else count = 0;
// debug output of final result
//dumpout(-1);
// reset comm->uniform flag if necessary
if (comm->uniform) {
if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0;
if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0;
if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0;
} else {
if (dimension == 3) {
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->uniform = 1;
} else {
if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1;
}
}
// reset proc sub-domains
if (domain->triclinic) domain->set_lamda_box();
domain->set_local_box();
// move atoms to new processors via irregular()
if (domain->triclinic) domain->x2lamda(atom->nlocal);
Irregular *irregular = new Irregular(lmp);
irregular->migrate_atoms();
delete irregular;
if (domain->triclinic) domain->lamda2x(atom->nlocal);
// check if any atoms were lost
bigint natoms;
bigint nblocal = atom->nlocal;
MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
if (natoms != atom->natoms) {
char str[128];
sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
" current " BIGINT_FORMAT,atom->natoms,natoms);
error->all(FLERR,str);
}
// imbfinal = final imbalance based on final nlocal
int maxfinal;
double imbfinal = imbalance_nlocal(maxfinal);
if (me == 0) {
if (screen) {
fprintf(screen," iteration count = %d\n",count);
fprintf(screen," initial/final max atoms/proc = %d %d\n",
maxinit,maxfinal);
fprintf(screen," initial/final imbalance factor = %g %g\n",
imbinit,imbfinal);
}
if (logfile) {
fprintf(logfile," iteration count = %d\n",count);
fprintf(logfile," initial/final max atoms/proc = %d %d\n",
maxinit,maxfinal);
fprintf(logfile," initial/final imbalance factor = %g %g\n",
imbinit,imbfinal);
}
}
if (me == 0) {
if (screen) {
fprintf(screen," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(screen," %g",comm->xsplit[i]);
fprintf(screen,"\n");
fprintf(screen," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(screen," %g",comm->ysplit[i]);
fprintf(screen,"\n");
fprintf(screen," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(screen," %g",comm->zsplit[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(logfile," %g",comm->xsplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(logfile," %g",comm->ysplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(logfile," %g",comm->zsplit[i]);
fprintf(logfile,"\n");
}
}
}
/* ----------------------------------------------------------------------
calculate imbalance based on nlocal
return max = max atom per proc
return imbalance factor = max atom per proc / ave atom per proc
------------------------------------------------------------------------- */
double Balance::imbalance_nlocal(int &max)
{
MPI_Allreduce(&atom->nlocal,&max,1,MPI_INT,MPI_MAX,world);
double imbalance = max / (1.0 * atom->natoms / nprocs);
return imbalance;
}
/* ----------------------------------------------------------------------
calculate imbalance based on processor splits in 3 dims
atoms must be in lamda coords (0-1) before called
map atoms to 3d grid of procs
return max = max atom per proc
return imbalance factor = max atom per proc / ave atom per proc
------------------------------------------------------------------------- */
double Balance::imbalance_splits(int &max)
{
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
int nx = comm->procgrid[0];
int ny = comm->procgrid[1];
int nz = comm->procgrid[2];
for (int i = 0; i < nprocs; i++) pcount[i] = 0;
double **x = atom->x;
int nlocal = atom->nlocal;
int ix,iy,iz;
for (int i = 0; i < nlocal; i++) {
ix = binary(x[i][0],nx,xsplit);
iy = binary(x[i][1],ny,ysplit);
iz = binary(x[i][2],nz,zsplit);
pcount[iz*nx*ny + iy*nx + ix]++;
}
MPI_Allreduce(pcount,allcount,nprocs,MPI_INT,MPI_SUM,world);
max = 0;
for (int i = 0; i < nprocs; i++) max = MAX(max,allcount[i]);
double imbalance = max / (1.0 * atom->natoms / nprocs);
return imbalance;
}
/* ----------------------------------------------------------------------
setup dynamic load balance operations
called from command & fix balance
------------------------------------------------------------------------- */
void Balance::dynamic_setup(char *str)
{
nops = strlen(str);
ops = new int[nops];
for (int i = 0; i < strlen(str); i++) {
if (str[i] == 'x') ops[i] = X;
if (str[i] == 'y') ops[i] = Y;
if (str[i] == 'z') ops[i] = Z;
}
splits[0] = comm->xsplit;
splits[1] = comm->ysplit;
splits[2] = comm->zsplit;
counts[0] = new bigint[comm->procgrid[0]];
counts[1] = new bigint[comm->procgrid[1]];
counts[2] = new bigint[comm->procgrid[2]];
int max = MAX(comm->procgrid[0],comm->procgrid[1]);
max = MAX(max,comm->procgrid[2]);
cuts = new double[max+1];
onecount = new bigint[max];
//MPI_Comm_split(world,comm->myloc[0],0,&commslice[0]);
//MPI_Comm_split(world,comm->myloc[1],0,&commslice[1]);
//MPI_Comm_split(world,comm->myloc[2],0,&commslice[2]);
}
/* ----------------------------------------------------------------------
perform dynamic load balance by changing xyz split proc boundaries in Comm
called from command and fix balance
------------------------------------------------------------------------- */
void Balance::dynamic()
{
int i,m,max;
double imbfactor;
int *procgrid = comm->procgrid;
domain->x2lamda(atom->nlocal);
count = 0;
for (int irepeat = 0; irepeat < nrepeat; irepeat++) {
for (i = 0; i < nops; i++) {
for (m = 0; m < niter; m++) {
stats(ops[i],procgrid[ops[i]],splits[ops[i]],counts[ops[i]]);
adjust(procgrid[ops[i]],counts[ops[i]],splits[ops[i]]);
count++;
// debug output of intermediate result
//dumpout(-1);
}
imbfactor = imbalance_splits(max);
if (imbfactor <= thresh) break;
}
if (i < nops) break;
}
domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
count atoms in each slice
current cuts may be very different than original cuts,
so use binary search to find which slice each atom is in
------------------------------------------------------------------------- */
void Balance::stats(int dim, int n, double *split, bigint *count)
{
for (int i = 0; i < n; i++) onecount[i] = 0;
double **x = atom->x;
int nlocal = atom->nlocal;
int index;
for (int i = 0; i < nlocal; i++) {
index = binary(x[i][dim],n,split);
onecount[index]++;
}
MPI_Allreduce(onecount,count,n,MPI_LMP_BIGINT,MPI_SUM,world);
}
/* ----------------------------------------------------------------------
adjust cuts between N slices in a dim via diffusive method
count = atoms per slice
split = current N+1 cuts, with 0.0 and 1.0 at end points
overwrite split with new cuts
diffusion means slices with more atoms than their neighbors
"send" them atoms by moving cut closer to sender, further from receiver
------------------------------------------------------------------------- */
void Balance::adjust(int n, bigint *count, double *split)
{
// damping factor
double damp = 0.5;
// loop over slices from 1 to N-2 inclusive (not end slices 0 and N-1)
// cut I is between 2 slices (I-1 and I) with counts
// cut I+1 is between 2 slices (I and I+1) with counts
// for a cut between 2 slices, only slice with larger count adjusts it
bigint leftcount,mycount,rightcount;
double rho,target,targetleft,targetright;
for (int i = 1; i < n-1; i++) {
leftcount = count[i-1];
mycount = count[i];
rightcount = count[i+1];
// middle slice is <= both left and right, so do nothing
// special case if 2 slices both have count = 0, no change in cut
if (mycount <= leftcount && mycount <= rightcount) {
if (leftcount == 0) cuts[i] = split[i];
if (rightcount == 0) cuts[i+1] = split[i+1];
continue;
}
// rho = density of atoms in the slice
rho = mycount / (split[i+1] - split[i]);
// middle slice has more atoms then left or right slice
// if delta from middle to top slice > delta between top and bottom slice
// then send atoms both dirs to bring all 3 slices to same count
// else bottom slice is very low, so send atoms only in that dir
if (mycount > leftcount && mycount > rightcount) {
if (mycount-MAX(leftcount,rightcount) >= fabs(leftcount-rightcount)) {
if (leftcount <= rightcount) {
targetleft = damp *
(rightcount-leftcount + (mycount-rightcount)/3.0);
targetright = damp * (mycount-rightcount)/3.0;
cuts[i] = split[i] + targetleft/rho;
cuts[i+1] = split[i+1] - targetright/rho;
} else {
targetleft = damp * (mycount-leftcount)/3.0;
targetright = damp *
(leftcount-rightcount + (mycount-leftcount)/3.0);
cuts[i] = split[i] + targetleft/rho;
cuts[i+1] = split[i+1] - targetright/rho;
}
} else if (leftcount < rightcount) {
target = damp * 0.5*(mycount-leftcount);
cuts[i] = split[i] + target/rho;
cuts[i+1] = split[i+1];
} else if (rightcount < leftcount) {
target = damp * 0.5*(mycount-rightcount);
cuts[i+1] = split[i+1] - target/rho;
cuts[i] = split[i];
}
// middle slice has more atoms than only left or right slice
// send atoms only in that dir
} else if (mycount > leftcount) {
target = damp * 0.5*(mycount-leftcount);
cuts[i] = split[i] + target/rho;
} else if (mycount > rightcount) {
target = damp * 0.5*(mycount-rightcount);
cuts[i+1] = split[i+1] - target/rho;
}
}
// overwrite adjustable splits with new cuts
for (int i = 1; i < n; i++) split[i] = cuts[i];
}
/* ----------------------------------------------------------------------
binary search for value in N-length ascending vec
value may be outside range of vec limits
always return index from 0 to N-1 inclusive
return 0 if value < vec[0]
reutrn N-1 if value >= vec[N-1]
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
------------------------------------------------------------------------- */
int Balance::binary(double value, int n, double *vec)
{
int lo = 0;
int hi = n-1;
if (value < vec[lo]) return lo;
if (value >= vec[hi]) return hi;
// insure vec[lo] <= value < vec[hi] at every iteration
// done when lo,hi are adjacent
int index = (lo+hi)/2;
while (lo < hi-1) {
if (value < vec[index]) hi = index;
else if (value >= vec[index]) lo = index;
index = (lo+hi)/2;
}
return index;
}
/* ----------------------------------------------------------------------
create dump file of line segments in Pizza.py mdump mesh format
just for debug/viz purposes
write to tmp.mdump.*, open first time if necessary
write xy lines around each proc's sub-domain for 2d
write xyz cubes around each proc's sub-domain for 3d
all procs but 0 just return
------------------------------------------------------------------------- */
void Balance::dumpout(bigint tstamp)
{
if (me) return;
bigint tstep;
if (tstamp >= 0) tstep = tstamp;
else tstep = laststep + 1;
laststep = tstep;
if (fp == NULL) {
char file[32];
sprintf(file,"tmp.mdump.%ld",tstep);
fp = fopen(file,"w");
if (!fp) error->one(FLERR,"Cannot open balance output file");
// write out one square/cute per processor for 2d/3d
// only write once since topology is static
fprintf(fp,"ITEM: TIMESTEP\n");
fprintf(fp,"%ld\n",tstep);
fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
fprintf(fp,"%d\n",nprocs);
fprintf(fp,"ITEM: SQUARES\n");
int nx = comm->procgrid[0] + 1;
int ny = comm->procgrid[1] + 1;
int nz = comm->procgrid[2] + 1;
if (domain->dimension == 2) {
int m = 0;
for (int j = 0; j < comm->procgrid[1]; j++)
for (int i = 0; i < comm->procgrid[0]; i++) {
int c1 = j*nx + i + 1;
int c2 = c1 + 1;
int c3 = c2 + nx;
int c4 = c3 - 1;
fprintf(fp,"%d %d %d %d %d %d\n",m+1,m+1,c1,c2,c3,c4);
m++;
}
} else {
int m = 0;
for (int k = 0; k < comm->procgrid[2]; k++)
for (int j = 0; j < comm->procgrid[1]; j++)
for (int i = 0; i < comm->procgrid[0]; i++) {
int c1 = k*ny*nx + j*nx + i + 1;
int c2 = c1 + 1;
int c3 = c2 + nx;
int c4 = c3 - 1;
int c5 = c1 + ny*nx;
int c6 = c2 + ny*nx;
int c7 = c3 + ny*nx;
int c8 = c4 + ny*nx;
fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
m+1,m+1,c1,c2,c3,c4,c5,c6,c7,c8);
m++;
}
}
}
// write out nodal coords, can be different every call
// scale xsplit,ysplit,zsplit values to full box
// only implmented for orthogonal boxes, not triclinic
int nx = comm->procgrid[0] + 1;
int ny = comm->procgrid[1] + 1;
int nz = comm->procgrid[2] + 1;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double *prd = domain->prd;
fprintf(fp,"ITEM: TIMESTEP\n");
fprintf(fp,"%ld\n",tstep);
fprintf(fp,"ITEM: NUMBER OF NODES\n");
if (domain->dimension == 2)
fprintf(fp,"%d\n",nx*ny);
else
fprintf(fp,"%d\n",nx*ny*nz);
fprintf(fp,"ITEM: BOX BOUNDS\n");
fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
fprintf(fp,"ITEM: NODES\n");
if (domain->dimension == 2) {
int m = 0;
for (int j = 0; j < ny; j++)
for (int i = 0; i < nx; i++) {
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
boxlo[0] + prd[0]*comm->xsplit[i],
boxlo[1] + prd[1]*comm->ysplit[j],
0.0);
m++;
}
} else {
int m = 0;
for (int k = 0; k < nz; k++)
for (int j = 0; j < ny; j++)
for (int i = 0; i < nx; i++) {
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
boxlo[0] + prd[0]*comm->xsplit[i],
boxlo[1] + prd[1]*comm->ysplit[j],
boxlo[2] + prd[2]*comm->zsplit[j]);
m++;
}
}
}

71
src/balance.h Normal file
View File

@ -0,0 +1,71 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef COMMAND_CLASS
CommandStyle(balance,Balance)
#else
#ifndef LMP_BALANCE_H
#define LMP_BALANCE_H
#include "mpi.h"
#include "stdio.h"
#include "pointers.h"
namespace LAMMPS_NS {
class Balance : protected Pointers {
public:
Balance(class LAMMPS *);
~Balance();
void command(int, char **);
private:
int me,nprocs;
int xflag,yflag,zflag,dflag;
int nrepeat,niter;
double thresh;
char *bstr;
double *user_xsplit,*user_ysplit,*user_zsplit;
int *ops;
int nops;
double *splits[3];
bigint *counts[3];
double *cuts;
bigint *onecount;
MPI_Comm commslice[3];
int count;
int *pcount,*allcount;
FILE *fp; // for debug output
bigint laststep;
double imbalance_splits(int &);
double imbalance_nlocal(int &);
void dynamic_setup(char *);
void dynamic();
void stats(int, int, double *, bigint *);
void adjust(int, bigint *, double *);
int binary(double, int, double *);
void dumpout(bigint); // for debug output
};
}
#endif
#endif

View File

@ -150,13 +150,13 @@ void ChangeBox::command(int narg, char **arg)
iarg += 4;
} else if (strcmp(arg[iarg],"ortho") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
ops[nops].style = ORTHO;
nops++;
iarg += 1;
} else if (strcmp(arg[iarg],"triclinic") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
ops[nops].style = TRICLINIC;
nops++;
iarg += 1;

View File

@ -78,6 +78,8 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
bordergroup = 0;
style = SINGLE;
uniform = 1;
xsplit = ysplit = zsplit = NULL;
multilo = multihi = NULL;
cutghostmulti = NULL;
cutghostuser = 0.0;
@ -124,6 +126,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
Comm::~Comm()
{
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
delete [] customfile;
delete [] outfile;
@ -145,7 +151,7 @@ Comm::~Comm()
/* ----------------------------------------------------------------------
create 3d grid of procs based on Nprocs and box size & shape
map processors to grid
map processors to grid, setup xyz split for a uniform grid
------------------------------------------------------------------------- */
void Comm::set_proc_grid()
@ -252,14 +258,31 @@ void Comm::set_proc_grid()
if (outfile) pmap->output(outfile,procgrid,grid2proc);
// set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box();
// free ProcMap class
delete pmap;
// set xsplit,ysplit,zsplit for uniform spacings
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
// set lamda box params after procs are assigned
// only set once unless load-balancing occurs
if (domain->triclinic) domain->set_lamda_box();
// send my 3d proc grid to another partition if requested
if (send_to_partition >= 0) {
@ -399,25 +422,112 @@ void Comm::setup()
}
}
// need = # of procs I need atoms from in each dim based on max cutoff
// for 2d, don't communicate in z
need[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
need[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
need[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
if (domain->dimension == 2) need[2] = 0;
// if non-periodic, do not communicate further than procgrid-1 away
// this enables very large cutoffs in non-periodic systems
// recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
// 0 = from left, 1 = from right
// do not cross non-periodic boundaries, need[2] = 0 for 2d
// sendneed[idim][0/1] = # of procs away I send atoms to
// 0 = to left, 1 = to right
// set equal to recvneed[idim][1/0] of neighbor proc
// maxneed[idim] = max procs away any proc recvs atoms in either direction
// uniform = 1 = uniform sized sub-domains:
// maxneed is directly computable from sub-domain size
// limit to procgrid-1 for non-PBC
// recvneed = maxneed except for procs near non-PBC
// sendneed = recvneed of neighbor on each side
// uniform = 0 = non-uniform sized sub-domains:
// compute recvneed via updown() which accounts for non-PBC
// sendneed = recvneed of neighbor on each side
// maxneed via Allreduce() of recvneed
int *periodicity = domain->periodicity;
if (periodicity[0] == 0) need[0] = MIN(need[0],procgrid[0]-1);
if (periodicity[1] == 0) need[1] = MIN(need[1],procgrid[1]-1);
if (periodicity[2] == 0) need[2] = MIN(need[2],procgrid[2]-1);
int left,right;
if (uniform) {
maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
if (domain->dimension == 2) maxneed[2] = 0;
if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
if (!periodicity[0]) {
recvneed[0][0] = MIN(maxneed[0],myloc[0]);
recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
left = myloc[0] - 1;
if (left < 0) left = procgrid[0] - 1;
sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
right = myloc[0] + 1;
if (right == procgrid[0]) right = 0;
sendneed[0][1] = MIN(maxneed[0],right);
} else recvneed[0][0] = recvneed[0][1] =
sendneed[0][0] = sendneed[0][1] = maxneed[0];
if (!periodicity[1]) {
recvneed[1][0] = MIN(maxneed[1],myloc[1]);
recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
left = myloc[1] - 1;
if (left < 0) left = procgrid[1] - 1;
sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
right = myloc[1] + 1;
if (right == procgrid[1]) right = 0;
sendneed[1][1] = MIN(maxneed[1],right);
} else recvneed[1][0] = recvneed[1][1] =
sendneed[1][0] = sendneed[1][1] = maxneed[1];
if (!periodicity[2]) {
recvneed[2][0] = MIN(maxneed[2],myloc[2]);
recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
left = myloc[2] - 1;
if (left < 0) left = procgrid[2] - 1;
sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
right = myloc[2] + 1;
if (right == procgrid[2]) right = 0;
sendneed[2][1] = MIN(maxneed[2],right);
} else recvneed[2][0] = recvneed[2][1] =
sendneed[2][0] = sendneed[2][1] = maxneed[2];
} else {
recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
left = myloc[0] - 1;
if (left < 0) left = procgrid[0] - 1;
sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
right = myloc[0] + 1;
if (right == procgrid[0]) right = 0;
sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
left = myloc[1] - 1;
if (left < 0) left = procgrid[1] - 1;
sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
right = myloc[1] + 1;
if (right == procgrid[1]) right = 0;
sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
if (domain->dimension == 3) {
recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
left = myloc[2] - 1;
if (left < 0) left = procgrid[2] - 1;
sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
right = myloc[2] + 1;
if (right == procgrid[2]) right = 0;
sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
} else recvneed[2][0] = recvneed[2][1] =
sendneed[2][0] = sendneed[2][1] = 0;
int all[6];
MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
maxneed[0] = MAX(all[0],all[1]);
maxneed[1] = MAX(all[2],all[3]);
maxneed[2] = MAX(all[4],all[5]);
}
// allocate comm memory
nswap = 2 * (need[0]+need[1]+need[2]);
nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
if (nswap > maxswap) grow_swap(nswap);
// setup parameters for each exchange:
@ -428,13 +538,11 @@ void Comm::setup()
// use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
// if round-off, atoms recvd across PBC can be < or > than subbox boundary
// note that borders() only loops over subset of atoms during each swap
// set slablo > slabhi for swaps across non-periodic boundaries
// this insures no atoms are swapped
// only for procs owning sub-box at non-periodic end of global box
// treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
// for style MULTI:
// multilo/multihi is same as slablo/slabhi, only for each atom type
// multilo/multihi is same, with slablo/slabhi for each atom type
// pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
// pbc = -1/0/1 for PBC factor in each of 3/6 orthog/triclinic dirs
// pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
// for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
// 1st part of if statement is sending to the west/south/down
// 2nd part of if statement is sending to the east/north/up
@ -443,7 +551,7 @@ void Comm::setup()
int iswap = 0;
for (dim = 0; dim < 3; dim++) {
for (ineed = 0; ineed < 2*need[dim]; ineed++) {
for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
pbc_flag[iswap] = 0;
pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
@ -463,12 +571,6 @@ void Comm::setup()
}
}
if (myloc[dim] == 0) {
if (periodicity[dim] == 0) {
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
else
for (i = 1; i <= ntypes; i++)
multihi[iswap][i] = multilo[iswap][i] - 1.0;
} else {
pbc_flag[iswap] = 1;
pbc[iswap][dim] = 1;
if (triclinic) {
@ -476,7 +578,6 @@ void Comm::setup()
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
}
}
}
} else {
sendproc[iswap] = procneigh[dim][1];
@ -493,12 +594,6 @@ void Comm::setup()
}
}
if (myloc[dim] == procgrid[dim]-1) {
if (periodicity[dim] == 0) {
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
else
for (i = 1; i <= ntypes; i++)
multihi[iswap][i] = multilo[iswap][i] - 1.0;
} else {
pbc_flag[iswap] = 1;
pbc[iswap][dim] = -1;
if (triclinic) {
@ -507,13 +602,61 @@ void Comm::setup()
}
}
}
}
iswap++;
}
}
}
/* ----------------------------------------------------------------------
walk up/down the extent of nearby processors in dim and dir
loc = myloc of proc to start at
dir = 0/1 = walk to left/right
do not cross non-periodic boundaries
is not called for z dim in 2d
return how many procs away are needed to encompass cutghost away from loc
------------------------------------------------------------------------- */
int Comm::updown(int dim, int dir, int loc,
double prd, int periodicity, double *split)
{
int index,count;
double frac,delta;
if (dir == 0) {
frac = cutghost[dim]/prd;
index = loc - 1;
delta = 0.0;
count = 0;
while (delta < frac) {
if (index < 0) {
if (!periodicity) break;
index = procgrid[dim] - 1;
}
count++;
delta += split[index+1] - split[index];
index--;
}
} else {
frac = cutghost[dim]/prd;
index = loc + 1;
delta = 0.0;
count = 0;
while (delta < frac) {
if (index >= procgrid[dim]) {
if (!periodicity) break;
index = 0;
}
count++;
delta += split[index+1] - split[index];
index++;
}
}
return count;
}
/* ----------------------------------------------------------------------
forward communication of atom coords every timestep
other per-atom attributes may also be sent via pack/unpack routines
@ -537,27 +680,30 @@ void Comm::forward_comm(int dummy)
if (comm_x_only) {
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
else buf = NULL;
if (size_forward_recv[iswap])
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
} else if (ghost_velocity) {
if (size_forward_recv[iswap])
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
} else {
if (size_forward_recv[iswap])
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
}
@ -601,19 +747,22 @@ void Comm::reverse_comm()
for (int iswap = nswap-1; iswap >= 0; iswap--) {
if (sendproc[iswap] != me) {
if (comm_f_only) {
if (size_reverse_recv[iswap])
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
else buf = NULL;
if (size_reverse_send[iswap])
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
} else {
if (size_reverse_recv[iswap])
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
}
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
@ -752,8 +901,8 @@ void Comm::exchange()
void Comm::borders()
{
int i,n,itype,iswap,dim,ineed,maxneed,smax,rmax;
int nsend,nrecv,nfirst,nlast,ngroup;
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
double lo,hi;
int *type;
double **x;
@ -774,8 +923,8 @@ void Comm::borders()
for (dim = 0; dim < 3; dim++) {
nlast = 0;
maxneed = 2*need[dim];
for (ineed = 0; ineed < maxneed; ineed++) {
twoneed = 2*maxneed[dim];
for (ineed = 0; ineed < twoneed; ineed++) {
// find atoms within slab boundaries lo/hi using <= and >=
// check atoms between nfirst and nlast
@ -799,11 +948,19 @@ void Comm::borders()
nsend = 0;
// sendflag = 0 if I do not send on this swap
// sendneed test indicates receiver no longer requires data
// e.g. due to non-PBC or non-uniform sub-domains
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
else sendflag = 1;
// find send atoms according to SINGLE vs MULTI
// all atoms eligible versus atoms in bordergroup
// only need to limit loop to bordergroup for first sends (ineed < 2)
// on these sends, break loop in two: owned (in group) and ghost
if (sendflag) {
if (!bordergroup || ineed >= 2) {
if (style == SINGLE) {
for (i = nfirst; i < nlast; i++)
@ -852,6 +1009,7 @@ void Comm::borders()
}
}
}
}
// pack up list of border atoms
@ -865,18 +1023,18 @@ void Comm::borders()
pbc_flag[iswap],pbc[iswap]);
// swap atoms with other proc
// no MPI calls except SendRecv if nsend/nrecv = 0
// put incoming ghosts at end of my atom arrays
// if swapping with self, simply copy, no messages
if (sendproc[iswap] != me) {
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
if (nrecv*size_border > maxrecv)
grow_recv(nrecv*size_border);
MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
recvproc[iswap],0,world,&request);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (nrecv) MPI_Wait(&request,&status);
buf = buf_recv;
} else {
nrecv = nsend;
@ -939,10 +1097,12 @@ void Comm::forward_comm_pair(Pair *pair)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -973,10 +1133,12 @@ void Comm::reverse_comm_pair(Pair *pair)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1008,10 +1170,12 @@ void Comm::forward_comm_fix(Fix *fix)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1042,10 +1206,12 @@ void Comm::reverse_comm_fix(Fix *fix)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1077,10 +1243,12 @@ void Comm::forward_comm_compute(Compute *compute)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1111,10 +1279,12 @@ void Comm::reverse_comm_compute(Compute *compute)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1146,10 +1316,12 @@ void Comm::forward_comm_dump(Dump *dump)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (recvnum[iswap])
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
world,&request);
if (sendnum[iswap])
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
MPI_Wait(&request,&status);
if (recvnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1180,10 +1352,12 @@ void Comm::reverse_comm_dump(Dump *dump)
// if self, set recv buffer to send buffer
if (sendproc[iswap] != me) {
if (sendnum[iswap])
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
world,&request);
if (recvnum[iswap])
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
MPI_Wait(&request,&status);
if (sendnum[iswap]) MPI_Wait(&request,&status);
buf = buf_recv;
} else buf = buf_send;
@ -1373,6 +1547,10 @@ void Comm::set_processors(int narg, char **arg)
if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
error->all(FLERR,"Illegal processors command");
int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
if (p && p != nprocs)
error->all(FLERR,"Specified processors != physical processors");
int iarg = 3;
while (iarg < narg) {
if (strcmp(arg[iarg],"grid") == 0) {

View File

@ -24,8 +24,10 @@ class Comm : protected Pointers {
int procgrid[3]; // procs assigned in each dim of 3d grid
int user_procgrid[3]; // user request for procs in each dim
int myloc[3]; // which proc I am in each dim
int procneigh[3][2]; // my 6 neighboring procs
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
int uniform; // 1 = equal subdomains, 0 = load-balanced
double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
double cutghost[3]; // cutoffs used for acquiring ghost atoms
double cutghostuser; // user-specified ghost cutoff
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
@ -63,8 +65,10 @@ class Comm : protected Pointers {
protected:
int style; // single vs multi-type comm
int nswap; // # of swaps to perform
int need[3]; // procs I need atoms from in each dim
int nswap; // # of swaps to perform = sum of maxneed
int recvneed[3][2]; // # of procs away I recv atoms from
int sendneed[3][2]; // # of procs away I send atoms to
int maxneed[3]; // max procs away any proc needs, per dim
int triclinic; // 0 if domain is orthog, 1 if triclinic
int maxswap; // max # of swaps memory is allocated for
int size_forward; // # of per-atom datums in forward comm
@ -106,6 +110,8 @@ class Comm : protected Pointers {
int maxsend,maxrecv; // current size of send/recv buffer
int maxforward,maxreverse; // max # of datums in forward/reverse comm
int updown(int, int, int, double, int, double *);
// compare cutoff to procs
virtual void grow_send(int,int); // reallocate send buffer
virtual void grow_recv(int); // free/allocate recv buffer
virtual void grow_list(int, int); // reallocate one sendlist

View File

@ -215,58 +215,57 @@ void Domain::set_global_box()
}
/* ----------------------------------------------------------------------
set lamda box params, only need be done one time
assumes global box is defined and proc assignment has been made by comm
for uppermost proc, insure subhi = 1.0 (in case round-off occurs)
set lamda box params
assumes global box is defined and proc assignment has been made
uses comm->xyz_split to define subbox boundaries in consistent manner
------------------------------------------------------------------------- */
void Domain::set_lamda_box()
{
int *myloc = comm->myloc;
int *procgrid = comm->procgrid;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
sublo_lamda[0] = 1.0*myloc[0] / procgrid[0];
sublo_lamda[1] = 1.0*myloc[1] / procgrid[1];
sublo_lamda[2] = 1.0*myloc[2] / procgrid[2];
sublo_lamda[0] = xsplit[myloc[0]];
subhi_lamda[0] = xsplit[myloc[0]+1];
if (myloc[0] < procgrid[0]-1)
subhi_lamda[0] = 1.0*(myloc[0]+1) / procgrid[0];
else subhi_lamda[0] = 1.0;
sublo_lamda[1] = ysplit[myloc[1]];
subhi_lamda[1] = ysplit[myloc[1]+1];
if (myloc[1] < procgrid[1]-1)
subhi_lamda[1] = 1.0*(myloc[1]+1) / procgrid[1];
else subhi_lamda[1] = 1.0;
if (myloc[2] < procgrid[2]-1)
subhi_lamda[2] = 1.0*(myloc[2]+1) / procgrid[2];
else subhi_lamda[2] = 1.0;
sublo_lamda[2] = zsplit[myloc[2]];
subhi_lamda[2] = zsplit[myloc[2]+1];
}
/* ----------------------------------------------------------------------
set local subbox params
set local subbox params for orthogonal boxes
assumes global box is defined and proc assignment has been made
for uppermost proc, insure subhi = boxhi (in case round-off occurs)
uses comm->xyz_split to define subbox boundaries in consistent manner
insure subhi[max] = boxhi
------------------------------------------------------------------------- */
void Domain::set_local_box()
{
int *myloc = comm->myloc;
int *procgrid = comm->procgrid;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
if (triclinic == 0) {
sublo[0] = boxlo[0] + myloc[0] * xprd / procgrid[0];
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
if (myloc[0] < procgrid[0]-1)
subhi[0] = boxlo[0] + (myloc[0]+1) * xprd / procgrid[0];
subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + myloc[1] * yprd / procgrid[1];
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
if (myloc[1] < procgrid[1]-1)
subhi[1] = boxlo[1] + (myloc[1]+1) * yprd / procgrid[1];
subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + myloc[2] * zprd / procgrid[2];
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
if (myloc[2] < procgrid[2]-1)
subhi[2] = boxlo[2] + (myloc[2]+1) * zprd / procgrid[2];
subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
else subhi[2] = boxhi[2];
}
}
@ -274,7 +273,7 @@ void Domain::set_local_box()
/* ----------------------------------------------------------------------
reset global & local boxes due to global box boundary changes
if shrink-wrapped, determine atom extent and reset boxlo/hi
if triclinic, perform any shrink-wrap in lamda space
for triclinic, atoms must be in lamda coords (0-1) before reset_box is called
------------------------------------------------------------------------- */
void Domain::reset_box()

View File

@ -83,7 +83,8 @@ void Irregular::migrate_atoms()
atom->nghost = 0;
atom->avec->clear_bonus();
// subbox bounds for orthogonal or triclinic
// subbox bounds for orthogonal or triclinic box
// other comm/domain data used by coord2proc()
double *sublo,*subhi;
if (triclinic == 0) {
@ -94,6 +95,13 @@ void Irregular::migrate_atoms()
subhi = domain->subhi_lamda;
}
uniform = comm->uniform;
xsplit = comm->xsplit;
ysplit = comm->ysplit;
zsplit = comm->zsplit;
boxlo = domain->boxlo;
prd = domain->prd;
// loop over atoms, flag any that are not in my sub-box
// fill buffer with atoms leaving my box, using < and >=
// assign which proc it belongs to via coord2proc()
@ -603,26 +611,37 @@ void Irregular::destroy_data()
/* ----------------------------------------------------------------------
determine which proc owns atom with coord x[3]
x will be in box (orthogonal) or lamda coords (triclinic)
for uniform = 1, directly calculate owning proc
for non-uniform, iteratively find owning proc via binary search
------------------------------------------------------------------------- */
int Irregular::coord2proc(double *x)
{
int loc[3];
if (uniform) {
if (triclinic == 0) {
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
loc[0] = static_cast<int>
(procgrid[0] * (x[0]-boxlo[0]) / (boxhi[0]-boxlo[0]));
loc[1] = static_cast<int>
(procgrid[1] * (x[1]-boxlo[1]) / (boxhi[1]-boxlo[1]));
loc[2] = static_cast<int>
(procgrid[2] * (x[2]-boxlo[2]) / (boxhi[2]-boxlo[2]));
loc[0] = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
loc[1] = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
loc[2] = static_cast<int> (procgrid[2] * (x[2]-boxlo[2]) / prd[2]);
} else {
loc[0] = static_cast<int> (procgrid[0] * x[0]);
loc[1] = static_cast<int> (procgrid[1] * x[1]);
loc[2] = static_cast<int> (procgrid[2] * x[2]);
}
} else {
if (triclinic == 0) {
loc[0] = binary((x[0]-boxlo[0])/prd[0],procgrid[0],xsplit);
loc[1] = binary((x[1]-boxlo[1])/prd[1],procgrid[1],ysplit);
loc[2] = binary((x[2]-boxlo[2])/prd[2],procgrid[2],zsplit);
} else {
loc[0] = binary(x[0],procgrid[0],xsplit);
loc[1] = binary(x[1],procgrid[1],ysplit);
loc[2] = binary(x[2],procgrid[2],zsplit);
}
}
if (loc[0] < 0) loc[0] = 0;
if (loc[0] >= procgrid[0]) loc[0] = procgrid[0] - 1;
if (loc[1] < 0) loc[1] = 0;
@ -633,6 +652,36 @@ int Irregular::coord2proc(double *x)
return grid2proc[loc[0]][loc[1]][loc[2]];
}
/* ----------------------------------------------------------------------
binary search for value in N-length ascending vec
value may be outside range of vec limits
always return index from 0 to N-1 inclusive
return 0 if value < vec[0]
reutrn N-1 if value >= vec[N-1]
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
------------------------------------------------------------------------- */
int Irregular::binary(double value, int n, double *vec)
{
int lo = 0;
int hi = n-1;
if (value < vec[lo]) return lo;
if (value >= vec[hi]) return hi;
// insure vec[lo] <= value < vec[hi] at every iteration
// done when lo,hi are adjacent
int index = (lo+hi)/2;
while (lo < hi-1) {
if (value < vec[index]) hi = index;
else if (value >= vec[index]) lo = index;
index = (lo+hi)/2;
}
return index;
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
if flag = 1, realloc

View File

@ -32,8 +32,12 @@ class Irregular : protected Pointers {
int me,nprocs;
int triclinic;
int map_style;
int *procgrid;
int ***grid2proc;
int uniform;
double *xsplit,*ysplit,*zsplit; // ptrs to comm
int *procgrid; // ptr to comm
int ***grid2proc; // ptr to comm
double *boxlo; // ptr to domain
double *prd; // ptr to domain
int maxsend,maxrecv; // size of buffers in # of doubles
double *buf_send,*buf_recv;
@ -81,6 +85,7 @@ class Irregular : protected Pointers {
void exchange_atom(double *, int *, double *);
void destroy_atom();
int coord2proc(double *);
int binary(double, int, double *);
void grow_send(int,int); // reallocate send buffer
void grow_recv(int); // free/allocate recv buffer