git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7734 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
728
src/balance.cpp
Normal file
728
src/balance.cpp
Normal file
@ -0,0 +1,728 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "math.h"
|
||||||
|
#include "stdlib.h"
|
||||||
|
#include "string.h"
|
||||||
|
#include "balance.h"
|
||||||
|
#include "atom.h"
|
||||||
|
#include "comm.h"
|
||||||
|
#include "irregular.h"
|
||||||
|
#include "domain.h"
|
||||||
|
#include "force.h"
|
||||||
|
#include "update.h"
|
||||||
|
#include "memory.h"
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
using namespace LAMMPS_NS;
|
||||||
|
|
||||||
|
enum{NONE,UNIFORM,USER,DYNAMIC};
|
||||||
|
enum{X,Y,Z};
|
||||||
|
enum{EXPAND,CONTRACT};
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
|
||||||
|
{
|
||||||
|
MPI_Comm_rank(world,&me);
|
||||||
|
MPI_Comm_size(world,&nprocs);
|
||||||
|
|
||||||
|
memory->create(pcount,nprocs,"balance:pcount");
|
||||||
|
memory->create(allcount,nprocs,"balance:allcount");
|
||||||
|
|
||||||
|
user_xsplit = user_ysplit = user_zsplit = NULL;
|
||||||
|
dflag = 0;
|
||||||
|
|
||||||
|
fp = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
Balance::~Balance()
|
||||||
|
{
|
||||||
|
memory->destroy(pcount);
|
||||||
|
memory->destroy(allcount);
|
||||||
|
|
||||||
|
delete [] user_xsplit;
|
||||||
|
delete [] user_ysplit;
|
||||||
|
delete [] user_zsplit;
|
||||||
|
|
||||||
|
if (dflag) {
|
||||||
|
delete [] bstr;
|
||||||
|
delete [] ops;
|
||||||
|
delete [] counts[0];
|
||||||
|
delete [] counts[1];
|
||||||
|
delete [] counts[2];
|
||||||
|
delete [] cuts;
|
||||||
|
delete [] onecount;
|
||||||
|
//MPI_Comm_free(&commslice[0]);
|
||||||
|
//MPI_Comm_free(&commslice[1]);
|
||||||
|
//MPI_Comm_free(&commslice[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fp) fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
called as balance command in input script
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::command(int narg, char **arg)
|
||||||
|
{
|
||||||
|
if (domain->box_exist == 0)
|
||||||
|
error->all(FLERR,"Balance command before simulation box is defined");
|
||||||
|
|
||||||
|
if (comm->me == 0 && screen) fprintf(screen,"Balancing ...\n");
|
||||||
|
|
||||||
|
// parse arguments
|
||||||
|
|
||||||
|
int dimension = domain->dimension;
|
||||||
|
int *procgrid = comm->procgrid;
|
||||||
|
xflag = yflag = zflag = NONE;
|
||||||
|
dflag = 0;
|
||||||
|
|
||||||
|
int iarg = 0;
|
||||||
|
while (iarg < narg) {
|
||||||
|
if (strcmp(arg[iarg],"x") == 0) {
|
||||||
|
if (xflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||||
|
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||||
|
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||||
|
xflag = UNIFORM;
|
||||||
|
iarg += 2;
|
||||||
|
} else {
|
||||||
|
if (1 + procgrid[0]-1 > narg)
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
xflag = USER;
|
||||||
|
delete [] user_xsplit;
|
||||||
|
user_xsplit = new double[procgrid[0]+1];
|
||||||
|
user_xsplit[0] = 0.0;
|
||||||
|
iarg++;
|
||||||
|
for (int i = 1; i < procgrid[0]; i++)
|
||||||
|
user_xsplit[i] = force->numeric(arg[iarg++]);
|
||||||
|
user_xsplit[procgrid[0]] = 1.0;
|
||||||
|
}
|
||||||
|
} else if (strcmp(arg[iarg],"y") == 0) {
|
||||||
|
if (yflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||||
|
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||||
|
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||||
|
yflag = UNIFORM;
|
||||||
|
iarg += 2;
|
||||||
|
} else {
|
||||||
|
if (1 + procgrid[1]-1 > narg)
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
yflag = USER;
|
||||||
|
delete [] user_ysplit;
|
||||||
|
user_ysplit = new double[procgrid[1]+1];
|
||||||
|
user_ysplit[0] = 0.0;
|
||||||
|
iarg++;
|
||||||
|
for (int i = 1; i < procgrid[1]; i++)
|
||||||
|
user_ysplit[i] = force->numeric(arg[iarg++]);
|
||||||
|
user_ysplit[procgrid[1]] = 1.0;
|
||||||
|
}
|
||||||
|
} else if (strcmp(arg[iarg],"z") == 0) {
|
||||||
|
if (zflag == DYNAMIC) error->all(FLERR,"Illegal balance command");
|
||||||
|
if (strcmp(arg[iarg+1],"uniform") == 0) {
|
||||||
|
if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
|
||||||
|
zflag = UNIFORM;
|
||||||
|
iarg += 2;
|
||||||
|
} else {
|
||||||
|
if (1 + procgrid[2]-1 > narg)
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
zflag = USER;
|
||||||
|
delete [] user_zsplit;
|
||||||
|
user_zsplit = new double[procgrid[2]+1];
|
||||||
|
user_zsplit[0] = 0.0;
|
||||||
|
iarg++;
|
||||||
|
for (int i = 1; i < procgrid[2]; i++)
|
||||||
|
user_zsplit[i] = force->numeric(arg[iarg++]);
|
||||||
|
user_zsplit[procgrid[2]] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (strcmp(arg[iarg],"dynamic") == 0) {
|
||||||
|
if (xflag != NONE || yflag != NONE || zflag != NONE)
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
if (iarg+5 > narg) error->all(FLERR,"Illegal balance command");
|
||||||
|
dflag = 1;
|
||||||
|
xflag = yflag = DYNAMIC;
|
||||||
|
if (dimension == 3) zflag = DYNAMIC;
|
||||||
|
nrepeat = atoi(arg[iarg+1]);
|
||||||
|
niter = atoi(arg[iarg+2]);
|
||||||
|
if (nrepeat <= 0 || niter <= 0)
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
int n = strlen(arg[iarg+3]) + 1;
|
||||||
|
bstr = new char[n];
|
||||||
|
strcpy(bstr,arg[iarg+3]);
|
||||||
|
thresh = atof(arg[iarg+4]);
|
||||||
|
if (thresh < 1.0) error->all(FLERR,"Illegal balance command");
|
||||||
|
iarg += 5;
|
||||||
|
|
||||||
|
} else error->all(FLERR,"Illegal balance command");
|
||||||
|
}
|
||||||
|
|
||||||
|
// error check
|
||||||
|
|
||||||
|
if (zflag && dimension == 2)
|
||||||
|
error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
|
||||||
|
|
||||||
|
if (xflag == USER)
|
||||||
|
for (int i = 1; i <= procgrid[0]; i++)
|
||||||
|
if (user_xsplit[i-1] >= user_xsplit[i])
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
if (yflag == USER)
|
||||||
|
for (int i = 1; i <= procgrid[1]; i++)
|
||||||
|
if (user_ysplit[i-1] >= user_ysplit[i])
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
if (zflag == USER)
|
||||||
|
for (int i = 1; i <= procgrid[2]; i++)
|
||||||
|
if (user_zsplit[i-1] >= user_zsplit[i])
|
||||||
|
error->all(FLERR,"Illegal balance command");
|
||||||
|
|
||||||
|
if (dflag) {
|
||||||
|
for (int i = 0; i < strlen(bstr); i++) {
|
||||||
|
if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
|
||||||
|
error->all(FLERR,"Balance dynamic string is invalid");
|
||||||
|
if (bstr[i] == 'z' && dimension == 2)
|
||||||
|
error->all(FLERR,"Balance dynamic string is invalid for 2d simulation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// insure atoms are in current box & update box via shrink-wrap
|
||||||
|
// no exchange() since doesn't matter if atoms are assigned to correct procs
|
||||||
|
|
||||||
|
if (domain->triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
domain->pbc();
|
||||||
|
domain->reset_box();
|
||||||
|
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||||
|
|
||||||
|
// imbinit = initial imbalance
|
||||||
|
// use current splits instead of nlocal since atoms may not be in sub-box
|
||||||
|
|
||||||
|
domain->x2lamda(atom->nlocal);
|
||||||
|
int maxinit;
|
||||||
|
double imbinit = imbalance_splits(maxinit);
|
||||||
|
domain->lamda2x(atom->nlocal);
|
||||||
|
|
||||||
|
// debug output of initial state
|
||||||
|
|
||||||
|
//dumpout(update->ntimestep);
|
||||||
|
|
||||||
|
// explicit setting of sub-domain sizes
|
||||||
|
|
||||||
|
if (xflag == UNIFORM) {
|
||||||
|
for (int i = 0; i < procgrid[0]; i++)
|
||||||
|
comm->xsplit[i] = i * 1.0/procgrid[0];
|
||||||
|
comm->xsplit[procgrid[0]] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (yflag == UNIFORM) {
|
||||||
|
for (int i = 0; i < procgrid[1]; i++)
|
||||||
|
comm->ysplit[i] = i * 1.0/procgrid[1];
|
||||||
|
comm->ysplit[procgrid[1]] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zflag == UNIFORM) {
|
||||||
|
for (int i = 0; i < procgrid[2]; i++)
|
||||||
|
comm->zsplit[i] = i * 1.0/procgrid[2];
|
||||||
|
comm->zsplit[procgrid[2]] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (xflag == USER)
|
||||||
|
for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
|
||||||
|
|
||||||
|
if (yflag == USER)
|
||||||
|
for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
|
||||||
|
|
||||||
|
if (zflag == USER)
|
||||||
|
for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
|
||||||
|
|
||||||
|
// dynamic load-balance of sub-domain sizes
|
||||||
|
|
||||||
|
if (dflag) {
|
||||||
|
dynamic_setup(bstr);
|
||||||
|
dynamic();
|
||||||
|
} else count = 0;
|
||||||
|
|
||||||
|
// debug output of final result
|
||||||
|
|
||||||
|
//dumpout(-1);
|
||||||
|
|
||||||
|
// reset comm->uniform flag if necessary
|
||||||
|
|
||||||
|
if (comm->uniform) {
|
||||||
|
if (xflag == USER || xflag == DYNAMIC) comm->uniform = 0;
|
||||||
|
if (yflag == USER || yflag == DYNAMIC) comm->uniform = 0;
|
||||||
|
if (zflag == USER || zflag == DYNAMIC) comm->uniform = 0;
|
||||||
|
} else {
|
||||||
|
if (dimension == 3) {
|
||||||
|
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
|
||||||
|
comm->uniform = 1;
|
||||||
|
} else {
|
||||||
|
if (xflag == UNIFORM && yflag == UNIFORM) comm->uniform = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset proc sub-domains
|
||||||
|
|
||||||
|
if (domain->triclinic) domain->set_lamda_box();
|
||||||
|
domain->set_local_box();
|
||||||
|
|
||||||
|
// move atoms to new processors via irregular()
|
||||||
|
|
||||||
|
if (domain->triclinic) domain->x2lamda(atom->nlocal);
|
||||||
|
Irregular *irregular = new Irregular(lmp);
|
||||||
|
irregular->migrate_atoms();
|
||||||
|
delete irregular;
|
||||||
|
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||||
|
|
||||||
|
// check if any atoms were lost
|
||||||
|
|
||||||
|
bigint natoms;
|
||||||
|
bigint nblocal = atom->nlocal;
|
||||||
|
MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||||
|
if (natoms != atom->natoms) {
|
||||||
|
char str[128];
|
||||||
|
sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
|
||||||
|
" current " BIGINT_FORMAT,atom->natoms,natoms);
|
||||||
|
error->all(FLERR,str);
|
||||||
|
}
|
||||||
|
|
||||||
|
// imbfinal = final imbalance based on final nlocal
|
||||||
|
|
||||||
|
int maxfinal;
|
||||||
|
double imbfinal = imbalance_nlocal(maxfinal);
|
||||||
|
|
||||||
|
if (me == 0) {
|
||||||
|
if (screen) {
|
||||||
|
fprintf(screen," iteration count = %d\n",count);
|
||||||
|
fprintf(screen," initial/final max atoms/proc = %d %d\n",
|
||||||
|
maxinit,maxfinal);
|
||||||
|
fprintf(screen," initial/final imbalance factor = %g %g\n",
|
||||||
|
imbinit,imbfinal);
|
||||||
|
}
|
||||||
|
if (logfile) {
|
||||||
|
fprintf(logfile," iteration count = %d\n",count);
|
||||||
|
fprintf(logfile," initial/final max atoms/proc = %d %d\n",
|
||||||
|
maxinit,maxfinal);
|
||||||
|
fprintf(logfile," initial/final imbalance factor = %g %g\n",
|
||||||
|
imbinit,imbfinal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (me == 0) {
|
||||||
|
if (screen) {
|
||||||
|
fprintf(screen," x cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[0]; i++)
|
||||||
|
fprintf(screen," %g",comm->xsplit[i]);
|
||||||
|
fprintf(screen,"\n");
|
||||||
|
fprintf(screen," y cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[1]; i++)
|
||||||
|
fprintf(screen," %g",comm->ysplit[i]);
|
||||||
|
fprintf(screen,"\n");
|
||||||
|
fprintf(screen," z cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[2]; i++)
|
||||||
|
fprintf(screen," %g",comm->zsplit[i]);
|
||||||
|
fprintf(screen,"\n");
|
||||||
|
}
|
||||||
|
if (logfile) {
|
||||||
|
fprintf(logfile," x cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[0]; i++)
|
||||||
|
fprintf(logfile," %g",comm->xsplit[i]);
|
||||||
|
fprintf(logfile,"\n");
|
||||||
|
fprintf(logfile," y cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[1]; i++)
|
||||||
|
fprintf(logfile," %g",comm->ysplit[i]);
|
||||||
|
fprintf(logfile,"\n");
|
||||||
|
fprintf(logfile," z cuts:");
|
||||||
|
for (int i = 0; i <= comm->procgrid[2]; i++)
|
||||||
|
fprintf(logfile," %g",comm->zsplit[i]);
|
||||||
|
fprintf(logfile,"\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
calculate imbalance based on nlocal
|
||||||
|
return max = max atom per proc
|
||||||
|
return imbalance factor = max atom per proc / ave atom per proc
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
double Balance::imbalance_nlocal(int &max)
|
||||||
|
{
|
||||||
|
MPI_Allreduce(&atom->nlocal,&max,1,MPI_INT,MPI_MAX,world);
|
||||||
|
double imbalance = max / (1.0 * atom->natoms / nprocs);
|
||||||
|
return imbalance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
calculate imbalance based on processor splits in 3 dims
|
||||||
|
atoms must be in lamda coords (0-1) before called
|
||||||
|
map atoms to 3d grid of procs
|
||||||
|
return max = max atom per proc
|
||||||
|
return imbalance factor = max atom per proc / ave atom per proc
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
double Balance::imbalance_splits(int &max)
|
||||||
|
{
|
||||||
|
double *xsplit = comm->xsplit;
|
||||||
|
double *ysplit = comm->ysplit;
|
||||||
|
double *zsplit = comm->zsplit;
|
||||||
|
|
||||||
|
int nx = comm->procgrid[0];
|
||||||
|
int ny = comm->procgrid[1];
|
||||||
|
int nz = comm->procgrid[2];
|
||||||
|
|
||||||
|
for (int i = 0; i < nprocs; i++) pcount[i] = 0;
|
||||||
|
|
||||||
|
double **x = atom->x;
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
int ix,iy,iz;
|
||||||
|
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
ix = binary(x[i][0],nx,xsplit);
|
||||||
|
iy = binary(x[i][1],ny,ysplit);
|
||||||
|
iz = binary(x[i][2],nz,zsplit);
|
||||||
|
pcount[iz*nx*ny + iy*nx + ix]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(pcount,allcount,nprocs,MPI_INT,MPI_SUM,world);
|
||||||
|
max = 0;
|
||||||
|
for (int i = 0; i < nprocs; i++) max = MAX(max,allcount[i]);
|
||||||
|
double imbalance = max / (1.0 * atom->natoms / nprocs);
|
||||||
|
return imbalance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
setup dynamic load balance operations
|
||||||
|
called from command & fix balance
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::dynamic_setup(char *str)
|
||||||
|
{
|
||||||
|
nops = strlen(str);
|
||||||
|
ops = new int[nops];
|
||||||
|
|
||||||
|
for (int i = 0; i < strlen(str); i++) {
|
||||||
|
if (str[i] == 'x') ops[i] = X;
|
||||||
|
if (str[i] == 'y') ops[i] = Y;
|
||||||
|
if (str[i] == 'z') ops[i] = Z;
|
||||||
|
}
|
||||||
|
|
||||||
|
splits[0] = comm->xsplit;
|
||||||
|
splits[1] = comm->ysplit;
|
||||||
|
splits[2] = comm->zsplit;
|
||||||
|
|
||||||
|
counts[0] = new bigint[comm->procgrid[0]];
|
||||||
|
counts[1] = new bigint[comm->procgrid[1]];
|
||||||
|
counts[2] = new bigint[comm->procgrid[2]];
|
||||||
|
|
||||||
|
int max = MAX(comm->procgrid[0],comm->procgrid[1]);
|
||||||
|
max = MAX(max,comm->procgrid[2]);
|
||||||
|
cuts = new double[max+1];
|
||||||
|
onecount = new bigint[max];
|
||||||
|
|
||||||
|
//MPI_Comm_split(world,comm->myloc[0],0,&commslice[0]);
|
||||||
|
//MPI_Comm_split(world,comm->myloc[1],0,&commslice[1]);
|
||||||
|
//MPI_Comm_split(world,comm->myloc[2],0,&commslice[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
perform dynamic load balance by changing xyz split proc boundaries in Comm
|
||||||
|
called from command and fix balance
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::dynamic()
|
||||||
|
{
|
||||||
|
int i,m,max;
|
||||||
|
double imbfactor;
|
||||||
|
|
||||||
|
int *procgrid = comm->procgrid;
|
||||||
|
|
||||||
|
domain->x2lamda(atom->nlocal);
|
||||||
|
|
||||||
|
count = 0;
|
||||||
|
for (int irepeat = 0; irepeat < nrepeat; irepeat++) {
|
||||||
|
for (i = 0; i < nops; i++) {
|
||||||
|
for (m = 0; m < niter; m++) {
|
||||||
|
stats(ops[i],procgrid[ops[i]],splits[ops[i]],counts[ops[i]]);
|
||||||
|
adjust(procgrid[ops[i]],counts[ops[i]],splits[ops[i]]);
|
||||||
|
count++;
|
||||||
|
// debug output of intermediate result
|
||||||
|
//dumpout(-1);
|
||||||
|
}
|
||||||
|
imbfactor = imbalance_splits(max);
|
||||||
|
if (imbfactor <= thresh) break;
|
||||||
|
}
|
||||||
|
if (i < nops) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
domain->lamda2x(atom->nlocal);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
count atoms in each slice
|
||||||
|
current cuts may be very different than original cuts,
|
||||||
|
so use binary search to find which slice each atom is in
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::stats(int dim, int n, double *split, bigint *count)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n; i++) onecount[i] = 0;
|
||||||
|
|
||||||
|
double **x = atom->x;
|
||||||
|
int nlocal = atom->nlocal;
|
||||||
|
int index;
|
||||||
|
|
||||||
|
for (int i = 0; i < nlocal; i++) {
|
||||||
|
index = binary(x[i][dim],n,split);
|
||||||
|
onecount[index]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Allreduce(onecount,count,n,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
adjust cuts between N slices in a dim via diffusive method
|
||||||
|
count = atoms per slice
|
||||||
|
split = current N+1 cuts, with 0.0 and 1.0 at end points
|
||||||
|
overwrite split with new cuts
|
||||||
|
diffusion means slices with more atoms than their neighbors
|
||||||
|
"send" them atoms by moving cut closer to sender, further from receiver
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::adjust(int n, bigint *count, double *split)
|
||||||
|
{
|
||||||
|
// damping factor
|
||||||
|
|
||||||
|
double damp = 0.5;
|
||||||
|
|
||||||
|
// loop over slices from 1 to N-2 inclusive (not end slices 0 and N-1)
|
||||||
|
// cut I is between 2 slices (I-1 and I) with counts
|
||||||
|
// cut I+1 is between 2 slices (I and I+1) with counts
|
||||||
|
// for a cut between 2 slices, only slice with larger count adjusts it
|
||||||
|
|
||||||
|
bigint leftcount,mycount,rightcount;
|
||||||
|
double rho,target,targetleft,targetright;
|
||||||
|
|
||||||
|
for (int i = 1; i < n-1; i++) {
|
||||||
|
leftcount = count[i-1];
|
||||||
|
mycount = count[i];
|
||||||
|
rightcount = count[i+1];
|
||||||
|
|
||||||
|
// middle slice is <= both left and right, so do nothing
|
||||||
|
// special case if 2 slices both have count = 0, no change in cut
|
||||||
|
|
||||||
|
if (mycount <= leftcount && mycount <= rightcount) {
|
||||||
|
if (leftcount == 0) cuts[i] = split[i];
|
||||||
|
if (rightcount == 0) cuts[i+1] = split[i+1];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// rho = density of atoms in the slice
|
||||||
|
|
||||||
|
rho = mycount / (split[i+1] - split[i]);
|
||||||
|
|
||||||
|
// middle slice has more atoms then left or right slice
|
||||||
|
// if delta from middle to top slice > delta between top and bottom slice
|
||||||
|
// then send atoms both dirs to bring all 3 slices to same count
|
||||||
|
// else bottom slice is very low, so send atoms only in that dir
|
||||||
|
|
||||||
|
if (mycount > leftcount && mycount > rightcount) {
|
||||||
|
if (mycount-MAX(leftcount,rightcount) >= fabs(leftcount-rightcount)) {
|
||||||
|
if (leftcount <= rightcount) {
|
||||||
|
targetleft = damp *
|
||||||
|
(rightcount-leftcount + (mycount-rightcount)/3.0);
|
||||||
|
targetright = damp * (mycount-rightcount)/3.0;
|
||||||
|
cuts[i] = split[i] + targetleft/rho;
|
||||||
|
cuts[i+1] = split[i+1] - targetright/rho;
|
||||||
|
} else {
|
||||||
|
targetleft = damp * (mycount-leftcount)/3.0;
|
||||||
|
targetright = damp *
|
||||||
|
(leftcount-rightcount + (mycount-leftcount)/3.0);
|
||||||
|
cuts[i] = split[i] + targetleft/rho;
|
||||||
|
cuts[i+1] = split[i+1] - targetright/rho;
|
||||||
|
}
|
||||||
|
} else if (leftcount < rightcount) {
|
||||||
|
target = damp * 0.5*(mycount-leftcount);
|
||||||
|
cuts[i] = split[i] + target/rho;
|
||||||
|
cuts[i+1] = split[i+1];
|
||||||
|
} else if (rightcount < leftcount) {
|
||||||
|
target = damp * 0.5*(mycount-rightcount);
|
||||||
|
cuts[i+1] = split[i+1] - target/rho;
|
||||||
|
cuts[i] = split[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// middle slice has more atoms than only left or right slice
|
||||||
|
// send atoms only in that dir
|
||||||
|
|
||||||
|
} else if (mycount > leftcount) {
|
||||||
|
target = damp * 0.5*(mycount-leftcount);
|
||||||
|
cuts[i] = split[i] + target/rho;
|
||||||
|
} else if (mycount > rightcount) {
|
||||||
|
target = damp * 0.5*(mycount-rightcount);
|
||||||
|
cuts[i+1] = split[i+1] - target/rho;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// overwrite adjustable splits with new cuts
|
||||||
|
|
||||||
|
for (int i = 1; i < n; i++) split[i] = cuts[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
binary search for value in N-length ascending vec
|
||||||
|
value may be outside range of vec limits
|
||||||
|
always return index from 0 to N-1 inclusive
|
||||||
|
return 0 if value < vec[0]
|
||||||
|
reutrn N-1 if value >= vec[N-1]
|
||||||
|
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int Balance::binary(double value, int n, double *vec)
|
||||||
|
{
|
||||||
|
int lo = 0;
|
||||||
|
int hi = n-1;
|
||||||
|
|
||||||
|
if (value < vec[lo]) return lo;
|
||||||
|
if (value >= vec[hi]) return hi;
|
||||||
|
|
||||||
|
// insure vec[lo] <= value < vec[hi] at every iteration
|
||||||
|
// done when lo,hi are adjacent
|
||||||
|
|
||||||
|
int index = (lo+hi)/2;
|
||||||
|
while (lo < hi-1) {
|
||||||
|
if (value < vec[index]) hi = index;
|
||||||
|
else if (value >= vec[index]) lo = index;
|
||||||
|
index = (lo+hi)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
create dump file of line segments in Pizza.py mdump mesh format
|
||||||
|
just for debug/viz purposes
|
||||||
|
write to tmp.mdump.*, open first time if necessary
|
||||||
|
write xy lines around each proc's sub-domain for 2d
|
||||||
|
write xyz cubes around each proc's sub-domain for 3d
|
||||||
|
all procs but 0 just return
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
void Balance::dumpout(bigint tstamp)
|
||||||
|
{
|
||||||
|
if (me) return;
|
||||||
|
|
||||||
|
bigint tstep;
|
||||||
|
if (tstamp >= 0) tstep = tstamp;
|
||||||
|
else tstep = laststep + 1;
|
||||||
|
laststep = tstep;
|
||||||
|
|
||||||
|
if (fp == NULL) {
|
||||||
|
char file[32];
|
||||||
|
sprintf(file,"tmp.mdump.%ld",tstep);
|
||||||
|
fp = fopen(file,"w");
|
||||||
|
if (!fp) error->one(FLERR,"Cannot open balance output file");
|
||||||
|
|
||||||
|
// write out one square/cute per processor for 2d/3d
|
||||||
|
// only write once since topology is static
|
||||||
|
|
||||||
|
fprintf(fp,"ITEM: TIMESTEP\n");
|
||||||
|
fprintf(fp,"%ld\n",tstep);
|
||||||
|
fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
|
||||||
|
fprintf(fp,"%d\n",nprocs);
|
||||||
|
fprintf(fp,"ITEM: SQUARES\n");
|
||||||
|
|
||||||
|
int nx = comm->procgrid[0] + 1;
|
||||||
|
int ny = comm->procgrid[1] + 1;
|
||||||
|
int nz = comm->procgrid[2] + 1;
|
||||||
|
|
||||||
|
if (domain->dimension == 2) {
|
||||||
|
int m = 0;
|
||||||
|
for (int j = 0; j < comm->procgrid[1]; j++)
|
||||||
|
for (int i = 0; i < comm->procgrid[0]; i++) {
|
||||||
|
int c1 = j*nx + i + 1;
|
||||||
|
int c2 = c1 + 1;
|
||||||
|
int c3 = c2 + nx;
|
||||||
|
int c4 = c3 - 1;
|
||||||
|
fprintf(fp,"%d %d %d %d %d %d\n",m+1,m+1,c1,c2,c3,c4);
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
int m = 0;
|
||||||
|
for (int k = 0; k < comm->procgrid[2]; k++)
|
||||||
|
for (int j = 0; j < comm->procgrid[1]; j++)
|
||||||
|
for (int i = 0; i < comm->procgrid[0]; i++) {
|
||||||
|
int c1 = k*ny*nx + j*nx + i + 1;
|
||||||
|
int c2 = c1 + 1;
|
||||||
|
int c3 = c2 + nx;
|
||||||
|
int c4 = c3 - 1;
|
||||||
|
int c5 = c1 + ny*nx;
|
||||||
|
int c6 = c2 + ny*nx;
|
||||||
|
int c7 = c3 + ny*nx;
|
||||||
|
int c8 = c4 + ny*nx;
|
||||||
|
fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
|
||||||
|
m+1,m+1,c1,c2,c3,c4,c5,c6,c7,c8);
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// write out nodal coords, can be different every call
|
||||||
|
// scale xsplit,ysplit,zsplit values to full box
|
||||||
|
// only implmented for orthogonal boxes, not triclinic
|
||||||
|
|
||||||
|
int nx = comm->procgrid[0] + 1;
|
||||||
|
int ny = comm->procgrid[1] + 1;
|
||||||
|
int nz = comm->procgrid[2] + 1;
|
||||||
|
|
||||||
|
double *boxlo = domain->boxlo;
|
||||||
|
double *boxhi = domain->boxhi;
|
||||||
|
double *prd = domain->prd;
|
||||||
|
|
||||||
|
fprintf(fp,"ITEM: TIMESTEP\n");
|
||||||
|
fprintf(fp,"%ld\n",tstep);
|
||||||
|
fprintf(fp,"ITEM: NUMBER OF NODES\n");
|
||||||
|
if (domain->dimension == 2)
|
||||||
|
fprintf(fp,"%d\n",nx*ny);
|
||||||
|
else
|
||||||
|
fprintf(fp,"%d\n",nx*ny*nz);
|
||||||
|
fprintf(fp,"ITEM: BOX BOUNDS\n");
|
||||||
|
fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
|
||||||
|
fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
|
||||||
|
fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
|
||||||
|
fprintf(fp,"ITEM: NODES\n");
|
||||||
|
|
||||||
|
if (domain->dimension == 2) {
|
||||||
|
int m = 0;
|
||||||
|
for (int j = 0; j < ny; j++)
|
||||||
|
for (int i = 0; i < nx; i++) {
|
||||||
|
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
|
||||||
|
boxlo[0] + prd[0]*comm->xsplit[i],
|
||||||
|
boxlo[1] + prd[1]*comm->ysplit[j],
|
||||||
|
0.0);
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int m = 0;
|
||||||
|
for (int k = 0; k < nz; k++)
|
||||||
|
for (int j = 0; j < ny; j++)
|
||||||
|
for (int i = 0; i < nx; i++) {
|
||||||
|
fprintf(fp,"%d %d %g %g %g\n",m+1,1,
|
||||||
|
boxlo[0] + prd[0]*comm->xsplit[i],
|
||||||
|
boxlo[1] + prd[1]*comm->ysplit[j],
|
||||||
|
boxlo[2] + prd[2]*comm->zsplit[j]);
|
||||||
|
m++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
71
src/balance.h
Normal file
71
src/balance.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef COMMAND_CLASS
|
||||||
|
|
||||||
|
CommandStyle(balance,Balance)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifndef LMP_BALANCE_H
|
||||||
|
#define LMP_BALANCE_H
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
#include "stdio.h"
|
||||||
|
#include "pointers.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class Balance : protected Pointers {
|
||||||
|
public:
|
||||||
|
Balance(class LAMMPS *);
|
||||||
|
~Balance();
|
||||||
|
void command(int, char **);
|
||||||
|
|
||||||
|
private:
|
||||||
|
int me,nprocs;
|
||||||
|
int xflag,yflag,zflag,dflag;
|
||||||
|
int nrepeat,niter;
|
||||||
|
double thresh;
|
||||||
|
char *bstr;
|
||||||
|
double *user_xsplit,*user_ysplit,*user_zsplit;
|
||||||
|
|
||||||
|
int *ops;
|
||||||
|
int nops;
|
||||||
|
double *splits[3];
|
||||||
|
bigint *counts[3];
|
||||||
|
double *cuts;
|
||||||
|
bigint *onecount;
|
||||||
|
MPI_Comm commslice[3];
|
||||||
|
|
||||||
|
int count;
|
||||||
|
int *pcount,*allcount;
|
||||||
|
|
||||||
|
FILE *fp; // for debug output
|
||||||
|
bigint laststep;
|
||||||
|
|
||||||
|
double imbalance_splits(int &);
|
||||||
|
double imbalance_nlocal(int &);
|
||||||
|
void dynamic_setup(char *);
|
||||||
|
void dynamic();
|
||||||
|
void stats(int, int, double *, bigint *);
|
||||||
|
void adjust(int, bigint *, double *);
|
||||||
|
int binary(double, int, double *);
|
||||||
|
|
||||||
|
void dumpout(bigint); // for debug output
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
@ -150,13 +150,13 @@ void ChangeBox::command(int narg, char **arg)
|
|||||||
iarg += 4;
|
iarg += 4;
|
||||||
|
|
||||||
} else if (strcmp(arg[iarg],"ortho") == 0) {
|
} else if (strcmp(arg[iarg],"ortho") == 0) {
|
||||||
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
|
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
|
||||||
ops[nops].style = ORTHO;
|
ops[nops].style = ORTHO;
|
||||||
nops++;
|
nops++;
|
||||||
iarg += 1;
|
iarg += 1;
|
||||||
|
|
||||||
} else if (strcmp(arg[iarg],"triclinic") == 0) {
|
} else if (strcmp(arg[iarg],"triclinic") == 0) {
|
||||||
if (iarg+2 > narg) error->all(FLERR,"Illegal change_box command");
|
if (iarg+1 > narg) error->all(FLERR,"Illegal change_box command");
|
||||||
ops[nops].style = TRICLINIC;
|
ops[nops].style = TRICLINIC;
|
||||||
nops++;
|
nops++;
|
||||||
iarg += 1;
|
iarg += 1;
|
||||||
|
|||||||
308
src/comm.cpp
308
src/comm.cpp
@ -78,6 +78,8 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
|
|
||||||
bordergroup = 0;
|
bordergroup = 0;
|
||||||
style = SINGLE;
|
style = SINGLE;
|
||||||
|
uniform = 1;
|
||||||
|
xsplit = ysplit = zsplit = NULL;
|
||||||
multilo = multihi = NULL;
|
multilo = multihi = NULL;
|
||||||
cutghostmulti = NULL;
|
cutghostmulti = NULL;
|
||||||
cutghostuser = 0.0;
|
cutghostuser = 0.0;
|
||||||
@ -124,6 +126,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
|||||||
|
|
||||||
Comm::~Comm()
|
Comm::~Comm()
|
||||||
{
|
{
|
||||||
|
memory->destroy(xsplit);
|
||||||
|
memory->destroy(ysplit);
|
||||||
|
memory->destroy(zsplit);
|
||||||
|
|
||||||
delete [] customfile;
|
delete [] customfile;
|
||||||
delete [] outfile;
|
delete [] outfile;
|
||||||
|
|
||||||
@ -145,7 +151,7 @@ Comm::~Comm()
|
|||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
create 3d grid of procs based on Nprocs and box size & shape
|
create 3d grid of procs based on Nprocs and box size & shape
|
||||||
map processors to grid
|
map processors to grid, setup xyz split for a uniform grid
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Comm::set_proc_grid()
|
void Comm::set_proc_grid()
|
||||||
@ -252,14 +258,31 @@ void Comm::set_proc_grid()
|
|||||||
|
|
||||||
if (outfile) pmap->output(outfile,procgrid,grid2proc);
|
if (outfile) pmap->output(outfile,procgrid,grid2proc);
|
||||||
|
|
||||||
// set lamda box params after procs are assigned
|
|
||||||
|
|
||||||
if (domain->triclinic) domain->set_lamda_box();
|
|
||||||
|
|
||||||
// free ProcMap class
|
// free ProcMap class
|
||||||
|
|
||||||
delete pmap;
|
delete pmap;
|
||||||
|
|
||||||
|
// set xsplit,ysplit,zsplit for uniform spacings
|
||||||
|
|
||||||
|
memory->destroy(xsplit);
|
||||||
|
memory->destroy(ysplit);
|
||||||
|
memory->destroy(zsplit);
|
||||||
|
|
||||||
|
memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
|
||||||
|
memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
|
||||||
|
memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
|
||||||
|
|
||||||
|
for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
|
||||||
|
for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
|
||||||
|
for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
|
||||||
|
|
||||||
|
xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
|
||||||
|
|
||||||
|
// set lamda box params after procs are assigned
|
||||||
|
// only set once unless load-balancing occurs
|
||||||
|
|
||||||
|
if (domain->triclinic) domain->set_lamda_box();
|
||||||
|
|
||||||
// send my 3d proc grid to another partition if requested
|
// send my 3d proc grid to another partition if requested
|
||||||
|
|
||||||
if (send_to_partition >= 0) {
|
if (send_to_partition >= 0) {
|
||||||
@ -399,25 +422,112 @@ void Comm::setup()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// need = # of procs I need atoms from in each dim based on max cutoff
|
// recvneed[idim][0/1] = # of procs away I recv atoms from, within cutghost
|
||||||
// for 2d, don't communicate in z
|
// 0 = from left, 1 = from right
|
||||||
|
// do not cross non-periodic boundaries, need[2] = 0 for 2d
|
||||||
need[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
|
// sendneed[idim][0/1] = # of procs away I send atoms to
|
||||||
need[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
|
// 0 = to left, 1 = to right
|
||||||
need[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
|
// set equal to recvneed[idim][1/0] of neighbor proc
|
||||||
if (domain->dimension == 2) need[2] = 0;
|
// maxneed[idim] = max procs away any proc recvs atoms in either direction
|
||||||
|
// uniform = 1 = uniform sized sub-domains:
|
||||||
// if non-periodic, do not communicate further than procgrid-1 away
|
// maxneed is directly computable from sub-domain size
|
||||||
// this enables very large cutoffs in non-periodic systems
|
// limit to procgrid-1 for non-PBC
|
||||||
|
// recvneed = maxneed except for procs near non-PBC
|
||||||
|
// sendneed = recvneed of neighbor on each side
|
||||||
|
// uniform = 0 = non-uniform sized sub-domains:
|
||||||
|
// compute recvneed via updown() which accounts for non-PBC
|
||||||
|
// sendneed = recvneed of neighbor on each side
|
||||||
|
// maxneed via Allreduce() of recvneed
|
||||||
|
|
||||||
int *periodicity = domain->periodicity;
|
int *periodicity = domain->periodicity;
|
||||||
if (periodicity[0] == 0) need[0] = MIN(need[0],procgrid[0]-1);
|
int left,right;
|
||||||
if (periodicity[1] == 0) need[1] = MIN(need[1],procgrid[1]-1);
|
|
||||||
if (periodicity[2] == 0) need[2] = MIN(need[2],procgrid[2]-1);
|
if (uniform) {
|
||||||
|
maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
|
||||||
|
maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
|
||||||
|
maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
|
||||||
|
if (domain->dimension == 2) maxneed[2] = 0;
|
||||||
|
if (!periodicity[0]) maxneed[0] = MIN(maxneed[0],procgrid[0]-1);
|
||||||
|
if (!periodicity[1]) maxneed[1] = MIN(maxneed[1],procgrid[1]-1);
|
||||||
|
if (!periodicity[2]) maxneed[2] = MIN(maxneed[2],procgrid[2]-1);
|
||||||
|
|
||||||
|
if (!periodicity[0]) {
|
||||||
|
recvneed[0][0] = MIN(maxneed[0],myloc[0]);
|
||||||
|
recvneed[0][1] = MIN(maxneed[0],procgrid[0]-myloc[0]-1);
|
||||||
|
left = myloc[0] - 1;
|
||||||
|
if (left < 0) left = procgrid[0] - 1;
|
||||||
|
sendneed[0][0] = MIN(maxneed[0],procgrid[0]-left-1);
|
||||||
|
right = myloc[0] + 1;
|
||||||
|
if (right == procgrid[0]) right = 0;
|
||||||
|
sendneed[0][1] = MIN(maxneed[0],right);
|
||||||
|
} else recvneed[0][0] = recvneed[0][1] =
|
||||||
|
sendneed[0][0] = sendneed[0][1] = maxneed[0];
|
||||||
|
|
||||||
|
if (!periodicity[1]) {
|
||||||
|
recvneed[1][0] = MIN(maxneed[1],myloc[1]);
|
||||||
|
recvneed[1][1] = MIN(maxneed[1],procgrid[1]-myloc[1]-1);
|
||||||
|
left = myloc[1] - 1;
|
||||||
|
if (left < 0) left = procgrid[1] - 1;
|
||||||
|
sendneed[1][0] = MIN(maxneed[1],procgrid[1]-left-1);
|
||||||
|
right = myloc[1] + 1;
|
||||||
|
if (right == procgrid[1]) right = 0;
|
||||||
|
sendneed[1][1] = MIN(maxneed[1],right);
|
||||||
|
} else recvneed[1][0] = recvneed[1][1] =
|
||||||
|
sendneed[1][0] = sendneed[1][1] = maxneed[1];
|
||||||
|
|
||||||
|
if (!periodicity[2]) {
|
||||||
|
recvneed[2][0] = MIN(maxneed[2],myloc[2]);
|
||||||
|
recvneed[2][1] = MIN(maxneed[2],procgrid[2]-myloc[2]-1);
|
||||||
|
left = myloc[2] - 1;
|
||||||
|
if (left < 0) left = procgrid[2] - 1;
|
||||||
|
sendneed[2][0] = MIN(maxneed[2],procgrid[2]-left-1);
|
||||||
|
right = myloc[2] + 1;
|
||||||
|
if (right == procgrid[2]) right = 0;
|
||||||
|
sendneed[2][1] = MIN(maxneed[2],right);
|
||||||
|
} else recvneed[2][0] = recvneed[2][1] =
|
||||||
|
sendneed[2][0] = sendneed[2][1] = maxneed[2];
|
||||||
|
|
||||||
|
} else {
|
||||||
|
recvneed[0][0] = updown(0,0,myloc[0],prd[0],periodicity[0],xsplit);
|
||||||
|
recvneed[0][1] = updown(0,1,myloc[0],prd[0],periodicity[0],xsplit);
|
||||||
|
left = myloc[0] - 1;
|
||||||
|
if (left < 0) left = procgrid[0] - 1;
|
||||||
|
sendneed[0][0] = updown(0,1,left,prd[0],periodicity[0],xsplit);
|
||||||
|
right = myloc[0] + 1;
|
||||||
|
if (right == procgrid[0]) right = 0;
|
||||||
|
sendneed[0][1] = updown(0,0,right,prd[0],periodicity[0],xsplit);
|
||||||
|
|
||||||
|
recvneed[1][0] = updown(1,0,myloc[1],prd[1],periodicity[1],ysplit);
|
||||||
|
recvneed[1][1] = updown(1,1,myloc[1],prd[1],periodicity[1],ysplit);
|
||||||
|
left = myloc[1] - 1;
|
||||||
|
if (left < 0) left = procgrid[1] - 1;
|
||||||
|
sendneed[1][0] = updown(1,1,left,prd[1],periodicity[1],ysplit);
|
||||||
|
right = myloc[1] + 1;
|
||||||
|
if (right == procgrid[1]) right = 0;
|
||||||
|
sendneed[1][1] = updown(1,0,right,prd[1],periodicity[1],ysplit);
|
||||||
|
|
||||||
|
if (domain->dimension == 3) {
|
||||||
|
recvneed[2][0] = updown(2,0,myloc[2],prd[2],periodicity[2],zsplit);
|
||||||
|
recvneed[2][1] = updown(2,1,myloc[2],prd[2],periodicity[2],zsplit);
|
||||||
|
left = myloc[2] - 1;
|
||||||
|
if (left < 0) left = procgrid[2] - 1;
|
||||||
|
sendneed[2][0] = updown(2,1,left,prd[2],periodicity[2],zsplit);
|
||||||
|
right = myloc[2] + 1;
|
||||||
|
if (right == procgrid[2]) right = 0;
|
||||||
|
sendneed[2][1] = updown(2,0,right,prd[2],periodicity[2],zsplit);
|
||||||
|
} else recvneed[2][0] = recvneed[2][1] =
|
||||||
|
sendneed[2][0] = sendneed[2][1] = 0;
|
||||||
|
|
||||||
|
int all[6];
|
||||||
|
MPI_Allreduce(&recvneed[0][0],all,6,MPI_INT,MPI_MAX,world);
|
||||||
|
maxneed[0] = MAX(all[0],all[1]);
|
||||||
|
maxneed[1] = MAX(all[2],all[3]);
|
||||||
|
maxneed[2] = MAX(all[4],all[5]);
|
||||||
|
}
|
||||||
|
|
||||||
// allocate comm memory
|
// allocate comm memory
|
||||||
|
|
||||||
nswap = 2 * (need[0]+need[1]+need[2]);
|
nswap = 2 * (maxneed[0]+maxneed[1]+maxneed[2]);
|
||||||
if (nswap > maxswap) grow_swap(nswap);
|
if (nswap > maxswap) grow_swap(nswap);
|
||||||
|
|
||||||
// setup parameters for each exchange:
|
// setup parameters for each exchange:
|
||||||
@ -428,13 +538,11 @@ void Comm::setup()
|
|||||||
// use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
|
// use -BIG/midpt/BIG to insure all atoms included even if round-off occurs
|
||||||
// if round-off, atoms recvd across PBC can be < or > than subbox boundary
|
// if round-off, atoms recvd across PBC can be < or > than subbox boundary
|
||||||
// note that borders() only loops over subset of atoms during each swap
|
// note that borders() only loops over subset of atoms during each swap
|
||||||
// set slablo > slabhi for swaps across non-periodic boundaries
|
// treat all as PBC here, non-PBC is handled in borders() via r/s need[][]
|
||||||
// this insures no atoms are swapped
|
|
||||||
// only for procs owning sub-box at non-periodic end of global box
|
|
||||||
// for style MULTI:
|
// for style MULTI:
|
||||||
// multilo/multihi is same as slablo/slabhi, only for each atom type
|
// multilo/multihi is same, with slablo/slabhi for each atom type
|
||||||
// pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
|
// pbc_flag: 0 = nothing across a boundary, 1 = something across a boundary
|
||||||
// pbc = -1/0/1 for PBC factor in each of 3/6 orthog/triclinic dirs
|
// pbc = -1/0/1 for PBC factor in each of 3/6 orthogonal/triclinic dirs
|
||||||
// for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
|
// for triclinic, slablo/hi and pbc_border will be used in lamda (0-1) coords
|
||||||
// 1st part of if statement is sending to the west/south/down
|
// 1st part of if statement is sending to the west/south/down
|
||||||
// 2nd part of if statement is sending to the east/north/up
|
// 2nd part of if statement is sending to the east/north/up
|
||||||
@ -443,7 +551,7 @@ void Comm::setup()
|
|||||||
|
|
||||||
int iswap = 0;
|
int iswap = 0;
|
||||||
for (dim = 0; dim < 3; dim++) {
|
for (dim = 0; dim < 3; dim++) {
|
||||||
for (ineed = 0; ineed < 2*need[dim]; ineed++) {
|
for (ineed = 0; ineed < 2*maxneed[dim]; ineed++) {
|
||||||
pbc_flag[iswap] = 0;
|
pbc_flag[iswap] = 0;
|
||||||
pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
|
pbc[iswap][0] = pbc[iswap][1] = pbc[iswap][2] =
|
||||||
pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
|
pbc[iswap][3] = pbc[iswap][4] = pbc[iswap][5] = 0;
|
||||||
@ -463,12 +571,6 @@ void Comm::setup()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (myloc[dim] == 0) {
|
if (myloc[dim] == 0) {
|
||||||
if (periodicity[dim] == 0) {
|
|
||||||
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
|
|
||||||
else
|
|
||||||
for (i = 1; i <= ntypes; i++)
|
|
||||||
multihi[iswap][i] = multilo[iswap][i] - 1.0;
|
|
||||||
} else {
|
|
||||||
pbc_flag[iswap] = 1;
|
pbc_flag[iswap] = 1;
|
||||||
pbc[iswap][dim] = 1;
|
pbc[iswap][dim] = 1;
|
||||||
if (triclinic) {
|
if (triclinic) {
|
||||||
@ -476,7 +578,6 @@ void Comm::setup()
|
|||||||
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
|
else if (dim == 2) pbc[iswap][4] = pbc[iswap][3] = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
sendproc[iswap] = procneigh[dim][1];
|
sendproc[iswap] = procneigh[dim][1];
|
||||||
@ -493,12 +594,6 @@ void Comm::setup()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (myloc[dim] == procgrid[dim]-1) {
|
if (myloc[dim] == procgrid[dim]-1) {
|
||||||
if (periodicity[dim] == 0) {
|
|
||||||
if (style == SINGLE) slabhi[iswap] = slablo[iswap] - 1.0;
|
|
||||||
else
|
|
||||||
for (i = 1; i <= ntypes; i++)
|
|
||||||
multihi[iswap][i] = multilo[iswap][i] - 1.0;
|
|
||||||
} else {
|
|
||||||
pbc_flag[iswap] = 1;
|
pbc_flag[iswap] = 1;
|
||||||
pbc[iswap][dim] = -1;
|
pbc[iswap][dim] = -1;
|
||||||
if (triclinic) {
|
if (triclinic) {
|
||||||
@ -507,13 +602,61 @@ void Comm::setup()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
iswap++;
|
iswap++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
walk up/down the extent of nearby processors in dim and dir
|
||||||
|
loc = myloc of proc to start at
|
||||||
|
dir = 0/1 = walk to left/right
|
||||||
|
do not cross non-periodic boundaries
|
||||||
|
is not called for z dim in 2d
|
||||||
|
return how many procs away are needed to encompass cutghost away from loc
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int Comm::updown(int dim, int dir, int loc,
|
||||||
|
double prd, int periodicity, double *split)
|
||||||
|
{
|
||||||
|
int index,count;
|
||||||
|
double frac,delta;
|
||||||
|
|
||||||
|
if (dir == 0) {
|
||||||
|
frac = cutghost[dim]/prd;
|
||||||
|
index = loc - 1;
|
||||||
|
delta = 0.0;
|
||||||
|
count = 0;
|
||||||
|
while (delta < frac) {
|
||||||
|
if (index < 0) {
|
||||||
|
if (!periodicity) break;
|
||||||
|
index = procgrid[dim] - 1;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
delta += split[index+1] - split[index];
|
||||||
|
index--;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
frac = cutghost[dim]/prd;
|
||||||
|
index = loc + 1;
|
||||||
|
delta = 0.0;
|
||||||
|
count = 0;
|
||||||
|
while (delta < frac) {
|
||||||
|
if (index >= procgrid[dim]) {
|
||||||
|
if (!periodicity) break;
|
||||||
|
index = 0;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
delta += split[index+1] - split[index];
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
forward communication of atom coords every timestep
|
forward communication of atom coords every timestep
|
||||||
other per-atom attributes may also be sent via pack/unpack routines
|
other per-atom attributes may also be sent via pack/unpack routines
|
||||||
@ -537,27 +680,30 @@ void Comm::forward_comm(int dummy)
|
|||||||
if (comm_x_only) {
|
if (comm_x_only) {
|
||||||
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
|
||||||
else buf = NULL;
|
else buf = NULL;
|
||||||
|
if (size_forward_recv[iswap])
|
||||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
||||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||||
} else if (ghost_velocity) {
|
} else if (ghost_velocity) {
|
||||||
|
if (size_forward_recv[iswap])
|
||||||
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
|
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
|
||||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||||
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
|
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
|
||||||
} else {
|
} else {
|
||||||
|
if (size_forward_recv[iswap])
|
||||||
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
|
||||||
buf_send,pbc_flag[iswap],pbc[iswap]);
|
buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
|
||||||
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
|
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -601,19 +747,22 @@ void Comm::reverse_comm()
|
|||||||
for (int iswap = nswap-1; iswap >= 0; iswap--) {
|
for (int iswap = nswap-1; iswap >= 0; iswap--) {
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
if (comm_f_only) {
|
if (comm_f_only) {
|
||||||
|
if (size_reverse_recv[iswap])
|
||||||
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
||||||
sendproc[iswap],0,world,&request);
|
sendproc[iswap],0,world,&request);
|
||||||
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
|
if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]];
|
||||||
else buf = NULL;
|
else buf = NULL;
|
||||||
|
if (size_reverse_send[iswap])
|
||||||
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
|
||||||
recvproc[iswap],0,world);
|
recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
|
||||||
} else {
|
} else {
|
||||||
|
if (size_reverse_recv[iswap])
|
||||||
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE,
|
||||||
sendproc[iswap],0,world,&request);
|
sendproc[iswap],0,world,&request);
|
||||||
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
|
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
|
||||||
MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (size_reverse_recv[iswap]) MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
|
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv);
|
||||||
|
|
||||||
@ -752,8 +901,8 @@ void Comm::exchange()
|
|||||||
|
|
||||||
void Comm::borders()
|
void Comm::borders()
|
||||||
{
|
{
|
||||||
int i,n,itype,iswap,dim,ineed,maxneed,smax,rmax;
|
int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
|
||||||
int nsend,nrecv,nfirst,nlast,ngroup;
|
int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
|
||||||
double lo,hi;
|
double lo,hi;
|
||||||
int *type;
|
int *type;
|
||||||
double **x;
|
double **x;
|
||||||
@ -774,8 +923,8 @@ void Comm::borders()
|
|||||||
|
|
||||||
for (dim = 0; dim < 3; dim++) {
|
for (dim = 0; dim < 3; dim++) {
|
||||||
nlast = 0;
|
nlast = 0;
|
||||||
maxneed = 2*need[dim];
|
twoneed = 2*maxneed[dim];
|
||||||
for (ineed = 0; ineed < maxneed; ineed++) {
|
for (ineed = 0; ineed < twoneed; ineed++) {
|
||||||
|
|
||||||
// find atoms within slab boundaries lo/hi using <= and >=
|
// find atoms within slab boundaries lo/hi using <= and >=
|
||||||
// check atoms between nfirst and nlast
|
// check atoms between nfirst and nlast
|
||||||
@ -799,11 +948,19 @@ void Comm::borders()
|
|||||||
|
|
||||||
nsend = 0;
|
nsend = 0;
|
||||||
|
|
||||||
|
// sendflag = 0 if I do not send on this swap
|
||||||
|
// sendneed test indicates receiver no longer requires data
|
||||||
|
// e.g. due to non-PBC or non-uniform sub-domains
|
||||||
|
|
||||||
|
if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
|
||||||
|
else sendflag = 1;
|
||||||
|
|
||||||
// find send atoms according to SINGLE vs MULTI
|
// find send atoms according to SINGLE vs MULTI
|
||||||
// all atoms eligible versus atoms in bordergroup
|
// all atoms eligible versus atoms in bordergroup
|
||||||
// only need to limit loop to bordergroup for first sends (ineed < 2)
|
// only need to limit loop to bordergroup for first sends (ineed < 2)
|
||||||
// on these sends, break loop in two: owned (in group) and ghost
|
// on these sends, break loop in two: owned (in group) and ghost
|
||||||
|
|
||||||
|
if (sendflag) {
|
||||||
if (!bordergroup || ineed >= 2) {
|
if (!bordergroup || ineed >= 2) {
|
||||||
if (style == SINGLE) {
|
if (style == SINGLE) {
|
||||||
for (i = nfirst; i < nlast; i++)
|
for (i = nfirst; i < nlast; i++)
|
||||||
@ -852,6 +1009,7 @@ void Comm::borders()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// pack up list of border atoms
|
// pack up list of border atoms
|
||||||
|
|
||||||
@ -865,18 +1023,18 @@ void Comm::borders()
|
|||||||
pbc_flag[iswap],pbc[iswap]);
|
pbc_flag[iswap],pbc[iswap]);
|
||||||
|
|
||||||
// swap atoms with other proc
|
// swap atoms with other proc
|
||||||
|
// no MPI calls except SendRecv if nsend/nrecv = 0
|
||||||
// put incoming ghosts at end of my atom arrays
|
// put incoming ghosts at end of my atom arrays
|
||||||
// if swapping with self, simply copy, no messages
|
// if swapping with self, simply copy, no messages
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
|
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
|
||||||
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
|
&nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
|
||||||
if (nrecv*size_border > maxrecv)
|
if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border);
|
||||||
grow_recv(nrecv*size_border);
|
if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
|
||||||
MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE,
|
|
||||||
recvproc[iswap],0,world,&request);
|
recvproc[iswap],0,world,&request);
|
||||||
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (nrecv) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else {
|
} else {
|
||||||
nrecv = nsend;
|
nrecv = nsend;
|
||||||
@ -939,10 +1097,12 @@ void Comm::forward_comm_pair(Pair *pair)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -973,10 +1133,12 @@ void Comm::reverse_comm_pair(Pair *pair)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1008,10 +1170,12 @@ void Comm::forward_comm_fix(Fix *fix)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1042,10 +1206,12 @@ void Comm::reverse_comm_fix(Fix *fix)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1077,10 +1243,12 @@ void Comm::forward_comm_compute(Compute *compute)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1111,10 +1279,12 @@ void Comm::reverse_comm_compute(Compute *compute)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1146,10 +1316,12 @@ void Comm::forward_comm_dump(Dump *dump)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
MPI_Irecv(buf_recv,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
MPI_Send(buf_send,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (recvnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1180,10 +1352,12 @@ void Comm::reverse_comm_dump(Dump *dump)
|
|||||||
// if self, set recv buffer to send buffer
|
// if self, set recv buffer to send buffer
|
||||||
|
|
||||||
if (sendproc[iswap] != me) {
|
if (sendproc[iswap] != me) {
|
||||||
|
if (sendnum[iswap])
|
||||||
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
MPI_Irecv(buf_recv,n*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,
|
||||||
world,&request);
|
world,&request);
|
||||||
|
if (recvnum[iswap])
|
||||||
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
MPI_Send(buf_send,n*recvnum[iswap],MPI_DOUBLE,recvproc[iswap],0,world);
|
||||||
MPI_Wait(&request,&status);
|
if (sendnum[iswap]) MPI_Wait(&request,&status);
|
||||||
buf = buf_recv;
|
buf = buf_recv;
|
||||||
} else buf = buf_send;
|
} else buf = buf_send;
|
||||||
|
|
||||||
@ -1373,6 +1547,10 @@ void Comm::set_processors(int narg, char **arg)
|
|||||||
if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
|
if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
|
||||||
error->all(FLERR,"Illegal processors command");
|
error->all(FLERR,"Illegal processors command");
|
||||||
|
|
||||||
|
int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
|
||||||
|
if (p && p != nprocs)
|
||||||
|
error->all(FLERR,"Specified processors != physical processors");
|
||||||
|
|
||||||
int iarg = 3;
|
int iarg = 3;
|
||||||
while (iarg < narg) {
|
while (iarg < narg) {
|
||||||
if (strcmp(arg[iarg],"grid") == 0) {
|
if (strcmp(arg[iarg],"grid") == 0) {
|
||||||
|
|||||||
12
src/comm.h
12
src/comm.h
@ -24,8 +24,10 @@ class Comm : protected Pointers {
|
|||||||
int procgrid[3]; // procs assigned in each dim of 3d grid
|
int procgrid[3]; // procs assigned in each dim of 3d grid
|
||||||
int user_procgrid[3]; // user request for procs in each dim
|
int user_procgrid[3]; // user request for procs in each dim
|
||||||
int myloc[3]; // which proc I am in each dim
|
int myloc[3]; // which proc I am in each dim
|
||||||
int procneigh[3][2]; // my 6 neighboring procs
|
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
|
||||||
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
|
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
|
||||||
|
int uniform; // 1 = equal subdomains, 0 = load-balanced
|
||||||
|
double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
|
||||||
double cutghost[3]; // cutoffs used for acquiring ghost atoms
|
double cutghost[3]; // cutoffs used for acquiring ghost atoms
|
||||||
double cutghostuser; // user-specified ghost cutoff
|
double cutghostuser; // user-specified ghost cutoff
|
||||||
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
|
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
|
||||||
@ -63,8 +65,10 @@ class Comm : protected Pointers {
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
int style; // single vs multi-type comm
|
int style; // single vs multi-type comm
|
||||||
int nswap; // # of swaps to perform
|
int nswap; // # of swaps to perform = sum of maxneed
|
||||||
int need[3]; // procs I need atoms from in each dim
|
int recvneed[3][2]; // # of procs away I recv atoms from
|
||||||
|
int sendneed[3][2]; // # of procs away I send atoms to
|
||||||
|
int maxneed[3]; // max procs away any proc needs, per dim
|
||||||
int triclinic; // 0 if domain is orthog, 1 if triclinic
|
int triclinic; // 0 if domain is orthog, 1 if triclinic
|
||||||
int maxswap; // max # of swaps memory is allocated for
|
int maxswap; // max # of swaps memory is allocated for
|
||||||
int size_forward; // # of per-atom datums in forward comm
|
int size_forward; // # of per-atom datums in forward comm
|
||||||
@ -106,6 +110,8 @@ class Comm : protected Pointers {
|
|||||||
int maxsend,maxrecv; // current size of send/recv buffer
|
int maxsend,maxrecv; // current size of send/recv buffer
|
||||||
int maxforward,maxreverse; // max # of datums in forward/reverse comm
|
int maxforward,maxreverse; // max # of datums in forward/reverse comm
|
||||||
|
|
||||||
|
int updown(int, int, int, double, int, double *);
|
||||||
|
// compare cutoff to procs
|
||||||
virtual void grow_send(int,int); // reallocate send buffer
|
virtual void grow_send(int,int); // reallocate send buffer
|
||||||
virtual void grow_recv(int); // free/allocate recv buffer
|
virtual void grow_recv(int); // free/allocate recv buffer
|
||||||
virtual void grow_list(int, int); // reallocate one sendlist
|
virtual void grow_list(int, int); // reallocate one sendlist
|
||||||
|
|||||||
@ -215,58 +215,57 @@ void Domain::set_global_box()
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
set lamda box params, only need be done one time
|
set lamda box params
|
||||||
assumes global box is defined and proc assignment has been made by comm
|
assumes global box is defined and proc assignment has been made
|
||||||
for uppermost proc, insure subhi = 1.0 (in case round-off occurs)
|
uses comm->xyz_split to define subbox boundaries in consistent manner
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Domain::set_lamda_box()
|
void Domain::set_lamda_box()
|
||||||
{
|
{
|
||||||
int *myloc = comm->myloc;
|
int *myloc = comm->myloc;
|
||||||
int *procgrid = comm->procgrid;
|
double *xsplit = comm->xsplit;
|
||||||
|
double *ysplit = comm->ysplit;
|
||||||
|
double *zsplit = comm->zsplit;
|
||||||
|
|
||||||
sublo_lamda[0] = 1.0*myloc[0] / procgrid[0];
|
sublo_lamda[0] = xsplit[myloc[0]];
|
||||||
sublo_lamda[1] = 1.0*myloc[1] / procgrid[1];
|
subhi_lamda[0] = xsplit[myloc[0]+1];
|
||||||
sublo_lamda[2] = 1.0*myloc[2] / procgrid[2];
|
|
||||||
|
|
||||||
if (myloc[0] < procgrid[0]-1)
|
sublo_lamda[1] = ysplit[myloc[1]];
|
||||||
subhi_lamda[0] = 1.0*(myloc[0]+1) / procgrid[0];
|
subhi_lamda[1] = ysplit[myloc[1]+1];
|
||||||
else subhi_lamda[0] = 1.0;
|
|
||||||
|
|
||||||
if (myloc[1] < procgrid[1]-1)
|
sublo_lamda[2] = zsplit[myloc[2]];
|
||||||
subhi_lamda[1] = 1.0*(myloc[1]+1) / procgrid[1];
|
subhi_lamda[2] = zsplit[myloc[2]+1];
|
||||||
else subhi_lamda[1] = 1.0;
|
|
||||||
|
|
||||||
if (myloc[2] < procgrid[2]-1)
|
|
||||||
subhi_lamda[2] = 1.0*(myloc[2]+1) / procgrid[2];
|
|
||||||
else subhi_lamda[2] = 1.0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
set local subbox params
|
set local subbox params for orthogonal boxes
|
||||||
assumes global box is defined and proc assignment has been made
|
assumes global box is defined and proc assignment has been made
|
||||||
for uppermost proc, insure subhi = boxhi (in case round-off occurs)
|
uses comm->xyz_split to define subbox boundaries in consistent manner
|
||||||
|
insure subhi[max] = boxhi
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Domain::set_local_box()
|
void Domain::set_local_box()
|
||||||
{
|
{
|
||||||
int *myloc = comm->myloc;
|
int *myloc = comm->myloc;
|
||||||
int *procgrid = comm->procgrid;
|
int *procgrid = comm->procgrid;
|
||||||
|
double *xsplit = comm->xsplit;
|
||||||
|
double *ysplit = comm->ysplit;
|
||||||
|
double *zsplit = comm->zsplit;
|
||||||
|
|
||||||
if (triclinic == 0) {
|
if (triclinic == 0) {
|
||||||
sublo[0] = boxlo[0] + myloc[0] * xprd / procgrid[0];
|
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
|
||||||
if (myloc[0] < procgrid[0]-1)
|
if (myloc[0] < procgrid[0]-1)
|
||||||
subhi[0] = boxlo[0] + (myloc[0]+1) * xprd / procgrid[0];
|
subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
|
||||||
else subhi[0] = boxhi[0];
|
else subhi[0] = boxhi[0];
|
||||||
|
|
||||||
sublo[1] = boxlo[1] + myloc[1] * yprd / procgrid[1];
|
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
|
||||||
if (myloc[1] < procgrid[1]-1)
|
if (myloc[1] < procgrid[1]-1)
|
||||||
subhi[1] = boxlo[1] + (myloc[1]+1) * yprd / procgrid[1];
|
subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
|
||||||
else subhi[1] = boxhi[1];
|
else subhi[1] = boxhi[1];
|
||||||
|
|
||||||
sublo[2] = boxlo[2] + myloc[2] * zprd / procgrid[2];
|
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
|
||||||
if (myloc[2] < procgrid[2]-1)
|
if (myloc[2] < procgrid[2]-1)
|
||||||
subhi[2] = boxlo[2] + (myloc[2]+1) * zprd / procgrid[2];
|
subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
|
||||||
else subhi[2] = boxhi[2];
|
else subhi[2] = boxhi[2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -274,7 +273,7 @@ void Domain::set_local_box()
|
|||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
reset global & local boxes due to global box boundary changes
|
reset global & local boxes due to global box boundary changes
|
||||||
if shrink-wrapped, determine atom extent and reset boxlo/hi
|
if shrink-wrapped, determine atom extent and reset boxlo/hi
|
||||||
if triclinic, perform any shrink-wrap in lamda space
|
for triclinic, atoms must be in lamda coords (0-1) before reset_box is called
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Domain::reset_box()
|
void Domain::reset_box()
|
||||||
|
|||||||
@ -83,7 +83,8 @@ void Irregular::migrate_atoms()
|
|||||||
atom->nghost = 0;
|
atom->nghost = 0;
|
||||||
atom->avec->clear_bonus();
|
atom->avec->clear_bonus();
|
||||||
|
|
||||||
// subbox bounds for orthogonal or triclinic
|
// subbox bounds for orthogonal or triclinic box
|
||||||
|
// other comm/domain data used by coord2proc()
|
||||||
|
|
||||||
double *sublo,*subhi;
|
double *sublo,*subhi;
|
||||||
if (triclinic == 0) {
|
if (triclinic == 0) {
|
||||||
@ -94,6 +95,13 @@ void Irregular::migrate_atoms()
|
|||||||
subhi = domain->subhi_lamda;
|
subhi = domain->subhi_lamda;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uniform = comm->uniform;
|
||||||
|
xsplit = comm->xsplit;
|
||||||
|
ysplit = comm->ysplit;
|
||||||
|
zsplit = comm->zsplit;
|
||||||
|
boxlo = domain->boxlo;
|
||||||
|
prd = domain->prd;
|
||||||
|
|
||||||
// loop over atoms, flag any that are not in my sub-box
|
// loop over atoms, flag any that are not in my sub-box
|
||||||
// fill buffer with atoms leaving my box, using < and >=
|
// fill buffer with atoms leaving my box, using < and >=
|
||||||
// assign which proc it belongs to via coord2proc()
|
// assign which proc it belongs to via coord2proc()
|
||||||
@ -603,26 +611,37 @@ void Irregular::destroy_data()
|
|||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
determine which proc owns atom with coord x[3]
|
determine which proc owns atom with coord x[3]
|
||||||
x will be in box (orthogonal) or lamda coords (triclinic)
|
x will be in box (orthogonal) or lamda coords (triclinic)
|
||||||
|
for uniform = 1, directly calculate owning proc
|
||||||
|
for non-uniform, iteratively find owning proc via binary search
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
int Irregular::coord2proc(double *x)
|
int Irregular::coord2proc(double *x)
|
||||||
{
|
{
|
||||||
int loc[3];
|
int loc[3];
|
||||||
|
|
||||||
|
if (uniform) {
|
||||||
if (triclinic == 0) {
|
if (triclinic == 0) {
|
||||||
double *boxlo = domain->boxlo;
|
loc[0] = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
|
||||||
double *boxhi = domain->boxhi;
|
loc[1] = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
|
||||||
loc[0] = static_cast<int>
|
loc[2] = static_cast<int> (procgrid[2] * (x[2]-boxlo[2]) / prd[2]);
|
||||||
(procgrid[0] * (x[0]-boxlo[0]) / (boxhi[0]-boxlo[0]));
|
|
||||||
loc[1] = static_cast<int>
|
|
||||||
(procgrid[1] * (x[1]-boxlo[1]) / (boxhi[1]-boxlo[1]));
|
|
||||||
loc[2] = static_cast<int>
|
|
||||||
(procgrid[2] * (x[2]-boxlo[2]) / (boxhi[2]-boxlo[2]));
|
|
||||||
} else {
|
} else {
|
||||||
loc[0] = static_cast<int> (procgrid[0] * x[0]);
|
loc[0] = static_cast<int> (procgrid[0] * x[0]);
|
||||||
loc[1] = static_cast<int> (procgrid[1] * x[1]);
|
loc[1] = static_cast<int> (procgrid[1] * x[1]);
|
||||||
loc[2] = static_cast<int> (procgrid[2] * x[2]);
|
loc[2] = static_cast<int> (procgrid[2] * x[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (triclinic == 0) {
|
||||||
|
loc[0] = binary((x[0]-boxlo[0])/prd[0],procgrid[0],xsplit);
|
||||||
|
loc[1] = binary((x[1]-boxlo[1])/prd[1],procgrid[1],ysplit);
|
||||||
|
loc[2] = binary((x[2]-boxlo[2])/prd[2],procgrid[2],zsplit);
|
||||||
|
} else {
|
||||||
|
loc[0] = binary(x[0],procgrid[0],xsplit);
|
||||||
|
loc[1] = binary(x[1],procgrid[1],ysplit);
|
||||||
|
loc[2] = binary(x[2],procgrid[2],zsplit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (loc[0] < 0) loc[0] = 0;
|
if (loc[0] < 0) loc[0] = 0;
|
||||||
if (loc[0] >= procgrid[0]) loc[0] = procgrid[0] - 1;
|
if (loc[0] >= procgrid[0]) loc[0] = procgrid[0] - 1;
|
||||||
if (loc[1] < 0) loc[1] = 0;
|
if (loc[1] < 0) loc[1] = 0;
|
||||||
@ -633,6 +652,36 @@ int Irregular::coord2proc(double *x)
|
|||||||
return grid2proc[loc[0]][loc[1]][loc[2]];
|
return grid2proc[loc[0]][loc[1]][loc[2]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
binary search for value in N-length ascending vec
|
||||||
|
value may be outside range of vec limits
|
||||||
|
always return index from 0 to N-1 inclusive
|
||||||
|
return 0 if value < vec[0]
|
||||||
|
reutrn N-1 if value >= vec[N-1]
|
||||||
|
return index = 1 to N-2 if vec[index] <= value < vec[index+1]
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
int Irregular::binary(double value, int n, double *vec)
|
||||||
|
{
|
||||||
|
int lo = 0;
|
||||||
|
int hi = n-1;
|
||||||
|
|
||||||
|
if (value < vec[lo]) return lo;
|
||||||
|
if (value >= vec[hi]) return hi;
|
||||||
|
|
||||||
|
// insure vec[lo] <= value < vec[hi] at every iteration
|
||||||
|
// done when lo,hi are adjacent
|
||||||
|
|
||||||
|
int index = (lo+hi)/2;
|
||||||
|
while (lo < hi-1) {
|
||||||
|
if (value < vec[index]) hi = index;
|
||||||
|
else if (value >= vec[index]) lo = index;
|
||||||
|
index = (lo+hi)/2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
||||||
if flag = 1, realloc
|
if flag = 1, realloc
|
||||||
|
|||||||
@ -32,8 +32,12 @@ class Irregular : protected Pointers {
|
|||||||
int me,nprocs;
|
int me,nprocs;
|
||||||
int triclinic;
|
int triclinic;
|
||||||
int map_style;
|
int map_style;
|
||||||
int *procgrid;
|
int uniform;
|
||||||
int ***grid2proc;
|
double *xsplit,*ysplit,*zsplit; // ptrs to comm
|
||||||
|
int *procgrid; // ptr to comm
|
||||||
|
int ***grid2proc; // ptr to comm
|
||||||
|
double *boxlo; // ptr to domain
|
||||||
|
double *prd; // ptr to domain
|
||||||
|
|
||||||
int maxsend,maxrecv; // size of buffers in # of doubles
|
int maxsend,maxrecv; // size of buffers in # of doubles
|
||||||
double *buf_send,*buf_recv;
|
double *buf_send,*buf_recv;
|
||||||
@ -81,6 +85,7 @@ class Irregular : protected Pointers {
|
|||||||
void exchange_atom(double *, int *, double *);
|
void exchange_atom(double *, int *, double *);
|
||||||
void destroy_atom();
|
void destroy_atom();
|
||||||
int coord2proc(double *);
|
int coord2proc(double *);
|
||||||
|
int binary(double, int, double *);
|
||||||
|
|
||||||
void grow_send(int,int); // reallocate send buffer
|
void grow_send(int,int); // reallocate send buffer
|
||||||
void grow_recv(int); // free/allocate recv buffer
|
void grow_recv(int); // free/allocate recv buffer
|
||||||
|
|||||||
Reference in New Issue
Block a user