git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7340 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2011-12-13 15:57:18 +00:00
parent 744efbc2cd
commit 86347b81e3
13 changed files with 1121 additions and 517 deletions

View File

@ -35,13 +35,11 @@
#include "compute.h" #include "compute.h"
#include "output.h" #include "output.h"
#include "dump.h" #include "dump.h"
#include "procmap.h"
#include "math_extra.h" #include "math_extra.h"
#include "error.h" #include "error.h"
#include "memory.h" #include "memory.h"
#include <map>
#include <string>
#ifdef _OPENMP #ifdef _OPENMP
#include "omp.h" #include "omp.h"
#endif #endif
@ -54,8 +52,9 @@ using namespace LAMMPS_NS;
#define BIG 1.0e20 #define BIG 1.0e20
enum{SINGLE,MULTI}; enum{SINGLE,MULTI};
enum{NONE,MULTIPLE}; enum{MULTIPLE}; // same as in ProcMap
enum{CART,CARTREORDER,XYZ,XZY,YXZ,YZX,ZXY,ZYX}; enum{ONELEVEL,NUMA,CUSTOM};
enum{CART,CARTREORDER,XYZ};
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
setup MPI and allocate buffer space setup MPI and allocate buffer space
@ -67,9 +66,14 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
MPI_Comm_size(world,&nprocs); MPI_Comm_size(world,&nprocs);
user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0; user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0;
gridflag = ONELEVEL;
mapflag = CART;
customfile = NULL;
recv_from_partition = send_to_partition = -1;
otherflag = 0;
outfile = NULL;
grid2proc = NULL; grid2proc = NULL;
layoutflag = CART;
numa_nodes = 0;
bordergroup = 0; bordergroup = 0;
style = SINGLE; style = SINGLE;
@ -77,8 +81,6 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
cutghostmulti = NULL; cutghostmulti = NULL;
cutghostuser = 0.0; cutghostuser = 0.0;
ghost_velocity = 0; ghost_velocity = 0;
recv_from_partition = send_to_partition = -1;
other_partition_style = NONE;
// use of OpenMP threads // use of OpenMP threads
// query OpenMP for number of threads/process set by user at run-time // query OpenMP for number of threads/process set by user at run-time
@ -121,7 +123,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
Comm::~Comm() Comm::~Comm()
{ {
if (grid2proc) memory->destroy(grid2proc); delete [] customfile;
delete [] outfile;
memory->destroy(grid2proc);
free_swap(); free_swap();
if (style == MULTI) { if (style == MULTI) {
@ -138,12 +143,13 @@ Comm::~Comm()
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
create 3d grid of procs based on box size create 3d grid of procs based on Nprocs and box size & shape
map processors to grid
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Comm::set_proc_grid() void Comm::set_proc_grid()
{ {
// recv proc layout of another partition if my layout depends on it // recv 3d proc grid of another partition if my 3d grid depends on it
if (recv_from_partition >= 0) { if (recv_from_partition >= 0) {
MPI_Status status; MPI_Status status;
@ -153,124 +159,96 @@ void Comm::set_proc_grid()
MPI_Bcast(other_procgrid,3,MPI_INT,0,world); MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
} }
// use NUMA routines if numa_nodes > 0 // create ProcMap class
// if NUMA routines fail, just continue
if (numa_nodes) { ProcMap *pmap = new ProcMap(lmp);
int flag = numa_set_proc_grid();
if (flag) return; // create 3d grid of processors, produces procgrid
// can fail (on one partition) if constrained by other partition
// if numa_grid() fails, try onelevel_grid()
int flag;
if (gridflag == ONELEVEL) {
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
otherflag,other_style,other_procgrid);
if (!flag) error->all(FLERR,"Could not create grid of processors");
} else if (gridflag == NUMA) {
flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
if (!flag) {
coregrid[0] = coregrid[1] = coregrid[2] = 1;
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
otherflag,other_style,other_procgrid);
}
if (!flag) error->all(FLERR,"Could not create grid of processors");
} else if (gridflag == CUSTOM) {
pmap->custom_grid(nprocs,user_procgrid,procgrid);
} }
// create layout of procs mapped to simulation box // error check on procgrid
// can fail (on one partition) if constrained by other_partition_style
int flag =
procs2box(nprocs,user_procgrid,procgrid,1,1,1,other_partition_style);
if (!flag) error->all(FLERR,"Could not layout grid of processors",1);
// error check
if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs) if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
error->all(FLERR,"Bad grid of processors"); error->all(FLERR,"Bad grid of processors");
if (domain->dimension == 2 && procgrid[2] != 1) if (domain->dimension == 2 && procgrid[2] != 1)
error->all(FLERR,"Processor count in z must be 1 for 2d simulation"); error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
// grid2proc[i][j][k] = proc that owns i,j,k location in grid // grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid
if (grid2proc) memory->destroy(grid2proc); if (grid2proc) memory->destroy(grid2proc);
memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2], memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
"comm:grid2proc"); "comm:grid2proc");
// use MPI Cartesian routines to layout 3d grid of procs // map processor IDs to 3d processor grid
// MPI may do layout in machine-optimized fashion // produces myloc, procneigh, grid2proc
if (layoutflag == CART || layoutflag == CARTREORDER) { if (gridflag == ONELEVEL) {
int reorder = 0; if (mapflag == CART)
if (layoutflag == CARTREORDER) reorder = 1; pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc);
int periods[3]; else if (mapflag == CARTREORDER)
periods[0] = periods[1] = periods[2] = 1; pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc);
MPI_Comm cartesian; else if (mapflag == XYZ)
pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc);
MPI_Cart_create(world,3,procgrid,periods,reorder,&cartesian);
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
int coords[3]; } else if (gridflag == NUMA) {
int i,j,k; pmap->numa_map(coregrid,myloc,procneigh,grid2proc);
for (i = 0; i < procgrid[0]; i++)
for (j = 0; j < procgrid[1]; j++)
for (k = 0; k < procgrid[2]; k++) {
coords[0] = i; coords[1] = j; coords[2] = k;
MPI_Cart_rank(cartesian,coords,&grid2proc[i][j][k]);
}
MPI_Comm_free(&cartesian);
// layout 3d grid of procs explicitly, via user-requested XYZ ordering } else if (gridflag == CUSTOM) {
pmap->custom_map(myloc,procneigh,grid2proc);
} else {
int i,j,k,ime,jme,kme;
for (i = 0; i < procgrid[0]; i++)
for (j = 0; j < procgrid[1]; j++)
for (k = 0; k < procgrid[2]; k++) {
if (layoutflag == XYZ)
grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
else if (layoutflag == XZY)
grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i;
else if (layoutflag == YXZ)
grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j;
else if (layoutflag == YZX)
grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j;
else if (layoutflag == ZXY)
grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k;
else if (layoutflag == ZYX)
grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k;
if (grid2proc[i][j][k] == me) {
ime = i; jme = j, kme = k;
}
}
myloc[0] = ime;
myloc[1] = jme;
myloc[2] = kme;
i = ime-1;
if (i < 0) i = procgrid[0]-1;
procneigh[0][0] = grid2proc[i][jme][kme];
i = ime+1;
if (i == procgrid[0]) i = 0;
procneigh[0][1] = grid2proc[i][jme][kme];
j = jme-1;
if (j < 0) j = procgrid[1]-1;
procneigh[1][0] = grid2proc[ime][j][kme];
j = jme+1;
if (j == procgrid[1]) j = 0;
procneigh[1][1] = grid2proc[ime][j][kme];
k = kme-1;
if (k < 0) k = procgrid[2]-1;
procneigh[2][0] = grid2proc[ime][jme][k];
k = kme+1;
if (k == procgrid[2]) k = 0;
procneigh[2][1] = grid2proc[ime][jme][k];
} }
// print 3d grid info to screen and logfile
if (me == 0) {
if (screen) {
fprintf(screen," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (gridflag == NUMA)
fprintf(screen," %d by %d by %d core grid within node\n",
coregrid[0],coregrid[1],coregrid[2]);
}
if (logfile) {
fprintf(logfile," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (gridflag == NUMA)
fprintf(logfile," %d by %d by %d core grid within node\n",
coregrid[0],coregrid[1],coregrid[2]);
}
}
// print 3d grid details to outfile
if (outfile) pmap->output(grid2proc,outfile);
// set lamda box params after procs are assigned // set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box(); if (domain->triclinic) domain->set_lamda_box();
if (me == 0) { // free ProcMap class
if (screen) fprintf(screen," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
}
// send my proc layout to another partition if requested delete pmap;
// send my 3d proc grid to another partition if requested
if (send_to_partition >= 0) { if (send_to_partition >= 0) {
if (me == 0) MPI_Send(procgrid,3,MPI_INT, if (me == 0) MPI_Send(procgrid,3,MPI_INT,
@ -1196,130 +1174,6 @@ void Comm::reverse_comm_dump(Dump *dump)
} }
} }
/* ----------------------------------------------------------------------
assign numprocs to 3d box so as to minimize surface area
area = surface area of each of 3 faces of simulation box divided by sx,sy,sz
for triclinic, area = cross product of 2 edge vectors stored in h matrix
valid assignment will be factorization of numprocs = Px by Py by Pz
user_factors = if non-zero, factors are specified by user
sx,sy,sz = scale box xyz dimension vy dividing by sx,sy,sz
other = 1 to enforce compatability with other partition's layout
return factors = # of procs assigned to each dimension
return 1 if successully factor, 0 if not
------------------------------------------------------------------------- */
int Comm::procs2box(int numprocs, int user_factors[3], int factors[3],
const int sx, const int sy, const int sz, int other)
{
factors[0] = user_factors[0];
factors[1] = user_factors[1];
factors[2] = user_factors[2];
// all 3 proc counts are specified
if (factors[0] && factors[1] && factors[2]) return 1;
// 2 out of 3 proc counts are specified
if (factors[0] > 0 && factors[1] > 0) {
factors[2] = nprocs/(factors[0]*factors[1]);
return 1;
} else if (factors[0] > 0 && factors[2] > 0) {
factors[1] = nprocs/(factors[0]*factors[2]);
return 1;
} else if (factors[1] > 0 && factors[2] > 0) {
factors[0] = nprocs/(factors[1]*factors[2]);
return 1;
}
// determine cross-sectional areas for orthogonal and triclinic boxes
// area[0] = xy, area[1] = xz, area[2] = yz
double area[3];
if (domain->triclinic == 0) {
area[0] = domain->xprd * domain->yprd / (sx * sy);
area[1] = domain->xprd * domain->zprd / (sx * sz);
area[2] = domain->yprd * domain->zprd / (sy * sz);
} else {
double *h = domain->h;
double a[3],b[3],c[3];
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
b[0] = h[5]; b[1] = h[1]; b[2] = 0.0;
MathExtra::cross3(a,b,c);
area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy);
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
MathExtra::cross3(a,b,c);
area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz);
a[0] = h[5]; a[1] = h[1]; a[2] = 0.0;
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
MathExtra::cross3(a,b,c);
area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz);
}
double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
// loop thru all possible factorizations of numprocs
// only consider valid cases that match procgrid settings
// surf = surface area of a proc sub-domain
// only consider cases that match user_factors & other_procgrid settings
// success = 1 if valid factoriztion is found
// may not be if other constraint is enforced
int ipx,ipy,ipz,valid;
double surf;
int success = 0;
ipx = 1;
while (ipx <= numprocs) {
valid = 1;
if (user_factors[0] && ipx != user_factors[0]) valid = 0;
if (other == MULTIPLE && other_procgrid[0] % ipx) valid = 0;
if (numprocs % ipx) valid = 0;
if (!valid) {
ipx++;
continue;
}
ipy = 1;
while (ipy <= numprocs/ipx) {
valid = 1;
if (user_factors[1] && ipy != user_factors[1]) valid = 0;
if (other == MULTIPLE && other_procgrid[1] % ipy) valid = 0;
if ((numprocs/ipx) % ipy) valid = 0;
if (!valid) {
ipy++;
continue;
}
ipz = numprocs/ipx/ipy;
valid = 1;
if (user_factors[2] && ipz != user_factors[2]) valid = 0;
if (other == MULTIPLE && other_procgrid[2] % ipz) valid = 0;
if (domain->dimension == 2 && ipz != 1) valid = 0;
if (!valid) {
ipy++;
continue;
}
surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
if (surf < bestsurf) {
success = 1;
bestsurf = surf;
factors[0] = ipx;
factors[1] = ipy;
factors[2] = ipz;
}
ipy++;
}
ipx++;
}
return success;
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
if flag = 1, realloc if flag = 1, realloc
@ -1502,7 +1356,48 @@ void Comm::set_processors(int narg, char **arg)
int iarg = 3; int iarg = 3;
while (iarg < narg) { while (iarg < narg) {
if (strcmp(arg[iarg],"part") == 0) { if (strcmp(arg[iarg],"grid") == 0) {
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"level1") == 0) {
gridflag = LEVEL1;
iarg += 2;
if (strcmp(arg[iarg+1],"level2") == 0) {
gridflag = LEVEL2;
iarg += 2;
} else if (strcmp(arg[iarg+1],"numa") == 0) {
gridflag = NUMA;
iarg += 2;
} else if (strcmp(arg[iarg],"custom") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
gridflag = CUSTOM;
delete [] customfile;
int n = strlen(arg[iarg+1]) + 1;
customfile = new char(n);
strcpy(customfile,arg[iarg+1]);
iarg += 2;
} else error->all(FLERR,"Illegal processors command");
} else if (strcmp(arg[iarg],"map") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
else if (strcmp(arg[iarg+1],"xyz") == 0 ||
strcmp(arg[iarg+1],"xzy") == 0 ||
strcmp(arg[iarg+1],"yxz") == 0 ||
strcmp(arg[iarg+1],"yzx") == 0 ||
strcmp(arg[iarg+1],"zxy") == 0 ||
strcmp(arg[iarg+1],"zyx") == 0) {
mapflag = XYZ;
strcpy(xyz,arg[iarg+1]);
} else error->all(FLERR,"Illegal processors command");
iarg += 2;
} else if (strcmp(arg[iarg],"part") == 0) {
if (iarg+4 > narg) error->all(FLERR,"Illegal processors command"); if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
if (universe->nworlds == 1) if (universe->nworlds == 1)
error->all(FLERR, error->all(FLERR,
@ -1527,250 +1422,32 @@ void Comm::set_processors(int narg, char **arg)
"is already a receiver"); "is already a receiver");
recv_from_partition = isend-1; recv_from_partition = isend-1;
} }
// only receiver has otherflag dependency
if (strcmp(arg[iarg+3],"multiple") == 0) { if (strcmp(arg[iarg+3],"multiple") == 0) {
if (universe->iworld == irecv-1) other_partition_style = MULTIPLE; if (universe->iworld == irecv-1) {
otherflag = 1;
other_style = MULTIPLE;
}
} else error->all(FLERR,"Illegal processors command"); } else error->all(FLERR,"Illegal processors command");
iarg += 4; iarg += 4;
} else if (strcmp(arg[iarg],"grid") == 0) { } else if (strcmp(arg[iarg],"out") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"cart") == 0) layoutflag = CART; delete [] outfile;
else if (strcmp(arg[iarg+1],"cart/reorder") == 0) int n = strlen(arg[iarg+1]) + 1;
layoutflag = CARTREORDER; outfile = new char(n);
else if (strcmp(arg[iarg+1],"xyz") == 0) layoutflag = XYZ; strcpy(outfile,arg[iarg+1]);
else if (strcmp(arg[iarg+1],"xzy") == 0) layoutflag = XZY;
else if (strcmp(arg[iarg+1],"yxz") == 0) layoutflag = YXZ;
else if (strcmp(arg[iarg+1],"yzx") == 0) layoutflag = YZX;
else if (strcmp(arg[iarg+1],"zxy") == 0) layoutflag = ZXY;
else if (strcmp(arg[iarg+1],"zyx") == 0) layoutflag = ZYX;
else error->all(FLERR,"Illegal processors command");
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"numa") == 0) {
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
numa_nodes = 1;
if (numa_nodes < 0) error->all(FLERR,"Illegal processors command");
iarg += 1;
} else error->all(FLERR,"Illegal processors command"); } else error->all(FLERR,"Illegal processors command");
} }
// error check // error check
if (numa_nodes) { if (gridflag == NUMA && mapflag != CART)
if (layoutflag != CART) error->all(FLERR,"Processors grid numa and map choice are incompatible");
error->all(FLERR,"Can only use processors numa "
"with processors grid cart");
if (send_to_partition >= 0 || recv_from_partition >= 0)
error->one(FLERR,"Cannot use processors numa with processors part");
}
}
/* ----------------------------------------------------------------------
setup 3d grid of procs based on box size, group neighbors by numa node
return 1 if successful, return 0 if not
------------------------------------------------------------------------- */
int Comm::numa_set_proc_grid()
{
// get names of all nodes
int name_length;
char node_name[MPI_MAX_PROCESSOR_NAME];
char node_names[MPI_MAX_PROCESSOR_NAME*nprocs];
MPI_Get_processor_name(node_name,&name_length);
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names,
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,world);
std::string node_string = std::string(node_name);
// get number of procs per node
std::map<std::string,int> name_map;
std::map<std::string,int>::iterator np;
for (int i = 0; i < nprocs; i++) {
std::string i_string = std::string(&node_names[i*MPI_MAX_PROCESSOR_NAME]);
np = name_map.find(i_string);
if (np == name_map.end())
name_map[i_string] = 1;
else
np->second++;
}
int procs_per_node = name_map.begin()->second;
int procs_per_numa = procs_per_node / numa_nodes;
// use non-numa mapping if any condition met
if (procs_per_numa < 4 || // less than 4 procs per numa node
procs_per_node % numa_nodes != 0 || // no-op since numa_nodes = 1 for now
nprocs % procs_per_numa != 0 || // total procs not a multiple of node
nprocs <= procs_per_numa || // only 1 node used
user_procgrid[0] > 1 || // user specified grid dimension < 1
user_procgrid[1] > 1 || // in any dimension
user_procgrid[2] > 1) {
if (me == 0) {
if (screen) fprintf(screen," 1 by 1 by 1 Node grid\n");
if (logfile) fprintf(logfile," 1 by 1 by 1 Node grid\n");
}
return 0;
}
// user settings for the factorization per numa node - currently always zero
int user_numagrid[3];
user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0;
// if user specifies 1 for a proc grid dimension,
// also use 1 for the node grid dimension
if (user_procgrid[0] == 1) user_numagrid[0] = 1;
if (user_procgrid[1] == 1) user_numagrid[1] = 1;
if (user_procgrid[2] == 1) user_numagrid[2] = 1;
// get an initial factorization for each numa node,
// if the user has not set the number of processors
int numagrid[3];
procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0);
if (numagrid[0] * numagrid[1] * numagrid[2] != procs_per_numa)
error->all(FLERR,"Bad node grid of processors");
// get a factorization for the grid of numa nodes
int node_count = nprocs / procs_per_numa;
procs2box(node_count,user_procgrid,procgrid,
numagrid[0],numagrid[1],numagrid[2],0);
if (procgrid[0] * procgrid[1] * procgrid[2] != node_count)
error->all(FLERR,"Bad grid of processors");
// repeat the numa node factorization using the subdomain sizes
// this will refine the factorization if the user specified the node layout
procs2box(procs_per_numa,user_numagrid,numagrid,
procgrid[0],procgrid[1],procgrid[2],0);
if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa)
error->all(FLERR,"Bad Node grid of processors");
if (domain->dimension == 2 && (procgrid[2] != 1 || numagrid[2] != 1))
error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
// assign a unique id to each node
int node_num = 0, node_id = 0;
for (np = name_map.begin(); np != name_map.end(); ++np) {
if (np->first == node_string) node_id = node_num;
node_num++;
}
// setup a per node communicator and find rank within
MPI_Comm node_comm;
MPI_Comm_split(world,node_id,0,&node_comm);
int node_rank;
MPI_Comm_rank(node_comm,&node_rank);
// setup a per numa communicator and find rank within
MPI_Comm numa_comm;
int local_numa = node_rank / procs_per_numa;
MPI_Comm_split(node_comm,local_numa,0,&numa_comm);
int numa_rank;
MPI_Comm_rank(numa_comm,&numa_rank);
// setup a communicator with the rank 0 procs from each numa node
MPI_Comm numa_leaders;
MPI_Comm_split(world,numa_rank,0,&numa_leaders);
// use the MPI Cartesian routines to map the nodes to the grid
// could implement layoutflag as in non-NUMA case?
int reorder = 0;
int periods[3];
periods[0] = periods[1] = periods[2] = 1;
MPI_Comm cartesian;
if (numa_rank == 0) {
MPI_Cart_create(numa_leaders,3,procgrid,periods,reorder,&cartesian);
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
}
// broadcast numa node location in grid to other procs in numa node
MPI_Bcast(myloc,3,MPI_INT,0,numa_comm);
// get storage for the process mapping
if (grid2proc) memory->destroy(grid2proc);
memory->create(grid2proc,procgrid[0]*numagrid[0],procgrid[1]*numagrid[1],
procgrid[2]*numagrid[2],"comm:grid2proc");
// compute my location within the grid
int z_offset = numa_rank / (numagrid[0] * numagrid[1]);
int y_offset = (numa_rank % (numagrid[0] * numagrid[1]))/numagrid[0];
int x_offset = numa_rank % numagrid[0];
myloc[0] = myloc[0] * numagrid[0] + x_offset;
myloc[1] = myloc[1] * numagrid[1] + y_offset;
myloc[2] = myloc[2] * numagrid[2] + z_offset;
procgrid[0] *= numagrid[0];
procgrid[1] *= numagrid[1];
procgrid[2] *= numagrid[2];
// allgather of locations to fill grid2proc
int **gridi;
memory->create(gridi,nprocs,3,"comm:gridi");
MPI_Allgather(&myloc,3,MPI_INT,gridi[0],3,MPI_INT,world);
for (int i = 0; i < nprocs; i++)
grid2proc[gridi[i][0]][gridi[i][1]][gridi[i][2]] = i;
memory->destroy(gridi);
// get my neighbors
int minus, plus;
for (int i = 0; i < 3; i++) {
numa_shift(myloc[i],procgrid[i],minus,plus);
procneigh[i][0] = minus;
procneigh[i][1] = plus;
}
procneigh[0][0] = grid2proc[procneigh[0][0]][myloc[1]][myloc[2]];
procneigh[0][1] = grid2proc[procneigh[0][1]][myloc[1]][myloc[2]];
procneigh[1][0] = grid2proc[myloc[0]][procneigh[1][0]][myloc[2]];
procneigh[1][1] = grid2proc[myloc[0]][procneigh[1][1]][myloc[2]];
procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][procneigh[2][0]];
procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][procneigh[2][1]];
if (numa_rank == 0) MPI_Comm_free(&cartesian);
MPI_Comm_free(&numa_leaders);
MPI_Comm_free(&numa_comm);
MPI_Comm_free(&node_comm);
// set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box();
if (me == 0) {
if (screen) fprintf(screen," %d by %d by %d Node grid\n",
numagrid[0],numagrid[1],numagrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d Node grid\n",
numagrid[0],numagrid[1],numagrid[2]);
if (screen) fprintf(screen," %d by %d by %d processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
}
return 1;
}
/* ----------------------------------------------------------------------
get the index to the neighboring processors in a dimension
------------------------------------------------------------------------- */
void Comm::numa_shift(int myloc, int num_procs, int &minus, int &plus)
{
minus = myloc - 1;
if (minus < 0) minus = num_procs - 1;
plus = myloc + 1;
if (plus == num_procs) plus = 0;
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------

View File

@ -83,10 +83,16 @@ class Comm : protected Pointers {
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
int map_style; // non-0 if global->local mapping is done int map_style; // non-0 if global->local mapping is done
int bordergroup; // only communicate this group in borders int bordergroup; // only communicate this group in borders
int layoutflag; // user-selected layout for 3d proc grid int gridflag; // option for creating 3d grid
int numa_nodes; // layout NUMA-aware 3d proc grid int mapflag; // option for mapping procs to 3d grid
char xyz[4]; // xyz mapping of procs to 3d grid
char *customfile; // file with custom proc map
char *outfile; // proc grid/map output file
int otherflag; // 1 if this partition dependent on another
int other_style; // style of dependency
int other_procgrid[3]; // proc layout of another partition int other_procgrid[3]; // proc layout of another partition
int coregrid[3]; // procs layout of cores within a node
int *firstrecv; // where to put 1st recv atom in each swap int *firstrecv; // where to put 1st recv atom in each swap
int **sendlist; // list of atoms to send in each swap int **sendlist; // list of atoms to send in each swap
@ -97,8 +103,6 @@ class Comm : protected Pointers {
int maxsend,maxrecv; // current size of send/recv buffer int maxsend,maxrecv; // current size of send/recv buffer
int maxforward,maxreverse; // max # of datums in forward/reverse comm int maxforward,maxreverse; // max # of datums in forward/reverse comm
int procs2box(int, int[3], int[3], // map procs to 3d box
const int, const int, const int, int);
virtual void grow_send(int,int); // reallocate send buffer virtual void grow_send(int,int); // reallocate send buffer
virtual void grow_recv(int); // free/allocate recv buffer virtual void grow_recv(int); // free/allocate recv buffer
virtual void grow_list(int, int); // reallocate one sendlist virtual void grow_list(int, int); // reallocate one sendlist
@ -107,9 +111,6 @@ class Comm : protected Pointers {
virtual void allocate_multi(int); // allocate multi arrays virtual void allocate_multi(int); // allocate multi arrays
virtual void free_swap(); // free swap arrays virtual void free_swap(); // free swap arrays
virtual void free_multi(); // free multi arrays virtual void free_multi(); // free multi arrays
int numa_set_proc_grid();
void numa_shift(int, int, int &, int &);
}; };
} }

View File

@ -69,12 +69,10 @@ void Error::universe_one(const char *file, int line, const char *str)
called by all procs in one world called by all procs in one world
close all output, screen, and log files in world close all output, screen, and log files in world
insure all procs in world call, else will hang insure all procs in world call, else will hang
if abort = 0 (default): force MPI_Abort if running in multi-partition mode
if only one world in universe calls, universe will hang
if abort = 1: force abort of entire universe if any world in universe calls
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Error::all(const char *file, int line, const char *str, int abort) void Error::all(const char *file, int line, const char *str)
{ {
MPI_Barrier(world); MPI_Barrier(world);
@ -90,7 +88,7 @@ void Error::all(const char *file, int line, const char *str, int abort)
if (screen && screen != stdout) fclose(screen); if (screen && screen != stdout) fclose(screen);
if (logfile) fclose(logfile); if (logfile) fclose(logfile);
if (abort) MPI_Abort(world,1); if (universe->nworlds > 1) MPI_Abort(universe->uworld,1);
MPI_Finalize(); MPI_Finalize();
exit(1); exit(1);
} }

View File

@ -25,7 +25,7 @@ class Error : protected Pointers {
void universe_all(const char *, int, const char *); void universe_all(const char *, int, const char *);
void universe_one(const char *, int, const char *); void universe_one(const char *, int, const char *);
void all(const char *, int, const char *, int = 0); void all(const char *, int, const char *);
void one(const char *, int, const char *); void one(const char *, int, const char *);
void warning(const char *, int, const char *, int = 1); void warning(const char *, int, const char *, int = 1);
void message(const char *, int, char *, int = 1); void message(const char *, int, char *, int = 1);

View File

@ -149,12 +149,12 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"-reorder") == 0 || } else if (strcmp(arg[iarg],"-reorder") == 0 ||
strcmp(arg[iarg],"-r") == 0) { strcmp(arg[iarg],"-r") == 0) {
if (iarg+2 > narg) if (iarg+3 > narg)
error->universe_all(FLERR,"Invalid command-line argument"); error->universe_all(FLERR,"Invalid command-line argument");
if (universe->existflag) if (universe->existflag)
error->universe_all(FLERR,"Cannot use -reorder after -partition"); error->universe_all(FLERR,"Cannot use -reorder after -partition");
universe->reorder(arg[iarg+1]); universe->reorder(arg[iarg+1],arg[iarg+2]);
iarg += 2; iarg += 3;
} else if (strcmp(arg[iarg],"-help") == 0 || } else if (strcmp(arg[iarg],"-help") == 0 ||
strcmp(arg[iarg],"-h") == 0) { strcmp(arg[iarg],"-h") == 0) {
if (iarg+1 > narg) if (iarg+1 > narg)

470
src/pair_born_coul_wolf.cpp Normal file
View File

@ -0,0 +1,470 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_born_coul_wolf.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairBornCoulWolf::PairBornCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
/* ---------------------------------------------------------------------- */
PairBornCoulWolf::~PairBornCoulWolf()
{
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
memory->destroy(cut_lj);
memory->destroy(cut_ljsq);
memory->destroy(a);
memory->destroy(rho);
memory->destroy(sigma);
memory->destroy(c);
memory->destroy(d);
memory->destroy(rhoinv);
memory->destroy(born1);
memory->destroy(born2);
memory->destroy(born3);
memory->destroy(offset);
}
}
/* ---------------------------------------------------------------------- */
void PairBornCoulWolf::compute(int eflag, int vflag)
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
evdwl = ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
double **x = atom->x;
double **f = atom->f;
double *q = atom->q;
int *type = atom->type;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
double *special_coul = force->special_coul;
double *special_lj = force->special_lj;
int newton_pair = force->newton_pair;
double qqrd2e = force->qqrd2e;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
if (j < nall) factor_coul = factor_lj = 1.0;
else {
factor_coul = special_coul[j/nall];
factor_lj = special_lj[j/nall];
j %= nall;
}
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r = sqrt(rsq);
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
fpair = (forcecoul + factor_lj*forceborn) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
evdwl,ecoul,fpair,delx,dely,delz);
}
}
}
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairBornCoulWolf::allocate()
{
allocated = 1;
int n = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
for (int i = 1; i <= n; i++)
for (int j = i; j <= n; j++)
setflag[i][j] = 0;
memory->create(cutsq,n+1,n+1,"pair:cutsq");
memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
memory->create(a,n+1,n+1,"pair:a");
memory->create(rho,n+1,n+1,"pair:rho");
memory->create(sigma,n+1,n+1,"pair:sigma");
memory->create(c,n+1,n+1,"pair:c");
memory->create(d,n+1,n+1,"pair:d");
memory->create(rhoinv,n+1,n+1,"pair:rhoinv");
memory->create(born1,n+1,n+1,"pair:born1");
memory->create(born2,n+1,n+1,"pair:born2");
memory->create(born3,n+1,n+1,"pair:born3");
memory->create(offset,n+1,n+1,"pair:offset");
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairBornCoulWolf::settings(int narg, char **arg)
{
if (narg < 2 || narg > 3) error->all(FLERR,"Illegal pair_style command");
alf = force->numeric(arg[0]);
cut_lj_global = force->numeric(arg[1]);
if (narg == 2) cut_coul = cut_lj_global;
else cut_coul = force->numeric(arg[2]);
if (allocated) {
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i+1; j <= atom->ntypes; j++)
if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
}
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
void PairBornCoulWolf::coeff(int narg, char **arg)
{
if (narg < 7 || narg > 8)
error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;
force->bounds(arg[0],atom->ntypes,ilo,ihi);
force->bounds(arg[1],atom->ntypes,jlo,jhi);
double a_one = force->numeric(arg[2]);
double rho_one = force->numeric(arg[3]);
double sigma_one = force->numeric(arg[4]);
if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
double c_one = force->numeric(arg[5]);
double d_one = force->numeric(arg[6]);
double cut_lj_one = cut_lj_global;
if (narg == 8) cut_lj_one = force->numeric(arg[7]);
int count = 0;
for (int i = ilo; i <= ihi; i++) {
for (int j = MAX(jlo,i); j <= jhi; j++) {
a[i][j] = a_one;
rho[i][j] = rho_one;
sigma[i][j] = sigma_one;
c[i][j] = c_one;
d[i][j] = d_one;
cut_lj[i][j] = cut_lj_one;
setflag[i][j] = 1;
count++;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairBornCoulWolf::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style born/coul/Wolf requires atom attribute q");
int irequest = neighbor->request(this);
cut_coulsq = cut_coul * cut_coul;
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairBornCoulWolf::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
double cut = MAX(cut_lj[i][j],cut_coul);
cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
rhoinv[i][j] = 1.0/rho[i][j];
born1[i][j] = a[i][j]/rho[i][j];
born2[i][j] = 6.0*c[i][j];
born3[i][j] = 8.0*d[i][j];
if (offset_flag) {
double rexp = exp(-cut_lj[i][j]/rho[i][j]);
offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0)
+ d[i][j]/pow(cut_lj[i][j],8.0);
} else offset[i][j] = 0.0;
cut_ljsq[j][i] = cut_ljsq[i][j];
a[j][i] = a[i][j];
c[j][i] = c[i][j];
d[j][i] = d[i][j];
rhoinv[j][i] = rhoinv[i][j];
sigma[j][i] = sigma[i][j];
born1[j][i] = born1[i][j];
born2[j][i] = born2[i][j];
born3[j][i] = born3[i][j];
offset[j][i] = offset[i][j];
return cut;
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairBornCoulWolf::write_restart(FILE *fp)
{
write_restart_settings(fp);
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
fwrite(&setflag[i][j],sizeof(int),1,fp);
if (setflag[i][j]) {
fwrite(&a[i][j],sizeof(double),1,fp);
fwrite(&rho[i][j],sizeof(double),1,fp);
fwrite(&sigma[i][j],sizeof(double),1,fp);
fwrite(&c[i][j],sizeof(double),1,fp);
fwrite(&d[i][j],sizeof(double),1,fp);
fwrite(&cut_lj[i][j],sizeof(double),1,fp);
}
}
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairBornCoulWolf::read_restart(FILE *fp)
{
read_restart_settings(fp);
allocate();
int i,j;
int me = comm->me;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
if (setflag[i][j]) {
if (me == 0) {
fread(&a[i][j],sizeof(double),1,fp);
fread(&rho[i][j],sizeof(double),1,fp);
fread(&sigma[i][j],sizeof(double),1,fp);
fread(&c[i][j],sizeof(double),1,fp);
fread(&d[i][j],sizeof(double),1,fp);
fread(&cut_lj[i][j],sizeof(double),1,fp);
}
MPI_Bcast(&a[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&rho[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&c[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&d[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
}
}
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairBornCoulWolf::write_restart_settings(FILE *fp)
{
fwrite(&alf,sizeof(double),1,fp);
fwrite(&cut_lj_global,sizeof(double),1,fp);
fwrite(&cut_coul,sizeof(double),1,fp);
fwrite(&offset_flag,sizeof(int),1,fp);
fwrite(&mix_flag,sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairBornCoulWolf::read_restart_settings(FILE *fp)
{
if (comm->me == 0) {
fread(&alf,sizeof(double),1,fp);
fread(&cut_lj_global,sizeof(double),1,fp);
fread(&cut_coul,sizeof(double),1,fp);
fread(&offset_flag,sizeof(int),1,fp);
fread(&mix_flag,sizeof(int),1,fp);
}
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
}
/* ----------------------------------------------------------------------
only the pair part is calculated here
------------------------------------------------------------------------- */
double PairBornCoulWolf::single(int i, int j, int itype, int jtype,
double rsq,
double factor_coul, double factor_lj,
double &fforce)
{
double r2inv,r6inv,r,prefactor,rexp;
double forcecoul,forceborn,phicoul,phiborn;
double e_shift,f_shift,dvdrr,erfcc,erfcd;
r2inv = 1.0/rsq;
e_shift = erfc(alf*cut_coul) / cut_coul;
f_shift = -(e_shift+2*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r = sqrt(rsq);
rexp = exp(-r*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
fforce = (forcecoul + factor_lj*forceborn) * r2inv;
double eng = 0.0;
if (rsq < cut_coulsq) {
phicoul = prefactor * (erfcc-e_shift*r);
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
eng += phicoul;
}
if (rsq < cut_ljsq[itype][jtype]) {
phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
+ d[itype][jtype]*r2inv*r6inv - offset[itype][jtype];
eng += factor_lj*phiborn;
}
return eng;
}

56
src/pair_born_coul_wolf.h Normal file
View File

@ -0,0 +1,56 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(born/coul/wolf,PairBornCoulWolf)
#else
#ifndef LMP_PAIR_BORN_COUL_WOLF_H
#define LMP_PAIR_BORN_COUL_WOLF_H
#include "pair.h"
namespace LAMMPS_NS {
class PairBornCoulWolf : public Pair {
public:
PairBornCoulWolf(class LAMMPS *);
~PairBornCoulWolf();
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_style();
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
private:
double cut_lj_global;
double **cut_lj,**cut_ljsq;
double cut_coul,cut_coulsq;
double **a,**rho,**sigma,**c,**d;
double **rhoinv,**born1,**born2,**born3,**offset;
double alf,e_shift,f_shift;
void allocate();
};
}
#endif
#endif

328
src/pair_coul_wolf.cpp Normal file
View File

@ -0,0 +1,328 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_coul_wolf.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairCoulWolf::PairCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
/* ---------------------------------------------------------------------- */
PairCoulWolf::~PairCoulWolf()
{
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
}
}
/* ---------------------------------------------------------------------- */
void PairCoulWolf::compute(int eflag, int vflag)
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double rsq,forcecoul,factor_coul;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
double **x = atom->x;
double **f = atom->f;
double *q = atom->q;
int *type = atom->type;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
double *special_coul = force->special_coul;
int newton_pair = force->newton_pair;
double qqrd2e = force->qqrd2e;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
if (j < nall) factor_coul = 1.0;
else {
factor_coul = special_coul[j/nall];
j %= nall;
}
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
fpair = forcecoul / rsq;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
0.0,ecoul,fpair,delx,dely,delz);
}
}
}
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairCoulWolf::allocate()
{
allocated = 1;
int n = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
for (int i = 1; i <= n; i++)
for (int j = i; j <= n; j++)
setflag[i][j] = 0;
memory->create(cutsq,n+1,n+1,"pair:cutsq");
}
/* ----------------------------------------------------------------------
global settings
unlike other pair styles,
there are no individual pair settings that these override
------------------------------------------------------------------------- */
void PairCoulWolf::settings(int narg, char **arg)
{
if (narg != 2) error->all(FLERR,"Illegal pair_style command");
alf = force->numeric(arg[0]);
cut_coul = force->numeric(arg[1]);
}
/* ----------------------------------------------------------------------
set cutoffs for one or more type pairs, optional
------------------------------------------------------------------------- */
void PairCoulWolf::coeff(int narg, char **arg)
{
if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;
force->bounds(arg[0],atom->ntypes,ilo,ihi);
force->bounds(arg[1],atom->ntypes,jlo,jhi);
int count = 0;
for (int i = ilo; i <= ihi; i++) {
for (int j = MAX(jlo,i); j <= jhi; j++) {
setflag[i][j] = 1;
count++;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairCoulWolf::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair coul/wolf requires atom attribute q");
int irequest = neighbor->request(this);
cut_coulsq = cut_coul*cut_coul;
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairCoulWolf::init_one(int i, int j)
{
return cut_coul;
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairCoulWolf::write_restart(FILE *fp)
{
write_restart_settings(fp);
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++)
fwrite(&setflag[i][j],sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairCoulWolf::read_restart(FILE *fp)
{
read_restart_settings(fp);
allocate();
int i,j;
int me = comm->me;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
}
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairCoulWolf::write_restart_settings(FILE *fp)
{
fwrite(&alf,sizeof(double),1,fp);
fwrite(&cut_coul,sizeof(double),1,fp);
fwrite(&offset_flag,sizeof(int),1,fp);
fwrite(&mix_flag,sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairCoulWolf::read_restart_settings(FILE *fp)
{
if (comm->me == 0) {
fread(&alf,sizeof(double),1,fp);
fread(&cut_coul,sizeof(double),1,fp);
fread(&offset_flag,sizeof(int),1,fp);
fread(&mix_flag,sizeof(int),1,fp);
}
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
}
/* ----------------------------------------------------------------------
only the pair part is calculated here
------------------------------------------------------------------------- */
double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq,
double factor_coul, double factor_lj,
double &fforce)
{
double r6inv,r,prefactor,rexp;
double forcecoul,forceborn,phicoul;
double e_shift,f_shift,dvdrr,erfcc,erfcd;
e_shift = erfc(alf*cut_coul) / cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
fforce = forcecoul / rsq;
double eng = 0.0;
if (rsq < cut_coulsq) {
phicoul = prefactor * (erfcc-e_shift*r);
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
eng += phicoul;
}
return eng;
}

52
src/pair_coul_wolf.h Normal file
View File

@ -0,0 +1,52 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(coul/wolf,PairCoulWolf)
#else
#ifndef LMP_PAIR_COUL_WOLF_H
#define LMP_PAIR_COUL_WOLF_H
#include "pair.h"
namespace LAMMPS_NS {
class PairCoulWolf : public Pair {
public:
PairCoulWolf(class LAMMPS *);
~PairCoulWolf();
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_style();
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
private:
double cut_coul,cut_coulsq;
double alf,e_shift,f_shift;
void allocate();
};
}
#endif
#endif

View File

@ -431,7 +431,7 @@ void PairLJCut::settings(int narg, char **arg)
cut_global = force->numeric(arg[0]); cut_global = force->numeric(arg[0]);
// reset cutoffs that have been explicitly set // reset cutoffs that have been explicitly set
if (allocated) { if (allocated) {
int i,j; int i,j;
for (i = 1; i <= atom->ntypes; i++) for (i = 1; i <= atom->ntypes; i++)
@ -446,7 +446,8 @@ void PairLJCut::settings(int narg, char **arg)
void PairLJCut::coeff(int narg, char **arg) void PairLJCut::coeff(int narg, char **arg)
{ {
if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients"); if (narg < 4 || narg > 5)
error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate(); if (!allocated) allocate();
int ilo,ihi,jlo,jhi; int ilo,ihi,jlo,jhi;

View File

@ -59,59 +59,81 @@ Universe::~Universe()
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
reorder universe processors based on custom file reorder universe processors
create uni2orig as inverse mapping
re-create uworld communicator with new ordering via Comm_split()
style = "nth", arg = N
move every Nth proc to end of rankings
style = "custom", arg = filename
file has nprocs lines with I J file has nprocs lines with I J
I = universe proc ID in original communicator uorig I = universe proc ID in original communicator uorig
J = universe proc ID in reordered communicator uworld J = universe proc ID in reordered communicator uworld
create uni2orig as inverse mapping
re-create uworld communicator with new ordering via Comm_split()
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void Universe::reorder(char *file) void Universe::reorder(char *style, char *arg)
{ {
char line[MAXLINE]; char line[MAXLINE];
if (uworld != uorig) MPI_Comm_free(&uworld); if (uworld != uorig) MPI_Comm_free(&uworld);
if (me == 0) { if (strcmp(style,"nth") == 0) {
FILE *fp = fopen(file,"r"); int n = atoi(arg);
if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file"); if (n <= 0)
error->universe_all(FLERR,"Invalid -reorder N value");
// skip header = blank and comment lines if (nprocs % n)
error->universe_all(FLERR,"Nprocs not a multiple of N for -reorder");
char *ptr; for (int i = 0; i < nprocs; i++) {
if (!fgets(line,MAXLINE,fp)) if (i < (n-1)*nprocs/n) uni2orig[i] = i/(n-1) * n + (i % (n-1));
error->one(FLERR,"Unexpected end of -reorder file"); else uni2orig[i] = (i - (n-1)*nprocs/n) * n + n-1;
while (1) {
if (ptr = strchr(line,'#')) *ptr = '\0';
if (strspn(line," \t\n\r") != strlen(line)) break;
if (!fgets(line,MAXLINE,fp))
error->one(FLERR,"Unexpected end of -reorder file");
} }
// read nprocs lines } else if (strcmp(style,"custom") == 0) {
// uni2orig = inverse mapping
int me_orig,me_new; if (me == 0) {
sscanf(line,"%d %d",&me_orig,&me_new); FILE *fp = fopen(arg,"r");
if (me_orig < 0 || me_orig >= nprocs || if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file");
me_new < 0 || me_new >= nprocs)
error->one(FLERR,"Invalid entry in reorder file");
uni2orig[me_new] = me_orig;
for (int i = 1; i < nprocs; i++) { // skip header = blank and comment lines
char *ptr;
if (!fgets(line,MAXLINE,fp)) if (!fgets(line,MAXLINE,fp))
error->one(FLERR,"Unexpected end of reorder file"); error->one(FLERR,"Unexpected end of -reorder file");
sscanf(line,"%ld %ld",&me_orig,&me_new); while (1) {
if (ptr = strchr(line,'#')) *ptr = '\0';
if (strspn(line," \t\n\r") != strlen(line)) break;
if (!fgets(line,MAXLINE,fp))
error->one(FLERR,"Unexpected end of -reorder file");
}
// read nprocs lines
// uni2orig = inverse mapping
int me_orig,me_new;
sscanf(line,"%d %d",&me_orig,&me_new);
if (me_orig < 0 || me_orig >= nprocs || if (me_orig < 0 || me_orig >= nprocs ||
me_new < 0 || me_new >= nprocs) me_new < 0 || me_new >= nprocs)
error->one(FLERR,"Invalid entry in reorder file"); error->one(FLERR,"Invalid entry in reorder file");
uni2orig[me_new] = me_orig; uni2orig[me_new] = me_orig;
}
fclose(fp);
}
MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig); for (int i = 1; i < nprocs; i++) {
if (!fgets(line,MAXLINE,fp))
error->one(FLERR,"Unexpected end of reorder file");
sscanf(line,"%d %d",&me_orig,&me_new);
if (me_orig < 0 || me_orig >= nprocs ||
me_new < 0 || me_new >= nprocs)
error->one(FLERR,"Invalid entry in reorder file");
uni2orig[me_new] = me_orig;
}
fclose(fp);
}
// bcast uni2org from proc 0 to all other universe procs
MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig);
} else error->universe_all(FLERR,"Invalid command-line argument");
// create new uworld communicator
int ome,key; int ome,key;
MPI_Comm_rank(uorig,&ome); MPI_Comm_rank(uorig,&ome);

View File

@ -41,7 +41,7 @@ class Universe : protected Pointers {
Universe(class LAMMPS *, MPI_Comm); Universe(class LAMMPS *, MPI_Comm);
~Universe(); ~Universe();
void reorder(char *); void reorder(char *, char *);
void add_world(char *); void add_world(char *);
int consistent(); int consistent();
}; };

View File

@ -29,11 +29,10 @@
#include "output.h" #include "output.h"
#include "thermo.h" #include "thermo.h"
#include "random_mars.h" #include "random_mars.h"
#include "math_const.h"
#include "memory.h" #include "memory.h"
#include "error.h" #include "error.h"
#include "math_const.h"
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace MathConst; using namespace MathConst;