git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7340 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2011-12-13 15:57:18 +00:00
parent 744efbc2cd
commit 86347b81e3
13 changed files with 1121 additions and 517 deletions

View File

@ -35,13 +35,11 @@
#include "compute.h"
#include "output.h"
#include "dump.h"
#include "procmap.h"
#include "math_extra.h"
#include "error.h"
#include "memory.h"
#include <map>
#include <string>
#ifdef _OPENMP
#include "omp.h"
#endif
@ -54,8 +52,9 @@ using namespace LAMMPS_NS;
#define BIG 1.0e20
enum{SINGLE,MULTI};
enum{NONE,MULTIPLE};
enum{CART,CARTREORDER,XYZ,XZY,YXZ,YZX,ZXY,ZYX};
enum{MULTIPLE}; // same as in ProcMap
enum{ONELEVEL,NUMA,CUSTOM};
enum{CART,CARTREORDER,XYZ};
/* ----------------------------------------------------------------------
setup MPI and allocate buffer space
@ -67,9 +66,14 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
MPI_Comm_size(world,&nprocs);
user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0;
gridflag = ONELEVEL;
mapflag = CART;
customfile = NULL;
recv_from_partition = send_to_partition = -1;
otherflag = 0;
outfile = NULL;
grid2proc = NULL;
layoutflag = CART;
numa_nodes = 0;
bordergroup = 0;
style = SINGLE;
@ -77,8 +81,6 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
cutghostmulti = NULL;
cutghostuser = 0.0;
ghost_velocity = 0;
recv_from_partition = send_to_partition = -1;
other_partition_style = NONE;
// use of OpenMP threads
// query OpenMP for number of threads/process set by user at run-time
@ -121,7 +123,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
Comm::~Comm()
{
if (grid2proc) memory->destroy(grid2proc);
delete [] customfile;
delete [] outfile;
memory->destroy(grid2proc);
free_swap();
if (style == MULTI) {
@ -138,12 +143,13 @@ Comm::~Comm()
}
/* ----------------------------------------------------------------------
create 3d grid of procs based on box size
create 3d grid of procs based on Nprocs and box size & shape
map processors to grid
------------------------------------------------------------------------- */
void Comm::set_proc_grid()
{
// recv proc layout of another partition if my layout depends on it
// recv 3d proc grid of another partition if my 3d grid depends on it
if (recv_from_partition >= 0) {
MPI_Status status;
@ -153,124 +159,96 @@ void Comm::set_proc_grid()
MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
}
// use NUMA routines if numa_nodes > 0
// if NUMA routines fail, just continue
// create ProcMap class
if (numa_nodes) {
int flag = numa_set_proc_grid();
if (flag) return;
ProcMap *pmap = new ProcMap(lmp);
// create 3d grid of processors, produces procgrid
// can fail (on one partition) if constrained by other partition
// if numa_grid() fails, try onelevel_grid()
int flag;
if (gridflag == ONELEVEL) {
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
otherflag,other_style,other_procgrid);
if (!flag) error->all(FLERR,"Could not create grid of processors");
} else if (gridflag == NUMA) {
flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
if (!flag) {
coregrid[0] = coregrid[1] = coregrid[2] = 1;
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
otherflag,other_style,other_procgrid);
}
if (!flag) error->all(FLERR,"Could not create grid of processors");
} else if (gridflag == CUSTOM) {
pmap->custom_grid(nprocs,user_procgrid,procgrid);
}
// create layout of procs mapped to simulation box
// can fail (on one partition) if constrained by other_partition_style
int flag =
procs2box(nprocs,user_procgrid,procgrid,1,1,1,other_partition_style);
if (!flag) error->all(FLERR,"Could not layout grid of processors",1);
// error check
// error check on procgrid
if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
error->all(FLERR,"Bad grid of processors");
if (domain->dimension == 2 && procgrid[2] != 1)
error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
// grid2proc[i][j][k] = proc that owns i,j,k location in grid
// grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid
if (grid2proc) memory->destroy(grid2proc);
memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
"comm:grid2proc");
// use MPI Cartesian routines to layout 3d grid of procs
// MPI may do layout in machine-optimized fashion
// map processor IDs to 3d processor grid
// produces myloc, procneigh, grid2proc
if (layoutflag == CART || layoutflag == CARTREORDER) {
int reorder = 0;
if (layoutflag == CARTREORDER) reorder = 1;
int periods[3];
periods[0] = periods[1] = periods[2] = 1;
MPI_Comm cartesian;
if (gridflag == ONELEVEL) {
if (mapflag == CART)
pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc);
else if (mapflag == CARTREORDER)
pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc);
else if (mapflag == XYZ)
pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc);
MPI_Cart_create(world,3,procgrid,periods,reorder,&cartesian);
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
} else if (gridflag == NUMA) {
pmap->numa_map(coregrid,myloc,procneigh,grid2proc);
int coords[3];
int i,j,k;
for (i = 0; i < procgrid[0]; i++)
for (j = 0; j < procgrid[1]; j++)
for (k = 0; k < procgrid[2]; k++) {
coords[0] = i; coords[1] = j; coords[2] = k;
MPI_Cart_rank(cartesian,coords,&grid2proc[i][j][k]);
} else if (gridflag == CUSTOM) {
pmap->custom_map(myloc,procneigh,grid2proc);
}
MPI_Comm_free(&cartesian);
// print 3d grid info to screen and logfile
// layout 3d grid of procs explicitly, via user-requested XYZ ordering
} else {
int i,j,k,ime,jme,kme;
for (i = 0; i < procgrid[0]; i++)
for (j = 0; j < procgrid[1]; j++)
for (k = 0; k < procgrid[2]; k++) {
if (layoutflag == XYZ)
grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
else if (layoutflag == XZY)
grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i;
else if (layoutflag == YXZ)
grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j;
else if (layoutflag == YZX)
grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j;
else if (layoutflag == ZXY)
grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k;
else if (layoutflag == ZYX)
grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k;
if (grid2proc[i][j][k] == me) {
ime = i; jme = j, kme = k;
if (me == 0) {
if (screen) {
fprintf(screen," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (gridflag == NUMA)
fprintf(screen," %d by %d by %d core grid within node\n",
coregrid[0],coregrid[1],coregrid[2]);
}
if (logfile) {
fprintf(logfile," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (gridflag == NUMA)
fprintf(logfile," %d by %d by %d core grid within node\n",
coregrid[0],coregrid[1],coregrid[2]);
}
}
myloc[0] = ime;
myloc[1] = jme;
myloc[2] = kme;
// print 3d grid details to outfile
i = ime-1;
if (i < 0) i = procgrid[0]-1;
procneigh[0][0] = grid2proc[i][jme][kme];
i = ime+1;
if (i == procgrid[0]) i = 0;
procneigh[0][1] = grid2proc[i][jme][kme];
j = jme-1;
if (j < 0) j = procgrid[1]-1;
procneigh[1][0] = grid2proc[ime][j][kme];
j = jme+1;
if (j == procgrid[1]) j = 0;
procneigh[1][1] = grid2proc[ime][j][kme];
k = kme-1;
if (k < 0) k = procgrid[2]-1;
procneigh[2][0] = grid2proc[ime][jme][k];
k = kme+1;
if (k == procgrid[2]) k = 0;
procneigh[2][1] = grid2proc[ime][jme][k];
}
if (outfile) pmap->output(grid2proc,outfile);
// set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box();
if (me == 0) {
if (screen) fprintf(screen," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d MPI processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
}
// free ProcMap class
// send my proc layout to another partition if requested
delete pmap;
// send my 3d proc grid to another partition if requested
if (send_to_partition >= 0) {
if (me == 0) MPI_Send(procgrid,3,MPI_INT,
@ -1196,130 +1174,6 @@ void Comm::reverse_comm_dump(Dump *dump)
}
}
/* ----------------------------------------------------------------------
assign numprocs to 3d box so as to minimize surface area
area = surface area of each of 3 faces of simulation box divided by sx,sy,sz
for triclinic, area = cross product of 2 edge vectors stored in h matrix
valid assignment will be factorization of numprocs = Px by Py by Pz
user_factors = if non-zero, factors are specified by user
sx,sy,sz = scale box xyz dimension vy dividing by sx,sy,sz
other = 1 to enforce compatability with other partition's layout
return factors = # of procs assigned to each dimension
return 1 if successully factor, 0 if not
------------------------------------------------------------------------- */
int Comm::procs2box(int numprocs, int user_factors[3], int factors[3],
const int sx, const int sy, const int sz, int other)
{
factors[0] = user_factors[0];
factors[1] = user_factors[1];
factors[2] = user_factors[2];
// all 3 proc counts are specified
if (factors[0] && factors[1] && factors[2]) return 1;
// 2 out of 3 proc counts are specified
if (factors[0] > 0 && factors[1] > 0) {
factors[2] = nprocs/(factors[0]*factors[1]);
return 1;
} else if (factors[0] > 0 && factors[2] > 0) {
factors[1] = nprocs/(factors[0]*factors[2]);
return 1;
} else if (factors[1] > 0 && factors[2] > 0) {
factors[0] = nprocs/(factors[1]*factors[2]);
return 1;
}
// determine cross-sectional areas for orthogonal and triclinic boxes
// area[0] = xy, area[1] = xz, area[2] = yz
double area[3];
if (domain->triclinic == 0) {
area[0] = domain->xprd * domain->yprd / (sx * sy);
area[1] = domain->xprd * domain->zprd / (sx * sz);
area[2] = domain->yprd * domain->zprd / (sy * sz);
} else {
double *h = domain->h;
double a[3],b[3],c[3];
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
b[0] = h[5]; b[1] = h[1]; b[2] = 0.0;
MathExtra::cross3(a,b,c);
area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy);
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
MathExtra::cross3(a,b,c);
area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz);
a[0] = h[5]; a[1] = h[1]; a[2] = 0.0;
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
MathExtra::cross3(a,b,c);
area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz);
}
double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
// loop thru all possible factorizations of numprocs
// only consider valid cases that match procgrid settings
// surf = surface area of a proc sub-domain
// only consider cases that match user_factors & other_procgrid settings
// success = 1 if valid factoriztion is found
// may not be if other constraint is enforced
int ipx,ipy,ipz,valid;
double surf;
int success = 0;
ipx = 1;
while (ipx <= numprocs) {
valid = 1;
if (user_factors[0] && ipx != user_factors[0]) valid = 0;
if (other == MULTIPLE && other_procgrid[0] % ipx) valid = 0;
if (numprocs % ipx) valid = 0;
if (!valid) {
ipx++;
continue;
}
ipy = 1;
while (ipy <= numprocs/ipx) {
valid = 1;
if (user_factors[1] && ipy != user_factors[1]) valid = 0;
if (other == MULTIPLE && other_procgrid[1] % ipy) valid = 0;
if ((numprocs/ipx) % ipy) valid = 0;
if (!valid) {
ipy++;
continue;
}
ipz = numprocs/ipx/ipy;
valid = 1;
if (user_factors[2] && ipz != user_factors[2]) valid = 0;
if (other == MULTIPLE && other_procgrid[2] % ipz) valid = 0;
if (domain->dimension == 2 && ipz != 1) valid = 0;
if (!valid) {
ipy++;
continue;
}
surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
if (surf < bestsurf) {
success = 1;
bestsurf = surf;
factors[0] = ipx;
factors[1] = ipy;
factors[2] = ipz;
}
ipy++;
}
ipx++;
}
return success;
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
if flag = 1, realloc
@ -1502,7 +1356,48 @@ void Comm::set_processors(int narg, char **arg)
int iarg = 3;
while (iarg < narg) {
if (strcmp(arg[iarg],"part") == 0) {
if (strcmp(arg[iarg],"grid") == 0) {
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"level1") == 0) {
gridflag = LEVEL1;
iarg += 2;
if (strcmp(arg[iarg+1],"level2") == 0) {
gridflag = LEVEL2;
iarg += 2;
} else if (strcmp(arg[iarg+1],"numa") == 0) {
gridflag = NUMA;
iarg += 2;
} else if (strcmp(arg[iarg],"custom") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
gridflag = CUSTOM;
delete [] customfile;
int n = strlen(arg[iarg+1]) + 1;
customfile = new char(n);
strcpy(customfile,arg[iarg+1]);
iarg += 2;
} else error->all(FLERR,"Illegal processors command");
} else if (strcmp(arg[iarg],"map") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
else if (strcmp(arg[iarg+1],"xyz") == 0 ||
strcmp(arg[iarg+1],"xzy") == 0 ||
strcmp(arg[iarg+1],"yxz") == 0 ||
strcmp(arg[iarg+1],"yzx") == 0 ||
strcmp(arg[iarg+1],"zxy") == 0 ||
strcmp(arg[iarg+1],"zyx") == 0) {
mapflag = XYZ;
strcpy(xyz,arg[iarg+1]);
} else error->all(FLERR,"Illegal processors command");
iarg += 2;
} else if (strcmp(arg[iarg],"part") == 0) {
if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
if (universe->nworlds == 1)
error->all(FLERR,
@ -1527,250 +1422,32 @@ void Comm::set_processors(int narg, char **arg)
"is already a receiver");
recv_from_partition = isend-1;
}
// only receiver has otherflag dependency
if (strcmp(arg[iarg+3],"multiple") == 0) {
if (universe->iworld == irecv-1) other_partition_style = MULTIPLE;
if (universe->iworld == irecv-1) {
otherflag = 1;
other_style = MULTIPLE;
}
} else error->all(FLERR,"Illegal processors command");
iarg += 4;
} else if (strcmp(arg[iarg],"grid") == 0) {
} else if (strcmp(arg[iarg],"out") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
if (strcmp(arg[iarg+1],"cart") == 0) layoutflag = CART;
else if (strcmp(arg[iarg+1],"cart/reorder") == 0)
layoutflag = CARTREORDER;
else if (strcmp(arg[iarg+1],"xyz") == 0) layoutflag = XYZ;
else if (strcmp(arg[iarg+1],"xzy") == 0) layoutflag = XZY;
else if (strcmp(arg[iarg+1],"yxz") == 0) layoutflag = YXZ;
else if (strcmp(arg[iarg+1],"yzx") == 0) layoutflag = YZX;
else if (strcmp(arg[iarg+1],"zxy") == 0) layoutflag = ZXY;
else if (strcmp(arg[iarg+1],"zyx") == 0) layoutflag = ZYX;
else error->all(FLERR,"Illegal processors command");
delete [] outfile;
int n = strlen(arg[iarg+1]) + 1;
outfile = new char(n);
strcpy(outfile,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"numa") == 0) {
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
numa_nodes = 1;
if (numa_nodes < 0) error->all(FLERR,"Illegal processors command");
iarg += 1;
} else error->all(FLERR,"Illegal processors command");
}
// error check
if (numa_nodes) {
if (layoutflag != CART)
error->all(FLERR,"Can only use processors numa "
"with processors grid cart");
if (send_to_partition >= 0 || recv_from_partition >= 0)
error->one(FLERR,"Cannot use processors numa with processors part");
}
}
/* ----------------------------------------------------------------------
setup 3d grid of procs based on box size, group neighbors by numa node
return 1 if successful, return 0 if not
------------------------------------------------------------------------- */
int Comm::numa_set_proc_grid()
{
// get names of all nodes
int name_length;
char node_name[MPI_MAX_PROCESSOR_NAME];
char node_names[MPI_MAX_PROCESSOR_NAME*nprocs];
MPI_Get_processor_name(node_name,&name_length);
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names,
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,world);
std::string node_string = std::string(node_name);
// get number of procs per node
std::map<std::string,int> name_map;
std::map<std::string,int>::iterator np;
for (int i = 0; i < nprocs; i++) {
std::string i_string = std::string(&node_names[i*MPI_MAX_PROCESSOR_NAME]);
np = name_map.find(i_string);
if (np == name_map.end())
name_map[i_string] = 1;
else
np->second++;
}
int procs_per_node = name_map.begin()->second;
int procs_per_numa = procs_per_node / numa_nodes;
// use non-numa mapping if any condition met
if (procs_per_numa < 4 || // less than 4 procs per numa node
procs_per_node % numa_nodes != 0 || // no-op since numa_nodes = 1 for now
nprocs % procs_per_numa != 0 || // total procs not a multiple of node
nprocs <= procs_per_numa || // only 1 node used
user_procgrid[0] > 1 || // user specified grid dimension < 1
user_procgrid[1] > 1 || // in any dimension
user_procgrid[2] > 1) {
if (me == 0) {
if (screen) fprintf(screen," 1 by 1 by 1 Node grid\n");
if (logfile) fprintf(logfile," 1 by 1 by 1 Node grid\n");
}
return 0;
}
// user settings for the factorization per numa node - currently always zero
int user_numagrid[3];
user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0;
// if user specifies 1 for a proc grid dimension,
// also use 1 for the node grid dimension
if (user_procgrid[0] == 1) user_numagrid[0] = 1;
if (user_procgrid[1] == 1) user_numagrid[1] = 1;
if (user_procgrid[2] == 1) user_numagrid[2] = 1;
// get an initial factorization for each numa node,
// if the user has not set the number of processors
int numagrid[3];
procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0);
if (numagrid[0] * numagrid[1] * numagrid[2] != procs_per_numa)
error->all(FLERR,"Bad node grid of processors");
// get a factorization for the grid of numa nodes
int node_count = nprocs / procs_per_numa;
procs2box(node_count,user_procgrid,procgrid,
numagrid[0],numagrid[1],numagrid[2],0);
if (procgrid[0] * procgrid[1] * procgrid[2] != node_count)
error->all(FLERR,"Bad grid of processors");
// repeat the numa node factorization using the subdomain sizes
// this will refine the factorization if the user specified the node layout
procs2box(procs_per_numa,user_numagrid,numagrid,
procgrid[0],procgrid[1],procgrid[2],0);
if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa)
error->all(FLERR,"Bad Node grid of processors");
if (domain->dimension == 2 && (procgrid[2] != 1 || numagrid[2] != 1))
error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
// assign a unique id to each node
int node_num = 0, node_id = 0;
for (np = name_map.begin(); np != name_map.end(); ++np) {
if (np->first == node_string) node_id = node_num;
node_num++;
}
// setup a per node communicator and find rank within
MPI_Comm node_comm;
MPI_Comm_split(world,node_id,0,&node_comm);
int node_rank;
MPI_Comm_rank(node_comm,&node_rank);
// setup a per numa communicator and find rank within
MPI_Comm numa_comm;
int local_numa = node_rank / procs_per_numa;
MPI_Comm_split(node_comm,local_numa,0,&numa_comm);
int numa_rank;
MPI_Comm_rank(numa_comm,&numa_rank);
// setup a communicator with the rank 0 procs from each numa node
MPI_Comm numa_leaders;
MPI_Comm_split(world,numa_rank,0,&numa_leaders);
// use the MPI Cartesian routines to map the nodes to the grid
// could implement layoutflag as in non-NUMA case?
int reorder = 0;
int periods[3];
periods[0] = periods[1] = periods[2] = 1;
MPI_Comm cartesian;
if (numa_rank == 0) {
MPI_Cart_create(numa_leaders,3,procgrid,periods,reorder,&cartesian);
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
}
// broadcast numa node location in grid to other procs in numa node
MPI_Bcast(myloc,3,MPI_INT,0,numa_comm);
// get storage for the process mapping
if (grid2proc) memory->destroy(grid2proc);
memory->create(grid2proc,procgrid[0]*numagrid[0],procgrid[1]*numagrid[1],
procgrid[2]*numagrid[2],"comm:grid2proc");
// compute my location within the grid
int z_offset = numa_rank / (numagrid[0] * numagrid[1]);
int y_offset = (numa_rank % (numagrid[0] * numagrid[1]))/numagrid[0];
int x_offset = numa_rank % numagrid[0];
myloc[0] = myloc[0] * numagrid[0] + x_offset;
myloc[1] = myloc[1] * numagrid[1] + y_offset;
myloc[2] = myloc[2] * numagrid[2] + z_offset;
procgrid[0] *= numagrid[0];
procgrid[1] *= numagrid[1];
procgrid[2] *= numagrid[2];
// allgather of locations to fill grid2proc
int **gridi;
memory->create(gridi,nprocs,3,"comm:gridi");
MPI_Allgather(&myloc,3,MPI_INT,gridi[0],3,MPI_INT,world);
for (int i = 0; i < nprocs; i++)
grid2proc[gridi[i][0]][gridi[i][1]][gridi[i][2]] = i;
memory->destroy(gridi);
// get my neighbors
int minus, plus;
for (int i = 0; i < 3; i++) {
numa_shift(myloc[i],procgrid[i],minus,plus);
procneigh[i][0] = minus;
procneigh[i][1] = plus;
}
procneigh[0][0] = grid2proc[procneigh[0][0]][myloc[1]][myloc[2]];
procneigh[0][1] = grid2proc[procneigh[0][1]][myloc[1]][myloc[2]];
procneigh[1][0] = grid2proc[myloc[0]][procneigh[1][0]][myloc[2]];
procneigh[1][1] = grid2proc[myloc[0]][procneigh[1][1]][myloc[2]];
procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][procneigh[2][0]];
procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][procneigh[2][1]];
if (numa_rank == 0) MPI_Comm_free(&cartesian);
MPI_Comm_free(&numa_leaders);
MPI_Comm_free(&numa_comm);
MPI_Comm_free(&node_comm);
// set lamda box params after procs are assigned
if (domain->triclinic) domain->set_lamda_box();
if (me == 0) {
if (screen) fprintf(screen," %d by %d by %d Node grid\n",
numagrid[0],numagrid[1],numagrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d Node grid\n",
numagrid[0],numagrid[1],numagrid[2]);
if (screen) fprintf(screen," %d by %d by %d processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
if (logfile) fprintf(logfile," %d by %d by %d processor grid\n",
procgrid[0],procgrid[1],procgrid[2]);
}
return 1;
}
/* ----------------------------------------------------------------------
get the index to the neighboring processors in a dimension
------------------------------------------------------------------------- */
void Comm::numa_shift(int myloc, int num_procs, int &minus, int &plus)
{
minus = myloc - 1;
if (minus < 0) minus = num_procs - 1;
plus = myloc + 1;
if (plus == num_procs) plus = 0;
if (gridflag == NUMA && mapflag != CART)
error->all(FLERR,"Processors grid numa and map choice are incompatible");
}
/* ----------------------------------------------------------------------

View File

@ -83,10 +83,16 @@ class Comm : protected Pointers {
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
int map_style; // non-0 if global->local mapping is done
int bordergroup; // only communicate this group in borders
int layoutflag; // user-selected layout for 3d proc grid
int numa_nodes; // layout NUMA-aware 3d proc grid
int gridflag; // option for creating 3d grid
int mapflag; // option for mapping procs to 3d grid
char xyz[4]; // xyz mapping of procs to 3d grid
char *customfile; // file with custom proc map
char *outfile; // proc grid/map output file
int otherflag; // 1 if this partition dependent on another
int other_style; // style of dependency
int other_procgrid[3]; // proc layout of another partition
int coregrid[3]; // procs layout of cores within a node
int *firstrecv; // where to put 1st recv atom in each swap
int **sendlist; // list of atoms to send in each swap
@ -97,8 +103,6 @@ class Comm : protected Pointers {
int maxsend,maxrecv; // current size of send/recv buffer
int maxforward,maxreverse; // max # of datums in forward/reverse comm
int procs2box(int, int[3], int[3], // map procs to 3d box
const int, const int, const int, int);
virtual void grow_send(int,int); // reallocate send buffer
virtual void grow_recv(int); // free/allocate recv buffer
virtual void grow_list(int, int); // reallocate one sendlist
@ -107,9 +111,6 @@ class Comm : protected Pointers {
virtual void allocate_multi(int); // allocate multi arrays
virtual void free_swap(); // free swap arrays
virtual void free_multi(); // free multi arrays
int numa_set_proc_grid();
void numa_shift(int, int, int &, int &);
};
}

View File

@ -69,12 +69,10 @@ void Error::universe_one(const char *file, int line, const char *str)
called by all procs in one world
close all output, screen, and log files in world
insure all procs in world call, else will hang
if abort = 0 (default):
if only one world in universe calls, universe will hang
if abort = 1: force abort of entire universe if any world in universe calls
force MPI_Abort if running in multi-partition mode
------------------------------------------------------------------------- */
void Error::all(const char *file, int line, const char *str, int abort)
void Error::all(const char *file, int line, const char *str)
{
MPI_Barrier(world);
@ -90,7 +88,7 @@ void Error::all(const char *file, int line, const char *str, int abort)
if (screen && screen != stdout) fclose(screen);
if (logfile) fclose(logfile);
if (abort) MPI_Abort(world,1);
if (universe->nworlds > 1) MPI_Abort(universe->uworld,1);
MPI_Finalize();
exit(1);
}

View File

@ -25,7 +25,7 @@ class Error : protected Pointers {
void universe_all(const char *, int, const char *);
void universe_one(const char *, int, const char *);
void all(const char *, int, const char *, int = 0);
void all(const char *, int, const char *);
void one(const char *, int, const char *);
void warning(const char *, int, const char *, int = 1);
void message(const char *, int, char *, int = 1);

View File

@ -149,12 +149,12 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
iarg += 2;
} else if (strcmp(arg[iarg],"-reorder") == 0 ||
strcmp(arg[iarg],"-r") == 0) {
if (iarg+2 > narg)
if (iarg+3 > narg)
error->universe_all(FLERR,"Invalid command-line argument");
if (universe->existflag)
error->universe_all(FLERR,"Cannot use -reorder after -partition");
universe->reorder(arg[iarg+1]);
iarg += 2;
universe->reorder(arg[iarg+1],arg[iarg+2]);
iarg += 3;
} else if (strcmp(arg[iarg],"-help") == 0 ||
strcmp(arg[iarg],"-h") == 0) {
if (iarg+1 > narg)

470
src/pair_born_coul_wolf.cpp Normal file
View File

@ -0,0 +1,470 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_born_coul_wolf.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairBornCoulWolf::PairBornCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
/* ---------------------------------------------------------------------- */
PairBornCoulWolf::~PairBornCoulWolf()
{
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
memory->destroy(cut_lj);
memory->destroy(cut_ljsq);
memory->destroy(a);
memory->destroy(rho);
memory->destroy(sigma);
memory->destroy(c);
memory->destroy(d);
memory->destroy(rhoinv);
memory->destroy(born1);
memory->destroy(born2);
memory->destroy(born3);
memory->destroy(offset);
}
}
/* ---------------------------------------------------------------------- */
void PairBornCoulWolf::compute(int eflag, int vflag)
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
evdwl = ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
double **x = atom->x;
double **f = atom->f;
double *q = atom->q;
int *type = atom->type;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
double *special_coul = force->special_coul;
double *special_lj = force->special_lj;
int newton_pair = force->newton_pair;
double qqrd2e = force->qqrd2e;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
if (j < nall) factor_coul = factor_lj = 1.0;
else {
factor_coul = special_coul[j/nall];
factor_lj = special_lj[j/nall];
j %= nall;
}
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r = sqrt(rsq);
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
fpair = (forcecoul + factor_lj*forceborn) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
evdwl,ecoul,fpair,delx,dely,delz);
}
}
}
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairBornCoulWolf::allocate()
{
allocated = 1;
int n = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
for (int i = 1; i <= n; i++)
for (int j = i; j <= n; j++)
setflag[i][j] = 0;
memory->create(cutsq,n+1,n+1,"pair:cutsq");
memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
memory->create(a,n+1,n+1,"pair:a");
memory->create(rho,n+1,n+1,"pair:rho");
memory->create(sigma,n+1,n+1,"pair:sigma");
memory->create(c,n+1,n+1,"pair:c");
memory->create(d,n+1,n+1,"pair:d");
memory->create(rhoinv,n+1,n+1,"pair:rhoinv");
memory->create(born1,n+1,n+1,"pair:born1");
memory->create(born2,n+1,n+1,"pair:born2");
memory->create(born3,n+1,n+1,"pair:born3");
memory->create(offset,n+1,n+1,"pair:offset");
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairBornCoulWolf::settings(int narg, char **arg)
{
if (narg < 2 || narg > 3) error->all(FLERR,"Illegal pair_style command");
alf = force->numeric(arg[0]);
cut_lj_global = force->numeric(arg[1]);
if (narg == 2) cut_coul = cut_lj_global;
else cut_coul = force->numeric(arg[2]);
if (allocated) {
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i+1; j <= atom->ntypes; j++)
if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
}
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
void PairBornCoulWolf::coeff(int narg, char **arg)
{
if (narg < 7 || narg > 8)
error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;
force->bounds(arg[0],atom->ntypes,ilo,ihi);
force->bounds(arg[1],atom->ntypes,jlo,jhi);
double a_one = force->numeric(arg[2]);
double rho_one = force->numeric(arg[3]);
double sigma_one = force->numeric(arg[4]);
if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
double c_one = force->numeric(arg[5]);
double d_one = force->numeric(arg[6]);
double cut_lj_one = cut_lj_global;
if (narg == 8) cut_lj_one = force->numeric(arg[7]);
int count = 0;
for (int i = ilo; i <= ihi; i++) {
for (int j = MAX(jlo,i); j <= jhi; j++) {
a[i][j] = a_one;
rho[i][j] = rho_one;
sigma[i][j] = sigma_one;
c[i][j] = c_one;
d[i][j] = d_one;
cut_lj[i][j] = cut_lj_one;
setflag[i][j] = 1;
count++;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairBornCoulWolf::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style born/coul/Wolf requires atom attribute q");
int irequest = neighbor->request(this);
cut_coulsq = cut_coul * cut_coul;
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairBornCoulWolf::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
double cut = MAX(cut_lj[i][j],cut_coul);
cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
rhoinv[i][j] = 1.0/rho[i][j];
born1[i][j] = a[i][j]/rho[i][j];
born2[i][j] = 6.0*c[i][j];
born3[i][j] = 8.0*d[i][j];
if (offset_flag) {
double rexp = exp(-cut_lj[i][j]/rho[i][j]);
offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0)
+ d[i][j]/pow(cut_lj[i][j],8.0);
} else offset[i][j] = 0.0;
cut_ljsq[j][i] = cut_ljsq[i][j];
a[j][i] = a[i][j];
c[j][i] = c[i][j];
d[j][i] = d[i][j];
rhoinv[j][i] = rhoinv[i][j];
sigma[j][i] = sigma[i][j];
born1[j][i] = born1[i][j];
born2[j][i] = born2[i][j];
born3[j][i] = born3[i][j];
offset[j][i] = offset[i][j];
return cut;
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairBornCoulWolf::write_restart(FILE *fp)
{
write_restart_settings(fp);
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
fwrite(&setflag[i][j],sizeof(int),1,fp);
if (setflag[i][j]) {
fwrite(&a[i][j],sizeof(double),1,fp);
fwrite(&rho[i][j],sizeof(double),1,fp);
fwrite(&sigma[i][j],sizeof(double),1,fp);
fwrite(&c[i][j],sizeof(double),1,fp);
fwrite(&d[i][j],sizeof(double),1,fp);
fwrite(&cut_lj[i][j],sizeof(double),1,fp);
}
}
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairBornCoulWolf::read_restart(FILE *fp)
{
read_restart_settings(fp);
allocate();
int i,j;
int me = comm->me;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
if (setflag[i][j]) {
if (me == 0) {
fread(&a[i][j],sizeof(double),1,fp);
fread(&rho[i][j],sizeof(double),1,fp);
fread(&sigma[i][j],sizeof(double),1,fp);
fread(&c[i][j],sizeof(double),1,fp);
fread(&d[i][j],sizeof(double),1,fp);
fread(&cut_lj[i][j],sizeof(double),1,fp);
}
MPI_Bcast(&a[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&rho[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&c[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&d[i][j],1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
}
}
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairBornCoulWolf::write_restart_settings(FILE *fp)
{
fwrite(&alf,sizeof(double),1,fp);
fwrite(&cut_lj_global,sizeof(double),1,fp);
fwrite(&cut_coul,sizeof(double),1,fp);
fwrite(&offset_flag,sizeof(int),1,fp);
fwrite(&mix_flag,sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairBornCoulWolf::read_restart_settings(FILE *fp)
{
if (comm->me == 0) {
fread(&alf,sizeof(double),1,fp);
fread(&cut_lj_global,sizeof(double),1,fp);
fread(&cut_coul,sizeof(double),1,fp);
fread(&offset_flag,sizeof(int),1,fp);
fread(&mix_flag,sizeof(int),1,fp);
}
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
}
/* ----------------------------------------------------------------------
only the pair part is calculated here
------------------------------------------------------------------------- */
double PairBornCoulWolf::single(int i, int j, int itype, int jtype,
double rsq,
double factor_coul, double factor_lj,
double &fforce)
{
double r2inv,r6inv,r,prefactor,rexp;
double forcecoul,forceborn,phicoul,phiborn;
double e_shift,f_shift,dvdrr,erfcc,erfcd;
r2inv = 1.0/rsq;
e_shift = erfc(alf*cut_coul) / cut_coul;
f_shift = -(e_shift+2*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r = sqrt(rsq);
rexp = exp(-r*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
fforce = (forcecoul + factor_lj*forceborn) * r2inv;
double eng = 0.0;
if (rsq < cut_coulsq) {
phicoul = prefactor * (erfcc-e_shift*r);
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
eng += phicoul;
}
if (rsq < cut_ljsq[itype][jtype]) {
phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
+ d[itype][jtype]*r2inv*r6inv - offset[itype][jtype];
eng += factor_lj*phiborn;
}
return eng;
}

56
src/pair_born_coul_wolf.h Normal file
View File

@ -0,0 +1,56 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(born/coul/wolf,PairBornCoulWolf)
#else
#ifndef LMP_PAIR_BORN_COUL_WOLF_H
#define LMP_PAIR_BORN_COUL_WOLF_H
#include "pair.h"
namespace LAMMPS_NS {
class PairBornCoulWolf : public Pair {
public:
PairBornCoulWolf(class LAMMPS *);
~PairBornCoulWolf();
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_style();
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
private:
double cut_lj_global;
double **cut_lj,**cut_ljsq;
double cut_coul,cut_coulsq;
double **a,**rho,**sigma,**c,**d;
double **rhoinv,**born1,**born2,**born3,**offset;
double alf,e_shift,f_shift;
void allocate();
};
}
#endif
#endif

328
src/pair_coul_wolf.cpp Normal file
View File

@ -0,0 +1,328 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
------------------------------------------------------------------------- */
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "pair_coul_wolf.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairCoulWolf::PairCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
/* ---------------------------------------------------------------------- */
PairCoulWolf::~PairCoulWolf()
{
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
}
}
/* ---------------------------------------------------------------------- */
void PairCoulWolf::compute(int eflag, int vflag)
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double rsq,forcecoul,factor_coul;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
ecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
double **x = atom->x;
double **f = atom->f;
double *q = atom->q;
int *type = atom->type;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
double *special_coul = force->special_coul;
int newton_pair = force->newton_pair;
double qqrd2e = force->qqrd2e;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
if (j < nall) factor_coul = 1.0;
else {
factor_coul = special_coul[j/nall];
j %= nall;
}
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
fpair = forcecoul / rsq;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
0.0,ecoul,fpair,delx,dely,delz);
}
}
}
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairCoulWolf::allocate()
{
allocated = 1;
int n = atom->ntypes;
memory->create(setflag,n+1,n+1,"pair:setflag");
for (int i = 1; i <= n; i++)
for (int j = i; j <= n; j++)
setflag[i][j] = 0;
memory->create(cutsq,n+1,n+1,"pair:cutsq");
}
/* ----------------------------------------------------------------------
global settings
unlike other pair styles,
there are no individual pair settings that these override
------------------------------------------------------------------------- */
void PairCoulWolf::settings(int narg, char **arg)
{
if (narg != 2) error->all(FLERR,"Illegal pair_style command");
alf = force->numeric(arg[0]);
cut_coul = force->numeric(arg[1]);
}
/* ----------------------------------------------------------------------
set cutoffs for one or more type pairs, optional
------------------------------------------------------------------------- */
void PairCoulWolf::coeff(int narg, char **arg)
{
if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;
force->bounds(arg[0],atom->ntypes,ilo,ihi);
force->bounds(arg[1],atom->ntypes,jlo,jhi);
int count = 0;
for (int i = ilo; i <= ihi; i++) {
for (int j = MAX(jlo,i); j <= jhi; j++) {
setflag[i][j] = 1;
count++;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairCoulWolf::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair coul/wolf requires atom attribute q");
int irequest = neighbor->request(this);
cut_coulsq = cut_coul*cut_coul;
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairCoulWolf::init_one(int i, int j)
{
return cut_coul;
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairCoulWolf::write_restart(FILE *fp)
{
write_restart_settings(fp);
int i,j;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++)
fwrite(&setflag[i][j],sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairCoulWolf::read_restart(FILE *fp)
{
read_restart_settings(fp);
allocate();
int i,j;
int me = comm->me;
for (i = 1; i <= atom->ntypes; i++)
for (j = i; j <= atom->ntypes; j++) {
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
}
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
void PairCoulWolf::write_restart_settings(FILE *fp)
{
fwrite(&alf,sizeof(double),1,fp);
fwrite(&cut_coul,sizeof(double),1,fp);
fwrite(&offset_flag,sizeof(int),1,fp);
fwrite(&mix_flag,sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
void PairCoulWolf::read_restart_settings(FILE *fp)
{
if (comm->me == 0) {
fread(&alf,sizeof(double),1,fp);
fread(&cut_coul,sizeof(double),1,fp);
fread(&offset_flag,sizeof(int),1,fp);
fread(&mix_flag,sizeof(int),1,fp);
}
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
}
/* ----------------------------------------------------------------------
only the pair part is calculated here
------------------------------------------------------------------------- */
double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq,
double factor_coul, double factor_lj,
double &fforce)
{
double r6inv,r,prefactor,rexp;
double forcecoul,forceborn,phicoul;
double e_shift,f_shift,dvdrr,erfcc,erfcd;
e_shift = erfc(alf*cut_coul) / cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
fforce = forcecoul / rsq;
double eng = 0.0;
if (rsq < cut_coulsq) {
phicoul = prefactor * (erfcc-e_shift*r);
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
eng += phicoul;
}
return eng;
}

52
src/pair_coul_wolf.h Normal file
View File

@ -0,0 +1,52 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(coul/wolf,PairCoulWolf)
#else
#ifndef LMP_PAIR_COUL_WOLF_H
#define LMP_PAIR_COUL_WOLF_H
#include "pair.h"
namespace LAMMPS_NS {
class PairCoulWolf : public Pair {
public:
PairCoulWolf(class LAMMPS *);
~PairCoulWolf();
void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
void init_style();
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
private:
double cut_coul,cut_coulsq;
double alf,e_shift,f_shift;
void allocate();
};
}
#endif
#endif

View File

@ -446,7 +446,8 @@ void PairLJCut::settings(int narg, char **arg)
void PairLJCut::coeff(int narg, char **arg)
{
if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients");
if (narg < 4 || narg > 5)
error->all(FLERR,"Incorrect args for pair coefficients");
if (!allocated) allocate();
int ilo,ihi,jlo,jhi;

View File

@ -59,22 +59,38 @@ Universe::~Universe()
}
/* ----------------------------------------------------------------------
reorder universe processors based on custom file
reorder universe processors
create uni2orig as inverse mapping
re-create uworld communicator with new ordering via Comm_split()
style = "nth", arg = N
move every Nth proc to end of rankings
style = "custom", arg = filename
file has nprocs lines with I J
I = universe proc ID in original communicator uorig
J = universe proc ID in reordered communicator uworld
create uni2orig as inverse mapping
re-create uworld communicator with new ordering via Comm_split()
------------------------------------------------------------------------- */
void Universe::reorder(char *file)
void Universe::reorder(char *style, char *arg)
{
char line[MAXLINE];
if (uworld != uorig) MPI_Comm_free(&uworld);
if (strcmp(style,"nth") == 0) {
int n = atoi(arg);
if (n <= 0)
error->universe_all(FLERR,"Invalid -reorder N value");
if (nprocs % n)
error->universe_all(FLERR,"Nprocs not a multiple of N for -reorder");
for (int i = 0; i < nprocs; i++) {
if (i < (n-1)*nprocs/n) uni2orig[i] = i/(n-1) * n + (i % (n-1));
else uni2orig[i] = (i - (n-1)*nprocs/n) * n + n-1;
}
} else if (strcmp(style,"custom") == 0) {
if (me == 0) {
FILE *fp = fopen(file,"r");
FILE *fp = fopen(arg,"r");
if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file");
// skip header = blank and comment lines
@ -102,7 +118,7 @@ void Universe::reorder(char *file)
for (int i = 1; i < nprocs; i++) {
if (!fgets(line,MAXLINE,fp))
error->one(FLERR,"Unexpected end of reorder file");
sscanf(line,"%ld %ld",&me_orig,&me_new);
sscanf(line,"%d %d",&me_orig,&me_new);
if (me_orig < 0 || me_orig >= nprocs ||
me_new < 0 || me_new >= nprocs)
error->one(FLERR,"Invalid entry in reorder file");
@ -111,8 +127,14 @@ void Universe::reorder(char *file)
fclose(fp);
}
// bcast uni2org from proc 0 to all other universe procs
MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig);
} else error->universe_all(FLERR,"Invalid command-line argument");
// create new uworld communicator
int ome,key;
MPI_Comm_rank(uorig,&ome);
for (int i = 0; i < nprocs; i++)

View File

@ -41,7 +41,7 @@ class Universe : protected Pointers {
Universe(class LAMMPS *, MPI_Comm);
~Universe();
void reorder(char *);
void reorder(char *, char *);
void add_world(char *);
int consistent();
};

View File

@ -29,11 +29,10 @@
#include "output.h"
#include "thermo.h"
#include "random_mars.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;