git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7340 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
591
src/comm.cpp
591
src/comm.cpp
@ -35,13 +35,11 @@
|
||||
#include "compute.h"
|
||||
#include "output.h"
|
||||
#include "dump.h"
|
||||
#include "procmap.h"
|
||||
#include "math_extra.h"
|
||||
#include "error.h"
|
||||
#include "memory.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
@ -54,8 +52,9 @@ using namespace LAMMPS_NS;
|
||||
#define BIG 1.0e20
|
||||
|
||||
enum{SINGLE,MULTI};
|
||||
enum{NONE,MULTIPLE};
|
||||
enum{CART,CARTREORDER,XYZ,XZY,YXZ,YZX,ZXY,ZYX};
|
||||
enum{MULTIPLE}; // same as in ProcMap
|
||||
enum{ONELEVEL,NUMA,CUSTOM};
|
||||
enum{CART,CARTREORDER,XYZ};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup MPI and allocate buffer space
|
||||
@ -67,9 +66,14 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0;
|
||||
gridflag = ONELEVEL;
|
||||
mapflag = CART;
|
||||
customfile = NULL;
|
||||
recv_from_partition = send_to_partition = -1;
|
||||
otherflag = 0;
|
||||
outfile = NULL;
|
||||
|
||||
grid2proc = NULL;
|
||||
layoutflag = CART;
|
||||
numa_nodes = 0;
|
||||
|
||||
bordergroup = 0;
|
||||
style = SINGLE;
|
||||
@ -77,8 +81,6 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
cutghostmulti = NULL;
|
||||
cutghostuser = 0.0;
|
||||
ghost_velocity = 0;
|
||||
recv_from_partition = send_to_partition = -1;
|
||||
other_partition_style = NONE;
|
||||
|
||||
// use of OpenMP threads
|
||||
// query OpenMP for number of threads/process set by user at run-time
|
||||
@ -121,7 +123,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
|
||||
|
||||
Comm::~Comm()
|
||||
{
|
||||
if (grid2proc) memory->destroy(grid2proc);
|
||||
delete [] customfile;
|
||||
delete [] outfile;
|
||||
|
||||
memory->destroy(grid2proc);
|
||||
|
||||
free_swap();
|
||||
if (style == MULTI) {
|
||||
@ -138,12 +143,13 @@ Comm::~Comm()
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create 3d grid of procs based on box size
|
||||
create 3d grid of procs based on Nprocs and box size & shape
|
||||
map processors to grid
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Comm::set_proc_grid()
|
||||
{
|
||||
// recv proc layout of another partition if my layout depends on it
|
||||
// recv 3d proc grid of another partition if my 3d grid depends on it
|
||||
|
||||
if (recv_from_partition >= 0) {
|
||||
MPI_Status status;
|
||||
@ -153,124 +159,96 @@ void Comm::set_proc_grid()
|
||||
MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
|
||||
}
|
||||
|
||||
// use NUMA routines if numa_nodes > 0
|
||||
// if NUMA routines fail, just continue
|
||||
// create ProcMap class
|
||||
|
||||
if (numa_nodes) {
|
||||
int flag = numa_set_proc_grid();
|
||||
if (flag) return;
|
||||
ProcMap *pmap = new ProcMap(lmp);
|
||||
|
||||
// create 3d grid of processors, produces procgrid
|
||||
// can fail (on one partition) if constrained by other partition
|
||||
// if numa_grid() fails, try onelevel_grid()
|
||||
|
||||
int flag;
|
||||
if (gridflag == ONELEVEL) {
|
||||
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
|
||||
otherflag,other_style,other_procgrid);
|
||||
if (!flag) error->all(FLERR,"Could not create grid of processors");
|
||||
|
||||
} else if (gridflag == NUMA) {
|
||||
flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
|
||||
if (!flag) {
|
||||
coregrid[0] = coregrid[1] = coregrid[2] = 1;
|
||||
flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
|
||||
otherflag,other_style,other_procgrid);
|
||||
}
|
||||
if (!flag) error->all(FLERR,"Could not create grid of processors");
|
||||
|
||||
} else if (gridflag == CUSTOM) {
|
||||
pmap->custom_grid(nprocs,user_procgrid,procgrid);
|
||||
}
|
||||
|
||||
// create layout of procs mapped to simulation box
|
||||
// can fail (on one partition) if constrained by other_partition_style
|
||||
|
||||
int flag =
|
||||
procs2box(nprocs,user_procgrid,procgrid,1,1,1,other_partition_style);
|
||||
if (!flag) error->all(FLERR,"Could not layout grid of processors",1);
|
||||
|
||||
// error check
|
||||
// error check on procgrid
|
||||
|
||||
if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
|
||||
error->all(FLERR,"Bad grid of processors");
|
||||
if (domain->dimension == 2 && procgrid[2] != 1)
|
||||
error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
|
||||
|
||||
// grid2proc[i][j][k] = proc that owns i,j,k location in grid
|
||||
// grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid
|
||||
|
||||
if (grid2proc) memory->destroy(grid2proc);
|
||||
memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
|
||||
"comm:grid2proc");
|
||||
|
||||
// use MPI Cartesian routines to layout 3d grid of procs
|
||||
// MPI may do layout in machine-optimized fashion
|
||||
// map processor IDs to 3d processor grid
|
||||
// produces myloc, procneigh, grid2proc
|
||||
|
||||
if (layoutflag == CART || layoutflag == CARTREORDER) {
|
||||
int reorder = 0;
|
||||
if (layoutflag == CARTREORDER) reorder = 1;
|
||||
int periods[3];
|
||||
periods[0] = periods[1] = periods[2] = 1;
|
||||
MPI_Comm cartesian;
|
||||
if (gridflag == ONELEVEL) {
|
||||
if (mapflag == CART)
|
||||
pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc);
|
||||
else if (mapflag == CARTREORDER)
|
||||
pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc);
|
||||
else if (mapflag == XYZ)
|
||||
pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc);
|
||||
|
||||
MPI_Cart_create(world,3,procgrid,periods,reorder,&cartesian);
|
||||
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
|
||||
MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
|
||||
MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
|
||||
MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
|
||||
} else if (gridflag == NUMA) {
|
||||
pmap->numa_map(coregrid,myloc,procneigh,grid2proc);
|
||||
|
||||
int coords[3];
|
||||
int i,j,k;
|
||||
for (i = 0; i < procgrid[0]; i++)
|
||||
for (j = 0; j < procgrid[1]; j++)
|
||||
for (k = 0; k < procgrid[2]; k++) {
|
||||
coords[0] = i; coords[1] = j; coords[2] = k;
|
||||
MPI_Cart_rank(cartesian,coords,&grid2proc[i][j][k]);
|
||||
} else if (gridflag == CUSTOM) {
|
||||
pmap->custom_map(myloc,procneigh,grid2proc);
|
||||
}
|
||||
|
||||
MPI_Comm_free(&cartesian);
|
||||
// print 3d grid info to screen and logfile
|
||||
|
||||
// layout 3d grid of procs explicitly, via user-requested XYZ ordering
|
||||
|
||||
} else {
|
||||
int i,j,k,ime,jme,kme;
|
||||
for (i = 0; i < procgrid[0]; i++)
|
||||
for (j = 0; j < procgrid[1]; j++)
|
||||
for (k = 0; k < procgrid[2]; k++) {
|
||||
if (layoutflag == XYZ)
|
||||
grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i;
|
||||
else if (layoutflag == XZY)
|
||||
grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i;
|
||||
else if (layoutflag == YXZ)
|
||||
grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j;
|
||||
else if (layoutflag == YZX)
|
||||
grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j;
|
||||
else if (layoutflag == ZXY)
|
||||
grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k;
|
||||
else if (layoutflag == ZYX)
|
||||
grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k;
|
||||
|
||||
if (grid2proc[i][j][k] == me) {
|
||||
ime = i; jme = j, kme = k;
|
||||
if (me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen," %d by %d by %d MPI processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
if (gridflag == NUMA)
|
||||
fprintf(screen," %d by %d by %d core grid within node\n",
|
||||
coregrid[0],coregrid[1],coregrid[2]);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile," %d by %d by %d MPI processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
if (gridflag == NUMA)
|
||||
fprintf(logfile," %d by %d by %d core grid within node\n",
|
||||
coregrid[0],coregrid[1],coregrid[2]);
|
||||
}
|
||||
}
|
||||
|
||||
myloc[0] = ime;
|
||||
myloc[1] = jme;
|
||||
myloc[2] = kme;
|
||||
// print 3d grid details to outfile
|
||||
|
||||
i = ime-1;
|
||||
if (i < 0) i = procgrid[0]-1;
|
||||
procneigh[0][0] = grid2proc[i][jme][kme];
|
||||
i = ime+1;
|
||||
if (i == procgrid[0]) i = 0;
|
||||
procneigh[0][1] = grid2proc[i][jme][kme];
|
||||
|
||||
j = jme-1;
|
||||
if (j < 0) j = procgrid[1]-1;
|
||||
procneigh[1][0] = grid2proc[ime][j][kme];
|
||||
j = jme+1;
|
||||
if (j == procgrid[1]) j = 0;
|
||||
procneigh[1][1] = grid2proc[ime][j][kme];
|
||||
|
||||
k = kme-1;
|
||||
if (k < 0) k = procgrid[2]-1;
|
||||
procneigh[2][0] = grid2proc[ime][jme][k];
|
||||
k = kme+1;
|
||||
if (k == procgrid[2]) k = 0;
|
||||
procneigh[2][1] = grid2proc[ime][jme][k];
|
||||
}
|
||||
if (outfile) pmap->output(grid2proc,outfile);
|
||||
|
||||
// set lamda box params after procs are assigned
|
||||
|
||||
if (domain->triclinic) domain->set_lamda_box();
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen," %d by %d by %d MPI processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
if (logfile) fprintf(logfile," %d by %d by %d MPI processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
}
|
||||
// free ProcMap class
|
||||
|
||||
// send my proc layout to another partition if requested
|
||||
delete pmap;
|
||||
|
||||
// send my 3d proc grid to another partition if requested
|
||||
|
||||
if (send_to_partition >= 0) {
|
||||
if (me == 0) MPI_Send(procgrid,3,MPI_INT,
|
||||
@ -1196,130 +1174,6 @@ void Comm::reverse_comm_dump(Dump *dump)
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
assign numprocs to 3d box so as to minimize surface area
|
||||
area = surface area of each of 3 faces of simulation box divided by sx,sy,sz
|
||||
for triclinic, area = cross product of 2 edge vectors stored in h matrix
|
||||
valid assignment will be factorization of numprocs = Px by Py by Pz
|
||||
user_factors = if non-zero, factors are specified by user
|
||||
sx,sy,sz = scale box xyz dimension vy dividing by sx,sy,sz
|
||||
other = 1 to enforce compatability with other partition's layout
|
||||
return factors = # of procs assigned to each dimension
|
||||
return 1 if successully factor, 0 if not
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Comm::procs2box(int numprocs, int user_factors[3], int factors[3],
|
||||
const int sx, const int sy, const int sz, int other)
|
||||
{
|
||||
factors[0] = user_factors[0];
|
||||
factors[1] = user_factors[1];
|
||||
factors[2] = user_factors[2];
|
||||
|
||||
// all 3 proc counts are specified
|
||||
|
||||
if (factors[0] && factors[1] && factors[2]) return 1;
|
||||
|
||||
// 2 out of 3 proc counts are specified
|
||||
|
||||
if (factors[0] > 0 && factors[1] > 0) {
|
||||
factors[2] = nprocs/(factors[0]*factors[1]);
|
||||
return 1;
|
||||
} else if (factors[0] > 0 && factors[2] > 0) {
|
||||
factors[1] = nprocs/(factors[0]*factors[2]);
|
||||
return 1;
|
||||
} else if (factors[1] > 0 && factors[2] > 0) {
|
||||
factors[0] = nprocs/(factors[1]*factors[2]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// determine cross-sectional areas for orthogonal and triclinic boxes
|
||||
// area[0] = xy, area[1] = xz, area[2] = yz
|
||||
|
||||
double area[3];
|
||||
if (domain->triclinic == 0) {
|
||||
area[0] = domain->xprd * domain->yprd / (sx * sy);
|
||||
area[1] = domain->xprd * domain->zprd / (sx * sz);
|
||||
area[2] = domain->yprd * domain->zprd / (sy * sz);
|
||||
} else {
|
||||
double *h = domain->h;
|
||||
double a[3],b[3],c[3];
|
||||
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
||||
b[0] = h[5]; b[1] = h[1]; b[2] = 0.0;
|
||||
MathExtra::cross3(a,b,c);
|
||||
area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy);
|
||||
a[0] = h[0]; a[1] = 0.0; a[2] = 0.0;
|
||||
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
||||
MathExtra::cross3(a,b,c);
|
||||
area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz);
|
||||
a[0] = h[5]; a[1] = h[1]; a[2] = 0.0;
|
||||
b[0] = h[4]; b[1] = h[3]; b[2] = h[2];
|
||||
MathExtra::cross3(a,b,c);
|
||||
area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz);
|
||||
}
|
||||
|
||||
double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
|
||||
|
||||
// loop thru all possible factorizations of numprocs
|
||||
// only consider valid cases that match procgrid settings
|
||||
// surf = surface area of a proc sub-domain
|
||||
// only consider cases that match user_factors & other_procgrid settings
|
||||
// success = 1 if valid factoriztion is found
|
||||
// may not be if other constraint is enforced
|
||||
|
||||
int ipx,ipy,ipz,valid;
|
||||
double surf;
|
||||
|
||||
int success = 0;
|
||||
ipx = 1;
|
||||
while (ipx <= numprocs) {
|
||||
valid = 1;
|
||||
if (user_factors[0] && ipx != user_factors[0]) valid = 0;
|
||||
if (other == MULTIPLE && other_procgrid[0] % ipx) valid = 0;
|
||||
if (numprocs % ipx) valid = 0;
|
||||
|
||||
if (!valid) {
|
||||
ipx++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ipy = 1;
|
||||
while (ipy <= numprocs/ipx) {
|
||||
valid = 1;
|
||||
if (user_factors[1] && ipy != user_factors[1]) valid = 0;
|
||||
if (other == MULTIPLE && other_procgrid[1] % ipy) valid = 0;
|
||||
if ((numprocs/ipx) % ipy) valid = 0;
|
||||
if (!valid) {
|
||||
ipy++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ipz = numprocs/ipx/ipy;
|
||||
valid = 1;
|
||||
if (user_factors[2] && ipz != user_factors[2]) valid = 0;
|
||||
if (other == MULTIPLE && other_procgrid[2] % ipz) valid = 0;
|
||||
if (domain->dimension == 2 && ipz != 1) valid = 0;
|
||||
if (!valid) {
|
||||
ipy++;
|
||||
continue;
|
||||
}
|
||||
|
||||
surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
|
||||
if (surf < bestsurf) {
|
||||
success = 1;
|
||||
bestsurf = surf;
|
||||
factors[0] = ipx;
|
||||
factors[1] = ipy;
|
||||
factors[2] = ipz;
|
||||
}
|
||||
ipy++;
|
||||
}
|
||||
|
||||
ipx++;
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
|
||||
if flag = 1, realloc
|
||||
@ -1502,7 +1356,48 @@ void Comm::set_processors(int narg, char **arg)
|
||||
|
||||
int iarg = 3;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg],"part") == 0) {
|
||||
if (strcmp(arg[iarg],"grid") == 0) {
|
||||
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
|
||||
|
||||
if (strcmp(arg[iarg+1],"level1") == 0) {
|
||||
gridflag = LEVEL1;
|
||||
iarg += 2;
|
||||
|
||||
if (strcmp(arg[iarg+1],"level2") == 0) {
|
||||
gridflag = LEVEL2;
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg+1],"numa") == 0) {
|
||||
gridflag = NUMA;
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"custom") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
|
||||
gridflag = CUSTOM;
|
||||
delete [] customfile;
|
||||
int n = strlen(arg[iarg+1]) + 1;
|
||||
customfile = new char(n);
|
||||
strcpy(customfile,arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else error->all(FLERR,"Illegal processors command");
|
||||
|
||||
} else if (strcmp(arg[iarg],"map") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
|
||||
if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
|
||||
else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
|
||||
else if (strcmp(arg[iarg+1],"xyz") == 0 ||
|
||||
strcmp(arg[iarg+1],"xzy") == 0 ||
|
||||
strcmp(arg[iarg+1],"yxz") == 0 ||
|
||||
strcmp(arg[iarg+1],"yzx") == 0 ||
|
||||
strcmp(arg[iarg+1],"zxy") == 0 ||
|
||||
strcmp(arg[iarg+1],"zyx") == 0) {
|
||||
mapflag = XYZ;
|
||||
strcpy(xyz,arg[iarg+1]);
|
||||
} else error->all(FLERR,"Illegal processors command");
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"part") == 0) {
|
||||
if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
|
||||
if (universe->nworlds == 1)
|
||||
error->all(FLERR,
|
||||
@ -1527,250 +1422,32 @@ void Comm::set_processors(int narg, char **arg)
|
||||
"is already a receiver");
|
||||
recv_from_partition = isend-1;
|
||||
}
|
||||
|
||||
// only receiver has otherflag dependency
|
||||
|
||||
if (strcmp(arg[iarg+3],"multiple") == 0) {
|
||||
if (universe->iworld == irecv-1) other_partition_style = MULTIPLE;
|
||||
if (universe->iworld == irecv-1) {
|
||||
otherflag = 1;
|
||||
other_style = MULTIPLE;
|
||||
}
|
||||
} else error->all(FLERR,"Illegal processors command");
|
||||
iarg += 4;
|
||||
|
||||
} else if (strcmp(arg[iarg],"grid") == 0) {
|
||||
} else if (strcmp(arg[iarg],"out") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
|
||||
if (strcmp(arg[iarg+1],"cart") == 0) layoutflag = CART;
|
||||
else if (strcmp(arg[iarg+1],"cart/reorder") == 0)
|
||||
layoutflag = CARTREORDER;
|
||||
else if (strcmp(arg[iarg+1],"xyz") == 0) layoutflag = XYZ;
|
||||
else if (strcmp(arg[iarg+1],"xzy") == 0) layoutflag = XZY;
|
||||
else if (strcmp(arg[iarg+1],"yxz") == 0) layoutflag = YXZ;
|
||||
else if (strcmp(arg[iarg+1],"yzx") == 0) layoutflag = YZX;
|
||||
else if (strcmp(arg[iarg+1],"zxy") == 0) layoutflag = ZXY;
|
||||
else if (strcmp(arg[iarg+1],"zyx") == 0) layoutflag = ZYX;
|
||||
else error->all(FLERR,"Illegal processors command");
|
||||
delete [] outfile;
|
||||
int n = strlen(arg[iarg+1]) + 1;
|
||||
outfile = new char(n);
|
||||
strcpy(outfile,arg[iarg+1]);
|
||||
iarg += 2;
|
||||
|
||||
} else if (strcmp(arg[iarg],"numa") == 0) {
|
||||
if (iarg+1 > narg) error->all(FLERR,"Illegal processors command");
|
||||
numa_nodes = 1;
|
||||
if (numa_nodes < 0) error->all(FLERR,"Illegal processors command");
|
||||
iarg += 1;
|
||||
|
||||
} else error->all(FLERR,"Illegal processors command");
|
||||
}
|
||||
|
||||
// error check
|
||||
|
||||
if (numa_nodes) {
|
||||
if (layoutflag != CART)
|
||||
error->all(FLERR,"Can only use processors numa "
|
||||
"with processors grid cart");
|
||||
if (send_to_partition >= 0 || recv_from_partition >= 0)
|
||||
error->one(FLERR,"Cannot use processors numa with processors part");
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup 3d grid of procs based on box size, group neighbors by numa node
|
||||
return 1 if successful, return 0 if not
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Comm::numa_set_proc_grid()
|
||||
{
|
||||
// get names of all nodes
|
||||
|
||||
int name_length;
|
||||
char node_name[MPI_MAX_PROCESSOR_NAME];
|
||||
char node_names[MPI_MAX_PROCESSOR_NAME*nprocs];
|
||||
MPI_Get_processor_name(node_name,&name_length);
|
||||
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names,
|
||||
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,world);
|
||||
std::string node_string = std::string(node_name);
|
||||
|
||||
// get number of procs per node
|
||||
|
||||
std::map<std::string,int> name_map;
|
||||
std::map<std::string,int>::iterator np;
|
||||
for (int i = 0; i < nprocs; i++) {
|
||||
std::string i_string = std::string(&node_names[i*MPI_MAX_PROCESSOR_NAME]);
|
||||
np = name_map.find(i_string);
|
||||
if (np == name_map.end())
|
||||
name_map[i_string] = 1;
|
||||
else
|
||||
np->second++;
|
||||
}
|
||||
int procs_per_node = name_map.begin()->second;
|
||||
int procs_per_numa = procs_per_node / numa_nodes;
|
||||
|
||||
// use non-numa mapping if any condition met
|
||||
|
||||
if (procs_per_numa < 4 || // less than 4 procs per numa node
|
||||
procs_per_node % numa_nodes != 0 || // no-op since numa_nodes = 1 for now
|
||||
nprocs % procs_per_numa != 0 || // total procs not a multiple of node
|
||||
nprocs <= procs_per_numa || // only 1 node used
|
||||
user_procgrid[0] > 1 || // user specified grid dimension < 1
|
||||
user_procgrid[1] > 1 || // in any dimension
|
||||
user_procgrid[2] > 1) {
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen," 1 by 1 by 1 Node grid\n");
|
||||
if (logfile) fprintf(logfile," 1 by 1 by 1 Node grid\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// user settings for the factorization per numa node - currently always zero
|
||||
|
||||
int user_numagrid[3];
|
||||
user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0;
|
||||
|
||||
// if user specifies 1 for a proc grid dimension,
|
||||
// also use 1 for the node grid dimension
|
||||
|
||||
if (user_procgrid[0] == 1) user_numagrid[0] = 1;
|
||||
if (user_procgrid[1] == 1) user_numagrid[1] = 1;
|
||||
if (user_procgrid[2] == 1) user_numagrid[2] = 1;
|
||||
|
||||
// get an initial factorization for each numa node,
|
||||
// if the user has not set the number of processors
|
||||
|
||||
int numagrid[3];
|
||||
procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0);
|
||||
if (numagrid[0] * numagrid[1] * numagrid[2] != procs_per_numa)
|
||||
error->all(FLERR,"Bad node grid of processors");
|
||||
|
||||
// get a factorization for the grid of numa nodes
|
||||
|
||||
int node_count = nprocs / procs_per_numa;
|
||||
procs2box(node_count,user_procgrid,procgrid,
|
||||
numagrid[0],numagrid[1],numagrid[2],0);
|
||||
if (procgrid[0] * procgrid[1] * procgrid[2] != node_count)
|
||||
error->all(FLERR,"Bad grid of processors");
|
||||
|
||||
// repeat the numa node factorization using the subdomain sizes
|
||||
// this will refine the factorization if the user specified the node layout
|
||||
|
||||
procs2box(procs_per_numa,user_numagrid,numagrid,
|
||||
procgrid[0],procgrid[1],procgrid[2],0);
|
||||
if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa)
|
||||
error->all(FLERR,"Bad Node grid of processors");
|
||||
if (domain->dimension == 2 && (procgrid[2] != 1 || numagrid[2] != 1))
|
||||
error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
|
||||
|
||||
// assign a unique id to each node
|
||||
|
||||
int node_num = 0, node_id = 0;
|
||||
for (np = name_map.begin(); np != name_map.end(); ++np) {
|
||||
if (np->first == node_string) node_id = node_num;
|
||||
node_num++;
|
||||
}
|
||||
|
||||
// setup a per node communicator and find rank within
|
||||
|
||||
MPI_Comm node_comm;
|
||||
MPI_Comm_split(world,node_id,0,&node_comm);
|
||||
int node_rank;
|
||||
MPI_Comm_rank(node_comm,&node_rank);
|
||||
|
||||
// setup a per numa communicator and find rank within
|
||||
|
||||
MPI_Comm numa_comm;
|
||||
int local_numa = node_rank / procs_per_numa;
|
||||
MPI_Comm_split(node_comm,local_numa,0,&numa_comm);
|
||||
int numa_rank;
|
||||
MPI_Comm_rank(numa_comm,&numa_rank);
|
||||
|
||||
// setup a communicator with the rank 0 procs from each numa node
|
||||
|
||||
MPI_Comm numa_leaders;
|
||||
MPI_Comm_split(world,numa_rank,0,&numa_leaders);
|
||||
|
||||
// use the MPI Cartesian routines to map the nodes to the grid
|
||||
// could implement layoutflag as in non-NUMA case?
|
||||
|
||||
int reorder = 0;
|
||||
int periods[3];
|
||||
periods[0] = periods[1] = periods[2] = 1;
|
||||
MPI_Comm cartesian;
|
||||
if (numa_rank == 0) {
|
||||
MPI_Cart_create(numa_leaders,3,procgrid,periods,reorder,&cartesian);
|
||||
MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
|
||||
}
|
||||
|
||||
// broadcast numa node location in grid to other procs in numa node
|
||||
|
||||
MPI_Bcast(myloc,3,MPI_INT,0,numa_comm);
|
||||
|
||||
// get storage for the process mapping
|
||||
|
||||
if (grid2proc) memory->destroy(grid2proc);
|
||||
memory->create(grid2proc,procgrid[0]*numagrid[0],procgrid[1]*numagrid[1],
|
||||
procgrid[2]*numagrid[2],"comm:grid2proc");
|
||||
|
||||
// compute my location within the grid
|
||||
|
||||
int z_offset = numa_rank / (numagrid[0] * numagrid[1]);
|
||||
int y_offset = (numa_rank % (numagrid[0] * numagrid[1]))/numagrid[0];
|
||||
int x_offset = numa_rank % numagrid[0];
|
||||
myloc[0] = myloc[0] * numagrid[0] + x_offset;
|
||||
myloc[1] = myloc[1] * numagrid[1] + y_offset;
|
||||
myloc[2] = myloc[2] * numagrid[2] + z_offset;
|
||||
procgrid[0] *= numagrid[0];
|
||||
procgrid[1] *= numagrid[1];
|
||||
procgrid[2] *= numagrid[2];
|
||||
|
||||
// allgather of locations to fill grid2proc
|
||||
|
||||
int **gridi;
|
||||
memory->create(gridi,nprocs,3,"comm:gridi");
|
||||
MPI_Allgather(&myloc,3,MPI_INT,gridi[0],3,MPI_INT,world);
|
||||
for (int i = 0; i < nprocs; i++)
|
||||
grid2proc[gridi[i][0]][gridi[i][1]][gridi[i][2]] = i;
|
||||
memory->destroy(gridi);
|
||||
|
||||
// get my neighbors
|
||||
|
||||
int minus, plus;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
numa_shift(myloc[i],procgrid[i],minus,plus);
|
||||
procneigh[i][0] = minus;
|
||||
procneigh[i][1] = plus;
|
||||
}
|
||||
procneigh[0][0] = grid2proc[procneigh[0][0]][myloc[1]][myloc[2]];
|
||||
procneigh[0][1] = grid2proc[procneigh[0][1]][myloc[1]][myloc[2]];
|
||||
procneigh[1][0] = grid2proc[myloc[0]][procneigh[1][0]][myloc[2]];
|
||||
procneigh[1][1] = grid2proc[myloc[0]][procneigh[1][1]][myloc[2]];
|
||||
procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][procneigh[2][0]];
|
||||
procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][procneigh[2][1]];
|
||||
|
||||
if (numa_rank == 0) MPI_Comm_free(&cartesian);
|
||||
MPI_Comm_free(&numa_leaders);
|
||||
MPI_Comm_free(&numa_comm);
|
||||
MPI_Comm_free(&node_comm);
|
||||
|
||||
// set lamda box params after procs are assigned
|
||||
|
||||
if (domain->triclinic) domain->set_lamda_box();
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen," %d by %d by %d Node grid\n",
|
||||
numagrid[0],numagrid[1],numagrid[2]);
|
||||
if (logfile) fprintf(logfile," %d by %d by %d Node grid\n",
|
||||
numagrid[0],numagrid[1],numagrid[2]);
|
||||
if (screen) fprintf(screen," %d by %d by %d processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
if (logfile) fprintf(logfile," %d by %d by %d processor grid\n",
|
||||
procgrid[0],procgrid[1],procgrid[2]);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
get the index to the neighboring processors in a dimension
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Comm::numa_shift(int myloc, int num_procs, int &minus, int &plus)
|
||||
{
|
||||
minus = myloc - 1;
|
||||
if (minus < 0) minus = num_procs - 1;
|
||||
plus = myloc + 1;
|
||||
if (plus == num_procs) plus = 0;
|
||||
if (gridflag == NUMA && mapflag != CART)
|
||||
error->all(FLERR,"Processors grid numa and map choice are incompatible");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
15
src/comm.h
15
src/comm.h
@ -83,10 +83,16 @@ class Comm : protected Pointers {
|
||||
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
|
||||
int map_style; // non-0 if global->local mapping is done
|
||||
int bordergroup; // only communicate this group in borders
|
||||
int layoutflag; // user-selected layout for 3d proc grid
|
||||
int numa_nodes; // layout NUMA-aware 3d proc grid
|
||||
int gridflag; // option for creating 3d grid
|
||||
int mapflag; // option for mapping procs to 3d grid
|
||||
char xyz[4]; // xyz mapping of procs to 3d grid
|
||||
char *customfile; // file with custom proc map
|
||||
char *outfile; // proc grid/map output file
|
||||
|
||||
int otherflag; // 1 if this partition dependent on another
|
||||
int other_style; // style of dependency
|
||||
int other_procgrid[3]; // proc layout of another partition
|
||||
int coregrid[3]; // procs layout of cores within a node
|
||||
|
||||
int *firstrecv; // where to put 1st recv atom in each swap
|
||||
int **sendlist; // list of atoms to send in each swap
|
||||
@ -97,8 +103,6 @@ class Comm : protected Pointers {
|
||||
int maxsend,maxrecv; // current size of send/recv buffer
|
||||
int maxforward,maxreverse; // max # of datums in forward/reverse comm
|
||||
|
||||
int procs2box(int, int[3], int[3], // map procs to 3d box
|
||||
const int, const int, const int, int);
|
||||
virtual void grow_send(int,int); // reallocate send buffer
|
||||
virtual void grow_recv(int); // free/allocate recv buffer
|
||||
virtual void grow_list(int, int); // reallocate one sendlist
|
||||
@ -107,9 +111,6 @@ class Comm : protected Pointers {
|
||||
virtual void allocate_multi(int); // allocate multi arrays
|
||||
virtual void free_swap(); // free swap arrays
|
||||
virtual void free_multi(); // free multi arrays
|
||||
|
||||
int numa_set_proc_grid();
|
||||
void numa_shift(int, int, int &, int &);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -69,12 +69,10 @@ void Error::universe_one(const char *file, int line, const char *str)
|
||||
called by all procs in one world
|
||||
close all output, screen, and log files in world
|
||||
insure all procs in world call, else will hang
|
||||
if abort = 0 (default):
|
||||
if only one world in universe calls, universe will hang
|
||||
if abort = 1: force abort of entire universe if any world in universe calls
|
||||
force MPI_Abort if running in multi-partition mode
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Error::all(const char *file, int line, const char *str, int abort)
|
||||
void Error::all(const char *file, int line, const char *str)
|
||||
{
|
||||
MPI_Barrier(world);
|
||||
|
||||
@ -90,7 +88,7 @@ void Error::all(const char *file, int line, const char *str, int abort)
|
||||
if (screen && screen != stdout) fclose(screen);
|
||||
if (logfile) fclose(logfile);
|
||||
|
||||
if (abort) MPI_Abort(world,1);
|
||||
if (universe->nworlds > 1) MPI_Abort(universe->uworld,1);
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ class Error : protected Pointers {
|
||||
void universe_all(const char *, int, const char *);
|
||||
void universe_one(const char *, int, const char *);
|
||||
|
||||
void all(const char *, int, const char *, int = 0);
|
||||
void all(const char *, int, const char *);
|
||||
void one(const char *, int, const char *);
|
||||
void warning(const char *, int, const char *, int = 1);
|
||||
void message(const char *, int, char *, int = 1);
|
||||
|
||||
@ -149,12 +149,12 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"-reorder") == 0 ||
|
||||
strcmp(arg[iarg],"-r") == 0) {
|
||||
if (iarg+2 > narg)
|
||||
if (iarg+3 > narg)
|
||||
error->universe_all(FLERR,"Invalid command-line argument");
|
||||
if (universe->existflag)
|
||||
error->universe_all(FLERR,"Cannot use -reorder after -partition");
|
||||
universe->reorder(arg[iarg+1]);
|
||||
iarg += 2;
|
||||
universe->reorder(arg[iarg+1],arg[iarg+2]);
|
||||
iarg += 3;
|
||||
} else if (strcmp(arg[iarg],"-help") == 0 ||
|
||||
strcmp(arg[iarg],"-h") == 0) {
|
||||
if (iarg+1 > narg)
|
||||
|
||||
470
src/pair_born_coul_wolf.cpp
Normal file
470
src/pair_born_coul_wolf.cpp
Normal file
@ -0,0 +1,470 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_born_coul_wolf.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairBornCoulWolf::PairBornCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairBornCoulWolf::~PairBornCoulWolf()
|
||||
{
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
|
||||
memory->destroy(cut_lj);
|
||||
memory->destroy(cut_ljsq);
|
||||
memory->destroy(a);
|
||||
memory->destroy(rho);
|
||||
memory->destroy(sigma);
|
||||
memory->destroy(c);
|
||||
memory->destroy(d);
|
||||
memory->destroy(rhoinv);
|
||||
memory->destroy(born1);
|
||||
memory->destroy(born2);
|
||||
memory->destroy(born3);
|
||||
memory->destroy(offset);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::compute(int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
|
||||
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
|
||||
double prefactor;
|
||||
double r,rexp;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
|
||||
|
||||
evdwl = ecoul = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
int newton_pair = force->newton_pair;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
// self and shifted coulombic energy
|
||||
|
||||
e_self = v_sh = 0.0;
|
||||
e_shift = erfc(alf*cut_coul)/cut_coul;
|
||||
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
|
||||
cut_coul;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
qisq = qtmp*qtmp;
|
||||
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
|
||||
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
r = sqrt(rsq);
|
||||
prefactor = qqrd2e*qtmp*q[j]/r;
|
||||
erfcc = erfc(alf*r);
|
||||
erfcd = exp(-alf*alf*r*r);
|
||||
v_sh = (erfcc - e_shift*r) * prefactor;
|
||||
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = dvdrr*rsq*prefactor;
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
r = sqrt(rsq);
|
||||
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
|
||||
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
|
||||
+ born3[itype][jtype]*r2inv*r6inv;
|
||||
} else forceborn = 0.0;
|
||||
|
||||
fpair = (forcecoul + factor_lj*forceborn) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (newton_pair || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq) {
|
||||
ecoul = v_sh;
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else ecoul = 0.0;
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
|
||||
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
} else evdwl = 0.0;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i,j,nlocal,newton_pair,
|
||||
evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->ntypes;
|
||||
|
||||
memory->create(setflag,n+1,n+1,"pair:setflag");
|
||||
for (int i = 1; i <= n; i++)
|
||||
for (int j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
memory->create(cutsq,n+1,n+1,"pair:cutsq");
|
||||
|
||||
memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
|
||||
memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
|
||||
memory->create(a,n+1,n+1,"pair:a");
|
||||
memory->create(rho,n+1,n+1,"pair:rho");
|
||||
memory->create(sigma,n+1,n+1,"pair:sigma");
|
||||
memory->create(c,n+1,n+1,"pair:c");
|
||||
memory->create(d,n+1,n+1,"pair:d");
|
||||
memory->create(rhoinv,n+1,n+1,"pair:rhoinv");
|
||||
memory->create(born1,n+1,n+1,"pair:born1");
|
||||
memory->create(born2,n+1,n+1,"pair:born2");
|
||||
memory->create(born3,n+1,n+1,"pair:born3");
|
||||
memory->create(offset,n+1,n+1,"pair:offset");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg < 2 || narg > 3) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
alf = force->numeric(arg[0]);
|
||||
cut_lj_global = force->numeric(arg[1]);
|
||||
if (narg == 2) cut_coul = cut_lj_global;
|
||||
else cut_coul = force->numeric(arg[2]);
|
||||
|
||||
if (allocated) {
|
||||
int i,j;
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
for (j = i+1; j <= atom->ntypes; j++)
|
||||
if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg < 7 || narg > 8)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi,jlo,jhi;
|
||||
force->bounds(arg[0],atom->ntypes,ilo,ihi);
|
||||
force->bounds(arg[1],atom->ntypes,jlo,jhi);
|
||||
|
||||
double a_one = force->numeric(arg[2]);
|
||||
double rho_one = force->numeric(arg[3]);
|
||||
double sigma_one = force->numeric(arg[4]);
|
||||
if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
double c_one = force->numeric(arg[5]);
|
||||
double d_one = force->numeric(arg[6]);
|
||||
|
||||
double cut_lj_one = cut_lj_global;
|
||||
if (narg == 8) cut_lj_one = force->numeric(arg[7]);
|
||||
|
||||
int count = 0;
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
for (int j = MAX(jlo,i); j <= jhi; j++) {
|
||||
a[i][j] = a_one;
|
||||
rho[i][j] = rho_one;
|
||||
sigma[i][j] = sigma_one;
|
||||
c[i][j] = c_one;
|
||||
d[i][j] = d_one;
|
||||
cut_lj[i][j] = cut_lj_one;
|
||||
setflag[i][j] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all(FLERR,"Pair style born/coul/Wolf requires atom attribute q");
|
||||
|
||||
int irequest = neighbor->request(this);
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairBornCoulWolf::init_one(int i, int j)
|
||||
{
|
||||
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
|
||||
|
||||
double cut = MAX(cut_lj[i][j],cut_coul);
|
||||
cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
|
||||
|
||||
rhoinv[i][j] = 1.0/rho[i][j];
|
||||
born1[i][j] = a[i][j]/rho[i][j];
|
||||
born2[i][j] = 6.0*c[i][j];
|
||||
born3[i][j] = 8.0*d[i][j];
|
||||
|
||||
if (offset_flag) {
|
||||
double rexp = exp(-cut_lj[i][j]/rho[i][j]);
|
||||
offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0)
|
||||
+ d[i][j]/pow(cut_lj[i][j],8.0);
|
||||
} else offset[i][j] = 0.0;
|
||||
|
||||
cut_ljsq[j][i] = cut_ljsq[i][j];
|
||||
a[j][i] = a[i][j];
|
||||
c[j][i] = c[i][j];
|
||||
d[j][i] = d[i][j];
|
||||
rhoinv[j][i] = rhoinv[i][j];
|
||||
sigma[j][i] = sigma[i][j];
|
||||
born1[j][i] = born1[i][j];
|
||||
born2[j][i] = born2[i][j];
|
||||
born3[j][i] = born3[i][j];
|
||||
offset[j][i] = offset[i][j];
|
||||
|
||||
return cut;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::write_restart(FILE *fp)
|
||||
{
|
||||
write_restart_settings(fp);
|
||||
|
||||
int i,j;
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
for (j = i; j <= atom->ntypes; j++) {
|
||||
fwrite(&setflag[i][j],sizeof(int),1,fp);
|
||||
if (setflag[i][j]) {
|
||||
fwrite(&a[i][j],sizeof(double),1,fp);
|
||||
fwrite(&rho[i][j],sizeof(double),1,fp);
|
||||
fwrite(&sigma[i][j],sizeof(double),1,fp);
|
||||
fwrite(&c[i][j],sizeof(double),1,fp);
|
||||
fwrite(&d[i][j],sizeof(double),1,fp);
|
||||
fwrite(&cut_lj[i][j],sizeof(double),1,fp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::read_restart(FILE *fp)
|
||||
{
|
||||
read_restart_settings(fp);
|
||||
|
||||
allocate();
|
||||
|
||||
int i,j;
|
||||
int me = comm->me;
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
for (j = i; j <= atom->ntypes; j++) {
|
||||
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
|
||||
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
|
||||
if (setflag[i][j]) {
|
||||
if (me == 0) {
|
||||
fread(&a[i][j],sizeof(double),1,fp);
|
||||
fread(&rho[i][j],sizeof(double),1,fp);
|
||||
fread(&sigma[i][j],sizeof(double),1,fp);
|
||||
fread(&c[i][j],sizeof(double),1,fp);
|
||||
fread(&d[i][j],sizeof(double),1,fp);
|
||||
fread(&cut_lj[i][j],sizeof(double),1,fp);
|
||||
}
|
||||
MPI_Bcast(&a[i][j],1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&rho[i][j],1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&c[i][j],1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&d[i][j],1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::write_restart_settings(FILE *fp)
|
||||
{
|
||||
fwrite(&alf,sizeof(double),1,fp);
|
||||
fwrite(&cut_lj_global,sizeof(double),1,fp);
|
||||
fwrite(&cut_coul,sizeof(double),1,fp);
|
||||
fwrite(&offset_flag,sizeof(int),1,fp);
|
||||
fwrite(&mix_flag,sizeof(int),1,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairBornCoulWolf::read_restart_settings(FILE *fp)
|
||||
{
|
||||
if (comm->me == 0) {
|
||||
fread(&alf,sizeof(double),1,fp);
|
||||
fread(&cut_lj_global,sizeof(double),1,fp);
|
||||
fread(&cut_coul,sizeof(double),1,fp);
|
||||
fread(&offset_flag,sizeof(int),1,fp);
|
||||
fread(&mix_flag,sizeof(int),1,fp);
|
||||
}
|
||||
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
only the pair part is calculated here
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairBornCoulWolf::single(int i, int j, int itype, int jtype,
|
||||
double rsq,
|
||||
double factor_coul, double factor_lj,
|
||||
double &fforce)
|
||||
{
|
||||
double r2inv,r6inv,r,prefactor,rexp;
|
||||
double forcecoul,forceborn,phicoul,phiborn;
|
||||
double e_shift,f_shift,dvdrr,erfcc,erfcd;
|
||||
|
||||
r2inv = 1.0/rsq;
|
||||
e_shift = erfc(alf*cut_coul) / cut_coul;
|
||||
f_shift = -(e_shift+2*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
|
||||
cut_coul;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
r = sqrt(rsq);
|
||||
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
|
||||
erfcc = erfc(alf*r);
|
||||
erfcd = exp(-alf*alf*r*r);
|
||||
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = dvdrr*rsq*prefactor;
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
r = sqrt(rsq);
|
||||
rexp = exp(-r*rhoinv[itype][jtype]);
|
||||
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
|
||||
+ born3[itype][jtype]*r2inv*r6inv;
|
||||
} else forceborn = 0.0;
|
||||
|
||||
fforce = (forcecoul + factor_lj*forceborn) * r2inv;
|
||||
|
||||
double eng = 0.0;
|
||||
if (rsq < cut_coulsq) {
|
||||
phicoul = prefactor * (erfcc-e_shift*r);
|
||||
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
|
||||
eng += phicoul;
|
||||
}
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
|
||||
+ d[itype][jtype]*r2inv*r6inv - offset[itype][jtype];
|
||||
eng += factor_lj*phiborn;
|
||||
}
|
||||
return eng;
|
||||
}
|
||||
56
src/pair_born_coul_wolf.h
Normal file
56
src/pair_born_coul_wolf.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(born/coul/wolf,PairBornCoulWolf)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BORN_COUL_WOLF_H
|
||||
#define LMP_PAIR_BORN_COUL_WOLF_H
|
||||
|
||||
#include "pair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBornCoulWolf : public Pair {
|
||||
public:
|
||||
PairBornCoulWolf(class LAMMPS *);
|
||||
~PairBornCoulWolf();
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
|
||||
private:
|
||||
double cut_lj_global;
|
||||
double **cut_lj,**cut_ljsq;
|
||||
double cut_coul,cut_coulsq;
|
||||
double **a,**rho,**sigma,**c,**d;
|
||||
double **rhoinv,**born1,**born2,**born3,**offset;
|
||||
double alf,e_shift,f_shift;
|
||||
|
||||
void allocate();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
328
src/pair_coul_wolf.cpp
Normal file
328
src/pair_coul_wolf.cpp
Normal file
@ -0,0 +1,328 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "pair_coul_wolf.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairCoulWolf::PairCoulWolf(LAMMPS *lmp) : Pair(lmp) {}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairCoulWolf::~PairCoulWolf()
|
||||
{
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::compute(int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
|
||||
double rsq,forcecoul,factor_coul;
|
||||
double prefactor;
|
||||
double r,rexp;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
|
||||
|
||||
ecoul = 0.0;
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_coul = force->special_coul;
|
||||
int newton_pair = force->newton_pair;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
// self and shifted coulombic energy
|
||||
|
||||
e_self = v_sh = 0.0;
|
||||
e_shift = erfc(alf*cut_coul)/cut_coul;
|
||||
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
|
||||
cut_coul;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
qisq = qtmp*qtmp;
|
||||
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
|
||||
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_coul = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
r = sqrt(rsq);
|
||||
prefactor = qqrd2e*qtmp*q[j]/r;
|
||||
erfcc = erfc(alf*r);
|
||||
erfcd = exp(-alf*alf*r*r);
|
||||
v_sh = (erfcc - e_shift*r) * prefactor;
|
||||
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = dvdrr*rsq*prefactor;
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
fpair = forcecoul / rsq;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (newton_pair || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq) {
|
||||
ecoul = v_sh;
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else ecoul = 0.0;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i,j,nlocal,newton_pair,
|
||||
0.0,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->ntypes;
|
||||
|
||||
memory->create(setflag,n+1,n+1,"pair:setflag");
|
||||
for (int i = 1; i <= n; i++)
|
||||
for (int j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
memory->create(cutsq,n+1,n+1,"pair:cutsq");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
unlike other pair styles,
|
||||
there are no individual pair settings that these override
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg != 2) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
alf = force->numeric(arg[0]);
|
||||
cut_coul = force->numeric(arg[1]);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set cutoffs for one or more type pairs, optional
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi,jlo,jhi;
|
||||
force->bounds(arg[0],atom->ntypes,ilo,ihi);
|
||||
force->bounds(arg[1],atom->ntypes,jlo,jhi);
|
||||
|
||||
int count = 0;
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
for (int j = MAX(jlo,i); j <= jhi; j++) {
|
||||
setflag[i][j] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all(FLERR,"Pair coul/wolf requires atom attribute q");
|
||||
|
||||
int irequest = neighbor->request(this);
|
||||
|
||||
cut_coulsq = cut_coul*cut_coul;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairCoulWolf::init_one(int i, int j)
|
||||
{
|
||||
return cut_coul;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::write_restart(FILE *fp)
|
||||
{
|
||||
write_restart_settings(fp);
|
||||
|
||||
int i,j;
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
for (j = i; j <= atom->ntypes; j++)
|
||||
fwrite(&setflag[i][j],sizeof(int),1,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::read_restart(FILE *fp)
|
||||
{
|
||||
read_restart_settings(fp);
|
||||
allocate();
|
||||
|
||||
int i,j;
|
||||
int me = comm->me;
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
for (j = i; j <= atom->ntypes; j++) {
|
||||
if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
|
||||
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::write_restart_settings(FILE *fp)
|
||||
{
|
||||
fwrite(&alf,sizeof(double),1,fp);
|
||||
fwrite(&cut_coul,sizeof(double),1,fp);
|
||||
fwrite(&offset_flag,sizeof(int),1,fp);
|
||||
fwrite(&mix_flag,sizeof(int),1,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads from restart file, bcasts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCoulWolf::read_restart_settings(FILE *fp)
|
||||
{
|
||||
if (comm->me == 0) {
|
||||
fread(&alf,sizeof(double),1,fp);
|
||||
fread(&cut_coul,sizeof(double),1,fp);
|
||||
fread(&offset_flag,sizeof(int),1,fp);
|
||||
fread(&mix_flag,sizeof(int),1,fp);
|
||||
}
|
||||
MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
only the pair part is calculated here
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq,
|
||||
double factor_coul, double factor_lj,
|
||||
double &fforce)
|
||||
{
|
||||
double r6inv,r,prefactor,rexp;
|
||||
double forcecoul,forceborn,phicoul;
|
||||
double e_shift,f_shift,dvdrr,erfcc,erfcd;
|
||||
|
||||
e_shift = erfc(alf*cut_coul) / cut_coul;
|
||||
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
|
||||
cut_coul;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
r = sqrt(rsq);
|
||||
prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
|
||||
erfcc = erfc(alf*r);
|
||||
erfcd = exp(-alf*alf*r*r);
|
||||
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = dvdrr*rsq*prefactor;
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else forcecoul = 0.0;
|
||||
fforce = forcecoul / rsq;
|
||||
|
||||
double eng = 0.0;
|
||||
if (rsq < cut_coulsq) {
|
||||
phicoul = prefactor * (erfcc-e_shift*r);
|
||||
if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
|
||||
eng += phicoul;
|
||||
}
|
||||
return eng;
|
||||
}
|
||||
52
src/pair_coul_wolf.h
Normal file
52
src/pair_coul_wolf.h
Normal file
@ -0,0 +1,52 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(coul/wolf,PairCoulWolf)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_COUL_WOLF_H
|
||||
#define LMP_PAIR_COUL_WOLF_H
|
||||
|
||||
#include "pair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCoulWolf : public Pair {
|
||||
public:
|
||||
PairCoulWolf(class LAMMPS *);
|
||||
~PairCoulWolf();
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
void write_restart_settings(FILE *);
|
||||
void read_restart_settings(FILE *);
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
|
||||
private:
|
||||
double cut_coul,cut_coulsq;
|
||||
double alf,e_shift,f_shift;
|
||||
|
||||
void allocate();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -446,7 +446,8 @@ void PairLJCut::settings(int narg, char **arg)
|
||||
|
||||
void PairLJCut::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (narg < 4 || narg > 5)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi,jlo,jhi;
|
||||
|
||||
@ -59,22 +59,38 @@ Universe::~Universe()
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
reorder universe processors based on custom file
|
||||
reorder universe processors
|
||||
create uni2orig as inverse mapping
|
||||
re-create uworld communicator with new ordering via Comm_split()
|
||||
style = "nth", arg = N
|
||||
move every Nth proc to end of rankings
|
||||
style = "custom", arg = filename
|
||||
file has nprocs lines with I J
|
||||
I = universe proc ID in original communicator uorig
|
||||
J = universe proc ID in reordered communicator uworld
|
||||
create uni2orig as inverse mapping
|
||||
re-create uworld communicator with new ordering via Comm_split()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Universe::reorder(char *file)
|
||||
void Universe::reorder(char *style, char *arg)
|
||||
{
|
||||
char line[MAXLINE];
|
||||
|
||||
if (uworld != uorig) MPI_Comm_free(&uworld);
|
||||
|
||||
if (strcmp(style,"nth") == 0) {
|
||||
int n = atoi(arg);
|
||||
if (n <= 0)
|
||||
error->universe_all(FLERR,"Invalid -reorder N value");
|
||||
if (nprocs % n)
|
||||
error->universe_all(FLERR,"Nprocs not a multiple of N for -reorder");
|
||||
for (int i = 0; i < nprocs; i++) {
|
||||
if (i < (n-1)*nprocs/n) uni2orig[i] = i/(n-1) * n + (i % (n-1));
|
||||
else uni2orig[i] = (i - (n-1)*nprocs/n) * n + n-1;
|
||||
}
|
||||
|
||||
} else if (strcmp(style,"custom") == 0) {
|
||||
|
||||
if (me == 0) {
|
||||
FILE *fp = fopen(file,"r");
|
||||
FILE *fp = fopen(arg,"r");
|
||||
if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file");
|
||||
|
||||
// skip header = blank and comment lines
|
||||
@ -102,7 +118,7 @@ void Universe::reorder(char *file)
|
||||
for (int i = 1; i < nprocs; i++) {
|
||||
if (!fgets(line,MAXLINE,fp))
|
||||
error->one(FLERR,"Unexpected end of reorder file");
|
||||
sscanf(line,"%ld %ld",&me_orig,&me_new);
|
||||
sscanf(line,"%d %d",&me_orig,&me_new);
|
||||
if (me_orig < 0 || me_orig >= nprocs ||
|
||||
me_new < 0 || me_new >= nprocs)
|
||||
error->one(FLERR,"Invalid entry in reorder file");
|
||||
@ -111,8 +127,14 @@ void Universe::reorder(char *file)
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
// bcast uni2org from proc 0 to all other universe procs
|
||||
|
||||
MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig);
|
||||
|
||||
} else error->universe_all(FLERR,"Invalid command-line argument");
|
||||
|
||||
// create new uworld communicator
|
||||
|
||||
int ome,key;
|
||||
MPI_Comm_rank(uorig,&ome);
|
||||
for (int i = 0; i < nprocs; i++)
|
||||
|
||||
@ -41,7 +41,7 @@ class Universe : protected Pointers {
|
||||
|
||||
Universe(class LAMMPS *, MPI_Comm);
|
||||
~Universe();
|
||||
void reorder(char *);
|
||||
void reorder(char *, char *);
|
||||
void add_world(char *);
|
||||
int consistent();
|
||||
};
|
||||
|
||||
@ -29,11 +29,10 @@
|
||||
#include "output.h"
|
||||
#include "thermo.h"
|
||||
#include "random_mars.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
#include "math_const.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user