From 86347b81e32f08bc403ba960a599d1f593511bb4 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 13 Dec 2011 15:57:18 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7340 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/comm.cpp | 603 +++++++++--------------------------- src/comm.h | 15 +- src/error.cpp | 8 +- src/error.h | 2 +- src/lammps.cpp | 6 +- src/pair_born_coul_wolf.cpp | 470 ++++++++++++++++++++++++++++ src/pair_born_coul_wolf.h | 56 ++++ src/pair_coul_wolf.cpp | 328 ++++++++++++++++++++ src/pair_coul_wolf.h | 52 ++++ src/pair_lj_cut.cpp | 5 +- src/universe.cpp | 88 ++++-- src/universe.h | 2 +- src/variable.cpp | 3 +- 13 files changed, 1121 insertions(+), 517 deletions(-) create mode 100644 src/pair_born_coul_wolf.cpp create mode 100644 src/pair_born_coul_wolf.h create mode 100644 src/pair_coul_wolf.cpp create mode 100644 src/pair_coul_wolf.h diff --git a/src/comm.cpp b/src/comm.cpp index 7e7ad5b3ef..e8f4c0ca54 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -35,13 +35,11 @@ #include "compute.h" #include "output.h" #include "dump.h" +#include "procmap.h" #include "math_extra.h" #include "error.h" #include "memory.h" -#include -#include - #ifdef _OPENMP #include "omp.h" #endif @@ -54,8 +52,9 @@ using namespace LAMMPS_NS; #define BIG 1.0e20 enum{SINGLE,MULTI}; -enum{NONE,MULTIPLE}; -enum{CART,CARTREORDER,XYZ,XZY,YXZ,YZX,ZXY,ZYX}; +enum{MULTIPLE}; // same as in ProcMap +enum{ONELEVEL,NUMA,CUSTOM}; +enum{CART,CARTREORDER,XYZ}; /* ---------------------------------------------------------------------- setup MPI and allocate buffer space @@ -67,9 +66,14 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) MPI_Comm_size(world,&nprocs); user_procgrid[0] = user_procgrid[1] = user_procgrid[2] = 0; + gridflag = ONELEVEL; + mapflag = CART; + customfile = NULL; + recv_from_partition = send_to_partition = -1; + otherflag = 0; + outfile = NULL; + grid2proc = NULL; - layoutflag = CART; - numa_nodes = 0; bordergroup = 0; style = SINGLE; @@ -77,8 +81,6 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) cutghostmulti = NULL; cutghostuser = 0.0; ghost_velocity = 0; - recv_from_partition = send_to_partition = -1; - other_partition_style = NONE; // use of OpenMP threads // query OpenMP for number of threads/process set by user at run-time @@ -121,7 +123,10 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) Comm::~Comm() { - if (grid2proc) memory->destroy(grid2proc); + delete [] customfile; + delete [] outfile; + + memory->destroy(grid2proc); free_swap(); if (style == MULTI) { @@ -138,12 +143,13 @@ Comm::~Comm() } /* ---------------------------------------------------------------------- - create 3d grid of procs based on box size + create 3d grid of procs based on Nprocs and box size & shape + map processors to grid ------------------------------------------------------------------------- */ void Comm::set_proc_grid() { - // recv proc layout of another partition if my layout depends on it + // recv 3d proc grid of another partition if my 3d grid depends on it if (recv_from_partition >= 0) { MPI_Status status; @@ -153,124 +159,96 @@ void Comm::set_proc_grid() MPI_Bcast(other_procgrid,3,MPI_INT,0,world); } - // use NUMA routines if numa_nodes > 0 - // if NUMA routines fail, just continue + // create ProcMap class - if (numa_nodes) { - int flag = numa_set_proc_grid(); - if (flag) return; + ProcMap *pmap = new ProcMap(lmp); + + // create 3d grid of processors, produces procgrid + // can fail (on one partition) if constrained by other partition + // if numa_grid() fails, try onelevel_grid() + + int flag; + if 
(gridflag == ONELEVEL) { + flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid, + otherflag,other_style,other_procgrid); + if (!flag) error->all(FLERR,"Could not create grid of processors"); + + } else if (gridflag == NUMA) { + flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid); + if (!flag) { + coregrid[0] = coregrid[1] = coregrid[2] = 1; + flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid, + otherflag,other_style,other_procgrid); + } + if (!flag) error->all(FLERR,"Could not create grid of processors"); + + } else if (gridflag == CUSTOM) { + pmap->custom_grid(nprocs,user_procgrid,procgrid); } - // create layout of procs mapped to simulation box - // can fail (on one partition) if constrained by other_partition_style - - int flag = - procs2box(nprocs,user_procgrid,procgrid,1,1,1,other_partition_style); - if (!flag) error->all(FLERR,"Could not layout grid of processors",1); - - // error check + // error check on procgrid if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs) error->all(FLERR,"Bad grid of processors"); if (domain->dimension == 2 && procgrid[2] != 1) error->all(FLERR,"Processor count in z must be 1 for 2d simulation"); - // grid2proc[i][j][k] = proc that owns i,j,k location in grid + // grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid if (grid2proc) memory->destroy(grid2proc); memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2], "comm:grid2proc"); - // use MPI Cartesian routines to layout 3d grid of procs - // MPI may do layout in machine-optimized fashion + // map processor IDs to 3d processor grid + // produces myloc, procneigh, grid2proc - if (layoutflag == CART || layoutflag == CARTREORDER) { - int reorder = 0; - if (layoutflag == CARTREORDER) reorder = 1; - int periods[3]; - periods[0] = periods[1] = periods[2] = 1; - MPI_Comm cartesian; - - MPI_Cart_create(world,3,procgrid,periods,reorder,&cartesian); - MPI_Cart_get(cartesian,3,procgrid,periods,myloc); - MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]); - MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]); - MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]); + if (gridflag == ONELEVEL) { + if (mapflag == CART) + pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc); + else if (mapflag == CARTREORDER) + pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc); + else if (mapflag == XYZ) + pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc); - int coords[3]; - int i,j,k; - for (i = 0; i < procgrid[0]; i++) - for (j = 0; j < procgrid[1]; j++) - for (k = 0; k < procgrid[2]; k++) { - coords[0] = i; coords[1] = j; coords[2] = k; - MPI_Cart_rank(cartesian,coords,&grid2proc[i][j][k]); - } - - MPI_Comm_free(&cartesian); + } else if (gridflag == NUMA) { + pmap->numa_map(coregrid,myloc,procneigh,grid2proc); - // layout 3d grid of procs explicitly, via user-requested XYZ ordering - - } else { - int i,j,k,ime,jme,kme; - for (i = 0; i < procgrid[0]; i++) - for (j = 0; j < procgrid[1]; j++) - for (k = 0; k < procgrid[2]; k++) { - if (layoutflag == XYZ) - grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i; - else if (layoutflag == XZY) - grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i; - else if (layoutflag == YXZ) - grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j; - else if (layoutflag == YZX) - grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j; - else if (layoutflag == ZXY) - grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k; - else if (layoutflag == ZYX) - 
grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k; - - if (grid2proc[i][j][k] == me) { - ime = i; jme = j, kme = k; - } - } - - myloc[0] = ime; - myloc[1] = jme; - myloc[2] = kme; - - i = ime-1; - if (i < 0) i = procgrid[0]-1; - procneigh[0][0] = grid2proc[i][jme][kme]; - i = ime+1; - if (i == procgrid[0]) i = 0; - procneigh[0][1] = grid2proc[i][jme][kme]; - - j = jme-1; - if (j < 0) j = procgrid[1]-1; - procneigh[1][0] = grid2proc[ime][j][kme]; - j = jme+1; - if (j == procgrid[1]) j = 0; - procneigh[1][1] = grid2proc[ime][j][kme]; - - k = kme-1; - if (k < 0) k = procgrid[2]-1; - procneigh[2][0] = grid2proc[ime][jme][k]; - k = kme+1; - if (k == procgrid[2]) k = 0; - procneigh[2][1] = grid2proc[ime][jme][k]; + } else if (gridflag == CUSTOM) { + pmap->custom_map(myloc,procneigh,grid2proc); } + // print 3d grid info to screen and logfile + + if (me == 0) { + if (screen) { + fprintf(screen," %d by %d by %d MPI processor grid\n", + procgrid[0],procgrid[1],procgrid[2]); + if (gridflag == NUMA) + fprintf(screen," %d by %d by %d core grid within node\n", + coregrid[0],coregrid[1],coregrid[2]); + } + if (logfile) { + fprintf(logfile," %d by %d by %d MPI processor grid\n", + procgrid[0],procgrid[1],procgrid[2]); + if (gridflag == NUMA) + fprintf(logfile," %d by %d by %d core grid within node\n", + coregrid[0],coregrid[1],coregrid[2]); + } + } + + // print 3d grid details to outfile + + if (outfile) pmap->output(grid2proc,outfile); + // set lamda box params after procs are assigned if (domain->triclinic) domain->set_lamda_box(); - if (me == 0) { - if (screen) fprintf(screen," %d by %d by %d MPI processor grid\n", - procgrid[0],procgrid[1],procgrid[2]); - if (logfile) fprintf(logfile," %d by %d by %d MPI processor grid\n", - procgrid[0],procgrid[1],procgrid[2]); - } + // free ProcMap class - // send my proc layout to another partition if requested + delete pmap; + + // send my 3d proc grid to another partition if requested if (send_to_partition >= 0) { if (me == 0) MPI_Send(procgrid,3,MPI_INT, @@ -1196,130 +1174,6 @@ void Comm::reverse_comm_dump(Dump *dump) } } -/* ---------------------------------------------------------------------- - assign numprocs to 3d box so as to minimize surface area - area = surface area of each of 3 faces of simulation box divided by sx,sy,sz - for triclinic, area = cross product of 2 edge vectors stored in h matrix - valid assignment will be factorization of numprocs = Px by Py by Pz - user_factors = if non-zero, factors are specified by user - sx,sy,sz = scale box xyz dimension vy dividing by sx,sy,sz - other = 1 to enforce compatability with other partition's layout - return factors = # of procs assigned to each dimension - return 1 if successully factor, 0 if not -------------------------------------------------------------------------- */ - -int Comm::procs2box(int numprocs, int user_factors[3], int factors[3], - const int sx, const int sy, const int sz, int other) -{ - factors[0] = user_factors[0]; - factors[1] = user_factors[1]; - factors[2] = user_factors[2]; - - // all 3 proc counts are specified - - if (factors[0] && factors[1] && factors[2]) return 1; - - // 2 out of 3 proc counts are specified - - if (factors[0] > 0 && factors[1] > 0) { - factors[2] = nprocs/(factors[0]*factors[1]); - return 1; - } else if (factors[0] > 0 && factors[2] > 0) { - factors[1] = nprocs/(factors[0]*factors[2]); - return 1; - } else if (factors[1] > 0 && factors[2] > 0) { - factors[0] = nprocs/(factors[1]*factors[2]); - return 1; - } - - // determine cross-sectional 
areas for orthogonal and triclinic boxes - // area[0] = xy, area[1] = xz, area[2] = yz - - double area[3]; - if (domain->triclinic == 0) { - area[0] = domain->xprd * domain->yprd / (sx * sy); - area[1] = domain->xprd * domain->zprd / (sx * sz); - area[2] = domain->yprd * domain->zprd / (sy * sz); - } else { - double *h = domain->h; - double a[3],b[3],c[3]; - a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; - b[0] = h[5]; b[1] = h[1]; b[2] = 0.0; - MathExtra::cross3(a,b,c); - area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy); - a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; - b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; - MathExtra::cross3(a,b,c); - area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz); - a[0] = h[5]; a[1] = h[1]; a[2] = 0.0; - b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; - MathExtra::cross3(a,b,c); - area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz); - } - - double bestsurf = 2.0 * (area[0]+area[1]+area[2]); - - // loop thru all possible factorizations of numprocs - // only consider valid cases that match procgrid settings - // surf = surface area of a proc sub-domain - // only consider cases that match user_factors & other_procgrid settings - // success = 1 if valid factoriztion is found - // may not be if other constraint is enforced - - int ipx,ipy,ipz,valid; - double surf; - - int success = 0; - ipx = 1; - while (ipx <= numprocs) { - valid = 1; - if (user_factors[0] && ipx != user_factors[0]) valid = 0; - if (other == MULTIPLE && other_procgrid[0] % ipx) valid = 0; - if (numprocs % ipx) valid = 0; - - if (!valid) { - ipx++; - continue; - } - - ipy = 1; - while (ipy <= numprocs/ipx) { - valid = 1; - if (user_factors[1] && ipy != user_factors[1]) valid = 0; - if (other == MULTIPLE && other_procgrid[1] % ipy) valid = 0; - if ((numprocs/ipx) % ipy) valid = 0; - if (!valid) { - ipy++; - continue; - } - - ipz = numprocs/ipx/ipy; - valid = 1; - if (user_factors[2] && ipz != user_factors[2]) valid = 0; - if (other == MULTIPLE && other_procgrid[2] % ipz) valid = 0; - if (domain->dimension == 2 && ipz != 1) valid = 0; - if (!valid) { - ipy++; - continue; - } - - surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz; - if (surf < bestsurf) { - success = 1; - bestsurf = surf; - factors[0] = ipx; - factors[1] = ipy; - factors[2] = ipz; - } - ipy++; - } - - ipx++; - } - - return success; -} - /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA if flag = 1, realloc @@ -1502,7 +1356,48 @@ void Comm::set_processors(int narg, char **arg) int iarg = 3; while (iarg < narg) { - if (strcmp(arg[iarg],"part") == 0) { + if (strcmp(arg[iarg],"grid") == 0) { + if (iarg+1 > narg) error->all(FLERR,"Illegal processors command"); + + if (strcmp(arg[iarg+1],"level1") == 0) { + gridflag = LEVEL1; + iarg += 2; + + if (strcmp(arg[iarg+1],"level2") == 0) { + gridflag = LEVEL2; + iarg += 2; + + } else if (strcmp(arg[iarg+1],"numa") == 0) { + gridflag = NUMA; + iarg += 2; + + } else if (strcmp(arg[iarg],"custom") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); + gridflag = CUSTOM; + delete [] customfile; + int n = strlen(arg[iarg+1]) + 1; + customfile = new char(n); + strcpy(customfile,arg[iarg+1]); + iarg += 2; + + } else error->all(FLERR,"Illegal processors command"); + + } else if (strcmp(arg[iarg],"map") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); + if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART; + else if (strcmp(arg[iarg+1],"cart/reorder") 
== 0) mapflag = CARTREORDER; + else if (strcmp(arg[iarg+1],"xyz") == 0 || + strcmp(arg[iarg+1],"xzy") == 0 || + strcmp(arg[iarg+1],"yxz") == 0 || + strcmp(arg[iarg+1],"yzx") == 0 || + strcmp(arg[iarg+1],"zxy") == 0 || + strcmp(arg[iarg+1],"zyx") == 0) { + mapflag = XYZ; + strcpy(xyz,arg[iarg+1]); + } else error->all(FLERR,"Illegal processors command"); + iarg += 2; + + } else if (strcmp(arg[iarg],"part") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal processors command"); if (universe->nworlds == 1) error->all(FLERR, @@ -1527,250 +1422,32 @@ void Comm::set_processors(int narg, char **arg) "is already a receiver"); recv_from_partition = isend-1; } + + // only receiver has otherflag dependency + if (strcmp(arg[iarg+3],"multiple") == 0) { - if (universe->iworld == irecv-1) other_partition_style = MULTIPLE; + if (universe->iworld == irecv-1) { + otherflag = 1; + other_style = MULTIPLE; + } } else error->all(FLERR,"Illegal processors command"); iarg += 4; - } else if (strcmp(arg[iarg],"grid") == 0) { + } else if (strcmp(arg[iarg],"out") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal processors command"); - if (strcmp(arg[iarg+1],"cart") == 0) layoutflag = CART; - else if (strcmp(arg[iarg+1],"cart/reorder") == 0) - layoutflag = CARTREORDER; - else if (strcmp(arg[iarg+1],"xyz") == 0) layoutflag = XYZ; - else if (strcmp(arg[iarg+1],"xzy") == 0) layoutflag = XZY; - else if (strcmp(arg[iarg+1],"yxz") == 0) layoutflag = YXZ; - else if (strcmp(arg[iarg+1],"yzx") == 0) layoutflag = YZX; - else if (strcmp(arg[iarg+1],"zxy") == 0) layoutflag = ZXY; - else if (strcmp(arg[iarg+1],"zyx") == 0) layoutflag = ZYX; - else error->all(FLERR,"Illegal processors command"); + delete [] outfile; + int n = strlen(arg[iarg+1]) + 1; + outfile = new char(n); + strcpy(outfile,arg[iarg+1]); iarg += 2; - } else if (strcmp(arg[iarg],"numa") == 0) { - if (iarg+1 > narg) error->all(FLERR,"Illegal processors command"); - numa_nodes = 1; - if (numa_nodes < 0) error->all(FLERR,"Illegal processors command"); - iarg += 1; - } else error->all(FLERR,"Illegal processors command"); } // error check - if (numa_nodes) { - if (layoutflag != CART) - error->all(FLERR,"Can only use processors numa " - "with processors grid cart"); - if (send_to_partition >= 0 || recv_from_partition >= 0) - error->one(FLERR,"Cannot use processors numa with processors part"); - } -} - -/* ---------------------------------------------------------------------- - setup 3d grid of procs based on box size, group neighbors by numa node - return 1 if successful, return 0 if not -------------------------------------------------------------------------- */ - -int Comm::numa_set_proc_grid() -{ - // get names of all nodes - - int name_length; - char node_name[MPI_MAX_PROCESSOR_NAME]; - char node_names[MPI_MAX_PROCESSOR_NAME*nprocs]; - MPI_Get_processor_name(node_name,&name_length); - MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names, - MPI_MAX_PROCESSOR_NAME,MPI_CHAR,world); - std::string node_string = std::string(node_name); - - // get number of procs per node - - std::map name_map; - std::map::iterator np; - for (int i = 0; i < nprocs; i++) { - std::string i_string = std::string(&node_names[i*MPI_MAX_PROCESSOR_NAME]); - np = name_map.find(i_string); - if (np == name_map.end()) - name_map[i_string] = 1; - else - np->second++; - } - int procs_per_node = name_map.begin()->second; - int procs_per_numa = procs_per_node / numa_nodes; - - // use non-numa mapping if any condition met - - if (procs_per_numa < 4 || // less than 4 procs per numa 
node - procs_per_node % numa_nodes != 0 || // no-op since numa_nodes = 1 for now - nprocs % procs_per_numa != 0 || // total procs not a multiple of node - nprocs <= procs_per_numa || // only 1 node used - user_procgrid[0] > 1 || // user specified grid dimension < 1 - user_procgrid[1] > 1 || // in any dimension - user_procgrid[2] > 1) { - if (me == 0) { - if (screen) fprintf(screen," 1 by 1 by 1 Node grid\n"); - if (logfile) fprintf(logfile," 1 by 1 by 1 Node grid\n"); - } - return 0; - } - - // user settings for the factorization per numa node - currently always zero - - int user_numagrid[3]; - user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0; - - // if user specifies 1 for a proc grid dimension, - // also use 1 for the node grid dimension - - if (user_procgrid[0] == 1) user_numagrid[0] = 1; - if (user_procgrid[1] == 1) user_numagrid[1] = 1; - if (user_procgrid[2] == 1) user_numagrid[2] = 1; - - // get an initial factorization for each numa node, - // if the user has not set the number of processors - - int numagrid[3]; - procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0); - if (numagrid[0] * numagrid[1] * numagrid[2] != procs_per_numa) - error->all(FLERR,"Bad node grid of processors"); - - // get a factorization for the grid of numa nodes - - int node_count = nprocs / procs_per_numa; - procs2box(node_count,user_procgrid,procgrid, - numagrid[0],numagrid[1],numagrid[2],0); - if (procgrid[0] * procgrid[1] * procgrid[2] != node_count) - error->all(FLERR,"Bad grid of processors"); - - // repeat the numa node factorization using the subdomain sizes - // this will refine the factorization if the user specified the node layout - - procs2box(procs_per_numa,user_numagrid,numagrid, - procgrid[0],procgrid[1],procgrid[2],0); - if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa) - error->all(FLERR,"Bad Node grid of processors"); - if (domain->dimension == 2 && (procgrid[2] != 1 || numagrid[2] != 1)) - error->all(FLERR,"Processor count in z must be 1 for 2d simulation"); - - // assign a unique id to each node - - int node_num = 0, node_id = 0; - for (np = name_map.begin(); np != name_map.end(); ++np) { - if (np->first == node_string) node_id = node_num; - node_num++; - } - - // setup a per node communicator and find rank within - - MPI_Comm node_comm; - MPI_Comm_split(world,node_id,0,&node_comm); - int node_rank; - MPI_Comm_rank(node_comm,&node_rank); - - // setup a per numa communicator and find rank within - - MPI_Comm numa_comm; - int local_numa = node_rank / procs_per_numa; - MPI_Comm_split(node_comm,local_numa,0,&numa_comm); - int numa_rank; - MPI_Comm_rank(numa_comm,&numa_rank); - - // setup a communicator with the rank 0 procs from each numa node - - MPI_Comm numa_leaders; - MPI_Comm_split(world,numa_rank,0,&numa_leaders); - - // use the MPI Cartesian routines to map the nodes to the grid - // could implement layoutflag as in non-NUMA case? 
- - int reorder = 0; - int periods[3]; - periods[0] = periods[1] = periods[2] = 1; - MPI_Comm cartesian; - if (numa_rank == 0) { - MPI_Cart_create(numa_leaders,3,procgrid,periods,reorder,&cartesian); - MPI_Cart_get(cartesian,3,procgrid,periods,myloc); - } - - // broadcast numa node location in grid to other procs in numa node - - MPI_Bcast(myloc,3,MPI_INT,0,numa_comm); - - // get storage for the process mapping - - if (grid2proc) memory->destroy(grid2proc); - memory->create(grid2proc,procgrid[0]*numagrid[0],procgrid[1]*numagrid[1], - procgrid[2]*numagrid[2],"comm:grid2proc"); - - // compute my location within the grid - - int z_offset = numa_rank / (numagrid[0] * numagrid[1]); - int y_offset = (numa_rank % (numagrid[0] * numagrid[1]))/numagrid[0]; - int x_offset = numa_rank % numagrid[0]; - myloc[0] = myloc[0] * numagrid[0] + x_offset; - myloc[1] = myloc[1] * numagrid[1] + y_offset; - myloc[2] = myloc[2] * numagrid[2] + z_offset; - procgrid[0] *= numagrid[0]; - procgrid[1] *= numagrid[1]; - procgrid[2] *= numagrid[2]; - - // allgather of locations to fill grid2proc - - int **gridi; - memory->create(gridi,nprocs,3,"comm:gridi"); - MPI_Allgather(&myloc,3,MPI_INT,gridi[0],3,MPI_INT,world); - for (int i = 0; i < nprocs; i++) - grid2proc[gridi[i][0]][gridi[i][1]][gridi[i][2]] = i; - memory->destroy(gridi); - - // get my neighbors - - int minus, plus; - for (int i = 0; i < 3; i++) { - numa_shift(myloc[i],procgrid[i],minus,plus); - procneigh[i][0] = minus; - procneigh[i][1] = plus; - } - procneigh[0][0] = grid2proc[procneigh[0][0]][myloc[1]][myloc[2]]; - procneigh[0][1] = grid2proc[procneigh[0][1]][myloc[1]][myloc[2]]; - procneigh[1][0] = grid2proc[myloc[0]][procneigh[1][0]][myloc[2]]; - procneigh[1][1] = grid2proc[myloc[0]][procneigh[1][1]][myloc[2]]; - procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][procneigh[2][0]]; - procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][procneigh[2][1]]; - - if (numa_rank == 0) MPI_Comm_free(&cartesian); - MPI_Comm_free(&numa_leaders); - MPI_Comm_free(&numa_comm); - MPI_Comm_free(&node_comm); - - // set lamda box params after procs are assigned - - if (domain->triclinic) domain->set_lamda_box(); - - if (me == 0) { - if (screen) fprintf(screen," %d by %d by %d Node grid\n", - numagrid[0],numagrid[1],numagrid[2]); - if (logfile) fprintf(logfile," %d by %d by %d Node grid\n", - numagrid[0],numagrid[1],numagrid[2]); - if (screen) fprintf(screen," %d by %d by %d processor grid\n", - procgrid[0],procgrid[1],procgrid[2]); - if (logfile) fprintf(logfile," %d by %d by %d processor grid\n", - procgrid[0],procgrid[1],procgrid[2]); - } - - return 1; -} - -/* ---------------------------------------------------------------------- - get the index to the neighboring processors in a dimension -------------------------------------------------------------------------- */ - -void Comm::numa_shift(int myloc, int num_procs, int &minus, int &plus) -{ - minus = myloc - 1; - if (minus < 0) minus = num_procs - 1; - plus = myloc + 1; - if (plus == num_procs) plus = 0; + if (gridflag == NUMA && mapflag != CART) + error->all(FLERR,"Processors grid numa and map choice are incompatible"); } /* ---------------------------------------------------------------------- diff --git a/src/comm.h b/src/comm.h index e4b66931b6..706b7766aa 100644 --- a/src/comm.h +++ b/src/comm.h @@ -83,10 +83,16 @@ class Comm : protected Pointers { int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm int map_style; // non-0 if global->local mapping is done int bordergroup; // only communicate this group 
in borders - int layoutflag; // user-selected layout for 3d proc grid - int numa_nodes; // layout NUMA-aware 3d proc grid + int gridflag; // option for creating 3d grid + int mapflag; // option for mapping procs to 3d grid + char xyz[4]; // xyz mapping of procs to 3d grid + char *customfile; // file with custom proc map + char *outfile; // proc grid/map output file + int otherflag; // 1 if this partition dependent on another + int other_style; // style of dependency int other_procgrid[3]; // proc layout of another partition + int coregrid[3]; // procs layout of cores within a node int *firstrecv; // where to put 1st recv atom in each swap int **sendlist; // list of atoms to send in each swap @@ -97,8 +103,6 @@ class Comm : protected Pointers { int maxsend,maxrecv; // current size of send/recv buffer int maxforward,maxreverse; // max # of datums in forward/reverse comm - int procs2box(int, int[3], int[3], // map procs to 3d box - const int, const int, const int, int); virtual void grow_send(int,int); // reallocate send buffer virtual void grow_recv(int); // free/allocate recv buffer virtual void grow_list(int, int); // reallocate one sendlist @@ -107,9 +111,6 @@ class Comm : protected Pointers { virtual void allocate_multi(int); // allocate multi arrays virtual void free_swap(); // free swap arrays virtual void free_multi(); // free multi arrays - - int numa_set_proc_grid(); - void numa_shift(int, int, int &, int &); }; } diff --git a/src/error.cpp b/src/error.cpp index b7836b45c1..9465a6f97c 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -69,12 +69,10 @@ void Error::universe_one(const char *file, int line, const char *str) called by all procs in one world close all output, screen, and log files in world insure all procs in world call, else will hang - if abort = 0 (default): - if only one world in universe calls, universe will hang - if abort = 1: force abort of entire universe if any world in universe calls + force MPI_Abort if running in multi-partition mode ------------------------------------------------------------------------- */ -void Error::all(const char *file, int line, const char *str, int abort) +void Error::all(const char *file, int line, const char *str) { MPI_Barrier(world); @@ -90,7 +88,7 @@ void Error::all(const char *file, int line, const char *str, int abort) if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); - if (abort) MPI_Abort(world,1); + if (universe->nworlds > 1) MPI_Abort(universe->uworld,1); MPI_Finalize(); exit(1); } diff --git a/src/error.h b/src/error.h index 3ab51866bf..395581dee3 100644 --- a/src/error.h +++ b/src/error.h @@ -25,7 +25,7 @@ class Error : protected Pointers { void universe_all(const char *, int, const char *); void universe_one(const char *, int, const char *); - void all(const char *, int, const char *, int = 0); + void all(const char *, int, const char *); void one(const char *, int, const char *); void warning(const char *, int, const char *, int = 1); void message(const char *, int, char *, int = 1); diff --git a/src/lammps.cpp b/src/lammps.cpp index cbfddc2f7b..537ab1748e 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -149,12 +149,12 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) iarg += 2; } else if (strcmp(arg[iarg],"-reorder") == 0 || strcmp(arg[iarg],"-r") == 0) { - if (iarg+2 > narg) + if (iarg+3 > narg) error->universe_all(FLERR,"Invalid command-line argument"); if (universe->existflag) error->universe_all(FLERR,"Cannot use -reorder after -partition"); - 
universe->reorder(arg[iarg+1]); - iarg += 2; + universe->reorder(arg[iarg+1],arg[iarg+2]); + iarg += 3; } else if (strcmp(arg[iarg],"-help") == 0 || strcmp(arg[iarg],"-h") == 0) { if (iarg+1 > narg) diff --git a/src/pair_born_coul_wolf.cpp b/src/pair_born_coul_wolf.cpp new file mode 100644 index 0000000000..647c6bdaf6 --- /dev/null +++ b/src/pair_born_coul_wolf.cpp @@ -0,0 +1,470 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_born_coul_wolf.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairBornCoulWolf::PairBornCoulWolf(LAMMPS *lmp) : Pair(lmp) {} + +/* ---------------------------------------------------------------------- */ + +PairBornCoulWolf::~PairBornCoulWolf() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + + memory->destroy(cut_lj); + memory->destroy(cut_ljsq); + memory->destroy(a); + memory->destroy(rho); + memory->destroy(sigma); + memory->destroy(c); + memory->destroy(d); + memory->destroy(rhoinv); + memory->destroy(born1); + memory->destroy(born2); + memory->destroy(born3); + memory->destroy(offset); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairBornCoulWolf::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj; + double prefactor; + double r,rexp; + int *ilist,*jlist,*numneigh,**firstneigh; + double erfcc,erfcd,v_sh,dvdrr,e_self,qisq; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = 
x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + if (j < nall) factor_coul = factor_lj = 1.0; + else { + factor_coul = special_coul[j/nall]; + factor_lj = special_lj[j/nall]; + j %= nall; + } + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + } else forceborn = 0.0; + + fpair = (forcecoul + factor_lj*forceborn) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + if (rsq < cut_coulsq) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,ecoul,fpair,delx,dely,delz); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairBornCoulWolf::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + + memory->create(cut_lj,n+1,n+1,"pair:cut_lj"); + memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memory->create(a,n+1,n+1,"pair:a"); + memory->create(rho,n+1,n+1,"pair:rho"); + memory->create(sigma,n+1,n+1,"pair:sigma"); + memory->create(c,n+1,n+1,"pair:c"); + memory->create(d,n+1,n+1,"pair:d"); + memory->create(rhoinv,n+1,n+1,"pair:rhoinv"); + memory->create(born1,n+1,n+1,"pair:born1"); + memory->create(born2,n+1,n+1,"pair:born2"); + memory->create(born3,n+1,n+1,"pair:born3"); + memory->create(offset,n+1,n+1,"pair:offset"); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairBornCoulWolf::settings(int narg, char **arg) +{ + if (narg < 2 || narg > 3) error->all(FLERR,"Illegal pair_style command"); + + alf = force->numeric(arg[0]); + cut_lj_global = force->numeric(arg[1]); + if (narg == 2) cut_coul = cut_lj_global; + else cut_coul = force->numeric(arg[2]); + + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i+1; j <= atom->ntypes; j++) + if 
(setflag[i][j]) cut_lj[i][j] = cut_lj_global;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::coeff(int narg, char **arg)
+{
+  if (narg < 7 || narg > 8)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+  if (!allocated) allocate();
+
+  int ilo,ihi,jlo,jhi;
+  force->bounds(arg[0],atom->ntypes,ilo,ihi);
+  force->bounds(arg[1],atom->ntypes,jlo,jhi);
+
+  double a_one = force->numeric(arg[2]);
+  double rho_one = force->numeric(arg[3]);
+  double sigma_one = force->numeric(arg[4]);
+  if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
+  double c_one = force->numeric(arg[5]);
+  double d_one = force->numeric(arg[6]);
+
+  double cut_lj_one = cut_lj_global;
+  if (narg == 8) cut_lj_one = force->numeric(arg[7]);
+
+  int count = 0;
+  for (int i = ilo; i <= ihi; i++) {
+    for (int j = MAX(jlo,i); j <= jhi; j++) {
+      a[i][j] = a_one;
+      rho[i][j] = rho_one;
+      sigma[i][j] = sigma_one;
+      c[i][j] = c_one;
+      d[i][j] = d_one;
+      cut_lj[i][j] = cut_lj_one;
+      setflag[i][j] = 1;
+      count++;
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::init_style()
+{
+  if (!atom->q_flag)
+    error->all(FLERR,"Pair style born/coul/wolf requires atom attribute q");
+
+  int irequest = neighbor->request(this);
+
+  cut_coulsq = cut_coul * cut_coul;
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairBornCoulWolf::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+
+  double cut = MAX(cut_lj[i][j],cut_coul);
+  cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
+
+  rhoinv[i][j] = 1.0/rho[i][j];
+  born1[i][j] = a[i][j]/rho[i][j];
+  born2[i][j] = 6.0*c[i][j];
+  born3[i][j] = 8.0*d[i][j];
+
+  if (offset_flag) {
+    double rexp = exp((sigma[i][j]-cut_lj[i][j])*rhoinv[i][j]);
+    offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0) +
+      d[i][j]/pow(cut_lj[i][j],8.0);
+  } else offset[i][j] = 0.0;
+
+  cut_ljsq[j][i] = cut_ljsq[i][j];
+  a[j][i] = a[i][j];
+  c[j][i] = c[i][j];
+  d[j][i] = d[i][j];
+  rhoinv[j][i] = rhoinv[i][j];
+  sigma[j][i] = sigma[i][j];
+  born1[j][i] = born1[i][j];
+  born2[j][i] = born2[i][j];
+  born3[j][i] = born3[i][j];
+  offset[j][i] = offset[i][j];
+
+  return cut;
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::write_restart(FILE *fp)
+{
+  write_restart_settings(fp);
+
+  int i,j;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      fwrite(&setflag[i][j],sizeof(int),1,fp);
+      if (setflag[i][j]) {
+        fwrite(&a[i][j],sizeof(double),1,fp);
+        fwrite(&rho[i][j],sizeof(double),1,fp);
+        fwrite(&sigma[i][j],sizeof(double),1,fp);
+        fwrite(&c[i][j],sizeof(double),1,fp);
+        fwrite(&d[i][j],sizeof(double),1,fp);
+        fwrite(&cut_lj[i][j],sizeof(double),1,fp);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts 
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::read_restart(FILE *fp)
+{
+  read_restart_settings(fp);
+
+  allocate();
+
+  int i,j;
+  int me = comm->me;
+  for (i = 1; i <= atom->ntypes; i++)
+    for (j = i; j <= atom->ntypes; j++) {
+      if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
+      MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
+      if (setflag[i][j]) {
+        if (me == 0) {
+          fread(&a[i][j],sizeof(double),1,fp);
+          fread(&rho[i][j],sizeof(double),1,fp);
+          fread(&sigma[i][j],sizeof(double),1,fp);
+          fread(&c[i][j],sizeof(double),1,fp);
+          fread(&d[i][j],sizeof(double),1,fp);
+          fread(&cut_lj[i][j],sizeof(double),1,fp);
+        }
+        MPI_Bcast(&a[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&rho[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&c[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&d[i][j],1,MPI_DOUBLE,0,world);
+        MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 writes to restart file
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::write_restart_settings(FILE *fp)
+{
+  fwrite(&alf,sizeof(double),1,fp);
+  fwrite(&cut_lj_global,sizeof(double),1,fp);
+  fwrite(&cut_coul,sizeof(double),1,fp);
+  fwrite(&offset_flag,sizeof(int),1,fp);
+  fwrite(&mix_flag,sizeof(int),1,fp);
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads from restart file, bcasts
+------------------------------------------------------------------------- */
+
+void PairBornCoulWolf::read_restart_settings(FILE *fp)
+{
+  if (comm->me == 0) {
+    fread(&alf,sizeof(double),1,fp);
+    fread(&cut_lj_global,sizeof(double),1,fp);
+    fread(&cut_coul,sizeof(double),1,fp);
+    fread(&offset_flag,sizeof(int),1,fp);
+    fread(&mix_flag,sizeof(int),1,fp);
+  }
+  MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
+  MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   only the pair part is calculated here
+------------------------------------------------------------------------- */
+
+double PairBornCoulWolf::single(int i, int j, int itype, int jtype,
+                                double rsq,
+                                double factor_coul, double factor_lj,
+                                double &fforce)
+{
+  double r2inv,r6inv,r,prefactor,rexp;
+  double forcecoul,forceborn,phicoul,phiborn;
+  double e_shift,f_shift,dvdrr,erfcc,erfcd;
+
+  r2inv = 1.0/rsq;
+  e_shift = erfc(alf*cut_coul) / cut_coul;
+  f_shift = -(e_shift+2*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
+    cut_coul;
+
+  if (rsq < cut_coulsq) {
+    r = sqrt(rsq);
+    prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
+    erfcc = erfc(alf*r);
+    erfcd = exp(-alf*alf*r*r);
+    dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+    forcecoul = dvdrr*rsq*prefactor;
+    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+  } else forcecoul = 0.0;
+
+  if (rsq < cut_ljsq[itype][jtype]) {
+    r6inv = r2inv*r2inv*r2inv;
+    r = sqrt(rsq);
+    rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+    forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
+      born3[itype][jtype]*r2inv*r6inv;
+  } else forceborn = 0.0;
+
+  fforce = (forcecoul + factor_lj*forceborn) * r2inv;
+
+  double eng = 0.0;
+  if (rsq < cut_coulsq) {
+    phicoul = prefactor * (erfcc-e_shift*r);
+    if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
+    
eng += phicoul; + } + if (rsq < cut_ljsq[itype][jtype]) { + phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r2inv*r6inv - offset[itype][jtype]; + eng += factor_lj*phiborn; + } + return eng; +} diff --git a/src/pair_born_coul_wolf.h b/src/pair_born_coul_wolf.h new file mode 100644 index 0000000000..d643a6e996 --- /dev/null +++ b/src/pair_born_coul_wolf.h @@ -0,0 +1,56 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/coul/wolf,PairBornCoulWolf) + +#else + +#ifndef LMP_PAIR_BORN_COUL_WOLF_H +#define LMP_PAIR_BORN_COUL_WOLF_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairBornCoulWolf : public Pair { + public: + PairBornCoulWolf(class LAMMPS *); + ~PairBornCoulWolf(); + void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void init_style(); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + double single(int, int, int, int, double, double, double, double &); + + private: + double cut_lj_global; + double **cut_lj,**cut_ljsq; + double cut_coul,cut_coulsq; + double **a,**rho,**sigma,**c,**d; + double **rhoinv,**born1,**born2,**born3,**offset; + double alf,e_shift,f_shift; + + void allocate(); +}; + +} + +#endif +#endif diff --git a/src/pair_coul_wolf.cpp b/src/pair_coul_wolf.cpp new file mode 100644 index 0000000000..d44a245426 --- /dev/null +++ b/src/pair_coul_wolf.cpp @@ -0,0 +1,328 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_coul_wolf.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairCoulWolf::PairCoulWolf(LAMMPS *lmp) : Pair(lmp) {} + +/* ---------------------------------------------------------------------- */ + +PairCoulWolf::~PairCoulWolf() +{ + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + } +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulWolf::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,forcecoul,factor_coul; + double prefactor; + double r,rexp; + int *ilist,*jlist,*numneigh,**firstneigh; + double erfcc,erfcd,v_sh,dvdrr,e_self,qisq; + + ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + double *special_coul = force->special_coul; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + if (j < nall) factor_coul = 1.0; + else { + factor_coul = special_coul[j/nall]; + j %= nall; + } + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + fpair = forcecoul / rsq; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + if (rsq < cut_coulsq) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + 0.0,ecoul,fpair,delx,dely,delz); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); 
+} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairCoulWolf::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); +} + +/* ---------------------------------------------------------------------- + global settings + unlike other pair styles, + there are no individual pair settings that these override +------------------------------------------------------------------------- */ + +void PairCoulWolf::settings(int narg, char **arg) +{ + if (narg != 2) error->all(FLERR,"Illegal pair_style command"); + + alf = force->numeric(arg[0]); + cut_coul = force->numeric(arg[1]); +} + +/* ---------------------------------------------------------------------- + set cutoffs for one or more type pairs, optional +------------------------------------------------------------------------- */ + +void PairCoulWolf::coeff(int narg, char **arg) +{ + if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients"); + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + force->bounds(arg[0],atom->ntypes,ilo,ihi); + force->bounds(arg[1],atom->ntypes,jlo,jhi); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + setflag[i][j] = 1; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairCoulWolf::init_style() +{ + if (!atom->q_flag) + error->all(FLERR,"Pair coul/wolf requires atom attribute q"); + + int irequest = neighbor->request(this); + + cut_coulsq = cut_coul*cut_coul; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairCoulWolf::init_one(int i, int j) +{ + return cut_coul; +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairCoulWolf::write_restart(FILE *fp) +{ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) + fwrite(&setflag[i][j],sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairCoulWolf::read_restart(FILE *fp) +{ + read_restart_settings(fp); + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + } +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairCoulWolf::write_restart_settings(FILE *fp) +{ + fwrite(&alf,sizeof(double),1,fp); + fwrite(&cut_coul,sizeof(double),1,fp); + fwrite(&offset_flag,sizeof(int),1,fp); + 
fwrite(&mix_flag,sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairCoulWolf::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + fread(&alf,sizeof(double),1,fp); + fread(&cut_coul,sizeof(double),1,fp); + fread(&offset_flag,sizeof(int),1,fp); + fread(&mix_flag,sizeof(int),1,fp); + } + MPI_Bcast(&alf,1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world); + MPI_Bcast(&offset_flag,1,MPI_INT,0,world); + MPI_Bcast(&mix_flag,1,MPI_INT,0,world); +} + +/* ---------------------------------------------------------------------- + only the pair part is calculated here +------------------------------------------------------------------------- */ + +double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq, + double factor_coul, double factor_lj, + double &fforce) +{ + double r6inv,r,prefactor,rexp; + double forcecoul,forceborn,phicoul; + double e_shift,f_shift,dvdrr,erfcc,erfcd; + + e_shift = erfc(alf*cut_coul) / cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + fforce = forcecoul / rsq; + + double eng = 0.0; + if (rsq < cut_coulsq) { + phicoul = prefactor * (erfcc-e_shift*r); + if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor; + eng += phicoul; + } + return eng; +} diff --git a/src/pair_coul_wolf.h b/src/pair_coul_wolf.h new file mode 100644 index 0000000000..acdcb5f185 --- /dev/null +++ b/src/pair_coul_wolf.h @@ -0,0 +1,52 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/wolf,PairCoulWolf) + +#else + +#ifndef LMP_PAIR_COUL_WOLF_H +#define LMP_PAIR_COUL_WOLF_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairCoulWolf : public Pair { + public: + PairCoulWolf(class LAMMPS *); + ~PairCoulWolf(); + void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void init_style(); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + double single(int, int, int, int, double, double, double, double &); + + private: + double cut_coul,cut_coulsq; + double alf,e_shift,f_shift; + + void allocate(); +}; + +} + +#endif +#endif diff --git a/src/pair_lj_cut.cpp b/src/pair_lj_cut.cpp index 44d86990df..f5ea2887d3 100644 --- a/src/pair_lj_cut.cpp +++ b/src/pair_lj_cut.cpp @@ -431,7 +431,7 @@ void PairLJCut::settings(int narg, char **arg) cut_global = force->numeric(arg[0]); // reset cutoffs that have been explicitly set - + if (allocated) { int i,j; for (i = 1; i <= atom->ntypes; i++) @@ -446,7 +446,8 @@ void PairLJCut::settings(int narg, char **arg) void PairLJCut::coeff(int narg, char **arg) { - if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients"); + if (narg < 4 || narg > 5) + error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo,ihi,jlo,jhi; diff --git a/src/universe.cpp b/src/universe.cpp index 060e524a68..b473c9c794 100644 --- a/src/universe.cpp +++ b/src/universe.cpp @@ -59,59 +59,81 @@ Universe::~Universe() } /* ---------------------------------------------------------------------- - reorder universe processors based on custom file + reorder universe processors + create uni2orig as inverse mapping + re-create uworld communicator with new ordering via Comm_split() + style = "nth", arg = N + move every Nth proc to end of rankings + style = "custom", arg = filename file has nprocs lines with I J I = universe proc ID in original communicator uorig J = universe proc ID in reordered communicator uworld - create uni2orig as inverse mapping - re-create uworld communicator with new ordering via Comm_split() ------------------------------------------------------------------------- */ -void Universe::reorder(char *file) +void Universe::reorder(char *style, char *arg) { char line[MAXLINE]; if (uworld != uorig) MPI_Comm_free(&uworld); - if (me == 0) { - FILE *fp = fopen(file,"r"); - if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file"); - - // skip header = blank and comment lines - - char *ptr; - if (!fgets(line,MAXLINE,fp)) - error->one(FLERR,"Unexpected end of -reorder file"); - while (1) { - if (ptr = strchr(line,'#')) *ptr = '\0'; - if (strspn(line," \t\n\r") != strlen(line)) break; - if (!fgets(line,MAXLINE,fp)) - error->one(FLERR,"Unexpected end of -reorder file"); + if (strcmp(style,"nth") == 0) { + int n = atoi(arg); + if (n <= 0) + error->universe_all(FLERR,"Invalid -reorder N value"); + if (nprocs % n) + error->universe_all(FLERR,"Nprocs not a multiple of N for -reorder"); + for (int i = 0; i < nprocs; i++) { + if (i < (n-1)*nprocs/n) uni2orig[i] = i/(n-1) * n + (i % (n-1)); + else uni2orig[i] = (i - (n-1)*nprocs/n) * n + n-1; } - // read nprocs lines - // uni2orig = inverse mapping + } else if (strcmp(style,"custom") == 0) { - int me_orig,me_new; - sscanf(line,"%d %d",&me_orig,&me_new); - if (me_orig < 0 || me_orig >= nprocs 
|| - me_new < 0 || me_new >= nprocs) - error->one(FLERR,"Invalid entry in reorder file"); - uni2orig[me_new] = me_orig; + if (me == 0) { + FILE *fp = fopen(arg,"r"); + if (fp == NULL) error->universe_one(FLERR,"Cannot open -reorder file"); - for (int i = 1; i < nprocs; i++) { + // skip header = blank and comment lines + + char *ptr; if (!fgets(line,MAXLINE,fp)) - error->one(FLERR,"Unexpected end of reorder file"); - sscanf(line,"%ld %ld",&me_orig,&me_new); + error->one(FLERR,"Unexpected end of -reorder file"); + while (1) { + if (ptr = strchr(line,'#')) *ptr = '\0'; + if (strspn(line," \t\n\r") != strlen(line)) break; + if (!fgets(line,MAXLINE,fp)) + error->one(FLERR,"Unexpected end of -reorder file"); + } + + // read nprocs lines + // uni2orig = inverse mapping + + int me_orig,me_new; + sscanf(line,"%d %d",&me_orig,&me_new); if (me_orig < 0 || me_orig >= nprocs || me_new < 0 || me_new >= nprocs) error->one(FLERR,"Invalid entry in reorder file"); uni2orig[me_new] = me_orig; - } - fclose(fp); - } - MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig); + for (int i = 1; i < nprocs; i++) { + if (!fgets(line,MAXLINE,fp)) + error->one(FLERR,"Unexpected end of reorder file"); + sscanf(line,"%d %d",&me_orig,&me_new); + if (me_orig < 0 || me_orig >= nprocs || + me_new < 0 || me_new >= nprocs) + error->one(FLERR,"Invalid entry in reorder file"); + uni2orig[me_new] = me_orig; + } + fclose(fp); + } + + // bcast uni2org from proc 0 to all other universe procs + + MPI_Bcast(uni2orig,nprocs,MPI_INT,0,uorig); + + } else error->universe_all(FLERR,"Invalid command-line argument"); + + // create new uworld communicator int ome,key; MPI_Comm_rank(uorig,&ome); diff --git a/src/universe.h b/src/universe.h index 7a601a3371..1c24ab727c 100644 --- a/src/universe.h +++ b/src/universe.h @@ -41,7 +41,7 @@ class Universe : protected Pointers { Universe(class LAMMPS *, MPI_Comm); ~Universe(); - void reorder(char *); + void reorder(char *, char *); void add_world(char *); int consistent(); }; diff --git a/src/variable.cpp b/src/variable.cpp index c6f955f75e..428e8939f6 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -29,11 +29,10 @@ #include "output.h" #include "thermo.h" #include "random_mars.h" +#include "math_const.h" #include "memory.h" #include "error.h" -#include "math_const.h" - using namespace LAMMPS_NS; using namespace MathConst;
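Editorial note appended after the patch, not part of the commit itself: the new coul/wolf and born/coul/wolf styles implement the Wolf summation method, in which the Coulomb interaction is damped by erfc(alf*r) and shifted so that the pair energy and force vanish at the cutoff, plus a per-atom self-energy term tallied once per atom. A minimal standalone C++ sketch of those per-pair formulas follows; alpha, rc, the charges, the separation r, and qqrd2e = 1.0 are illustrative placeholder values, not LAMMPS defaults.

#include "math.h"
#include "stdio.h"

int main()
{
  const double MY_PIS = 1.77245385090551602729;   // sqrt(pi), as in math_const.h
  double alpha = 0.2, rc = 10.0;                  // damping parameter and cutoff (example values)
  double qi = 1.0, qj = -1.0, r = 2.5;            // example charges and separation
  double qqrd2e = 1.0;                            // units conversion factor, set to 1 here

  // cutoff-dependent energy and force shifts, as computed in compute() and single()
  double e_shift = erfc(alpha*rc)/rc;
  double f_shift = -(e_shift + 2.0*alpha/MY_PIS*exp(-alpha*alpha*rc*rc))/rc;

  // shifted pair energy and force for a pair at separation r < rc
  double prefactor = qqrd2e*qi*qj/r;
  double erfcc = erfc(alpha*r);
  double erfcd = exp(-alpha*alpha*r*r);
  double ecoul = prefactor*(erfcc - e_shift*r);
  double dvdrr = (erfcc/(r*r) + 2.0*alpha/MY_PIS*erfcd/r) + f_shift;
  double fpair = dvdrr*prefactor;                 // equals forcecoul/rsq in the pair style

  // self-energy term tallied once per atom i
  double e_self = -(e_shift/2.0 + alpha/MY_PIS)*qi*qi*qqrd2e;

  printf("ecoul = %g  fpair = %g  e_self = %g\n",ecoul,fpair,e_self);
  return 0;
}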