diff --git a/src/KSPACE/commgrid.cpp b/src/KSPACE/commgrid.cpp index d2f146c94f..993a5eabdf 100644 --- a/src/KSPACE/commgrid.cpp +++ b/src/KSPACE/commgrid.cpp @@ -30,8 +30,8 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse, int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi) : Pointers(lmp) { - me = comm->me; gridcomm = gcomm; + MPI_Comm_rank(gridcomm,&me); nforward = forward; nreverse = reverse; @@ -50,6 +50,61 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse, outzlo = ozlo; outzhi = ozhi; + outxlo_max = oxlo; + outxhi_max = oxhi; + outylo_max = oylo; + outyhi_max = oyhi; + outzlo_max = ozlo; + outzhi_max = ozhi; + + procxlo = pxlo; + procxhi = pxhi; + procylo = pylo; + procyhi = pyhi; + proczlo = pzlo; + proczhi = pzhi; + + nswap = 0; + swap = NULL; + buf1 = buf2 = NULL; +} + +/* ---------------------------------------------------------------------- */ + +CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse, + int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi, + int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi, + int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max, int ozlo_max, int ozhi_max, + int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi) + : Pointers(lmp) +{ + gridcomm = gcomm; + MPI_Comm_rank(gridcomm,&me); + + nforward = forward; + nreverse = reverse; + + inxlo = ixlo; + inxhi = ixhi; + inylo = iylo; + inyhi = iyhi; + inzlo = izlo; + inzhi = izhi; + + outxlo = oxlo; + outxhi = oxhi; + outylo = oylo; + outyhi = oyhi; + outzlo = ozlo; + outzhi = ozhi; + + outxlo_max = oxlo_max; + outxhi_max = oxhi_max; + outylo_max = oylo_max; + outyhi_max = oyhi_max; + outzlo_max = ozlo_max; + outzhi_max = ozhi_max; + procxlo = pxlo; procxhi = pxhi; procylo = pylo; @@ -482,7 +537,8 @@ void CommGrid::reverse_comm(KSpace *kspace, int which) /* ---------------------------------------------------------------------- create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi) - assume 3d array is allocated as (outxlo:outxhi,outylo:outyhi,outzlo:outzhi) + assume 3d array is allocated as (outxlo_max:outxhi_max,outylo_max:outyhi_max, + outzlo_max:outzhi_max) ------------------------------------------------------------------------- */ int CommGrid::indices(int *&list, @@ -491,15 +547,15 @@ int CommGrid::indices(int *&list, int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1); memory->create(list,nmax,"Commgrid:list"); - int nx = (outxhi-outxlo+1); - int ny = (outyhi-outylo+1); + int nx = (outxhi_max-outxlo_max+1); + int ny = (outyhi_max-outylo_max+1); int n = 0; int ix,iy,iz; for (iz = zlo; iz <= zhi; iz++) for (iy = ylo; iy <= yhi; iy++) for (ix = xlo; ix <= xhi; ix++) - list[n++] = (iz-outzlo)*ny*nx + (iy-outylo)*nx + (ix-outxlo); + list[n++] = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max); return nmax; } diff --git a/src/KSPACE/commgrid.h b/src/KSPACE/commgrid.h index bb1a010fa7..b069e74f87 100644 --- a/src/KSPACE/commgrid.h +++ b/src/KSPACE/commgrid.h @@ -32,6 +32,11 @@ class CommGrid : protected Pointers { int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); + CommGrid(class LAMMPS *, MPI_Comm, int, int, + int, int, int, int, int, int, + int, int, int, int, int, int, + int, int, int, int, int, int, + int, int, int, int, int, int); ~CommGrid(); void ghost_notify(); int ghost_overlap(); @@ -55,6 +60,7 @@ class CommGrid : protected Pointers { int inxlo,inxhi,inylo,inyhi,inzlo,inzhi; int outxlo,outxhi,outylo,outyhi,outzlo,outzhi; + int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max; int procxlo,procxhi,procylo,procyhi,proczlo,proczhi; int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi; diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 2c32c50dda..cadd865b02 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -28,8 +28,6 @@ #include "neighbor.h" #include "force.h" #include "pair.h" -#include "bond.h" -#include "angle.h" #include "domain.h" #include "memory.h" #include "error.h" @@ -42,7 +40,6 @@ using namespace MathConst; #define MAX_LEVELS 10 #define OFFSET 16384 #define SMALL 0.00001 -#define LARGE 10000.0 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM}; enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM}; @@ -61,7 +58,9 @@ MSM::MSM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) factors[0] = 2; MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); + procneigh_levels = NULL; + world_levels = NULL; + active_flag = NULL; phi1d = dphi1d = NULL; @@ -77,6 +76,7 @@ MSM::MSM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) v0_direct_top = v1_direct_top = v2_direct_top = NULL; v3_direct_top = v4_direct_top = v5_direct_top = NULL; + cg_all = cg_peratom_all = NULL; cg = cg_peratom = NULL; levels = 0; @@ -135,7 +135,7 @@ void MSM::init() if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); if (slabflag == 1) - error->all(FLERR,"Cannot use slab correction with MSM"); + error->all(FLERR,"Slab correction not needed for MSM"); if (order < 4 || order > 10) { char str[128]; @@ -147,13 +147,10 @@ void MSM::init() if (sizeof(FFT_SCALAR) != 8) error->all(FLERR,"Cannot (yet) use single precision with MSM (remove -DFFT_SINGLE from Makefile and recompile)"); - // extract short-range Coulombic cutoff from pair style - - qqrd2e = force->qqrd2e; - scale = 1.0; - pair_check(); + // extract short-range Coulombic cutoff from pair style + int itmp; double *p_cutoff = (double *) force->pair->extract("cut_msm",itmp); if (p_cutoff == NULL) @@ -168,6 +165,9 @@ void MSM::init() qsqsum += atom->q[i]*atom->q[i]; } + qqrd2e = force->qqrd2e; + scale = 1.0; + double tmp; MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); qsum = tmp; @@ -275,7 +275,6 @@ double MSM::estimate_cutoff(double h, double prd) C_p *= error_scaling; // equation 3.200 from Hardy's thesis - a = C_p*pow(h,(p-1))/accuracy; // include dependency of error on other terms @@ -381,7 +380,6 @@ double MSM::estimate_total_error() void MSM::setup() { double *prd; - double a = cutoff; // volume-dependent factors @@ -393,7 +391,7 @@ void MSM::setup() double zprd = prd[2]; volume = xprd * yprd * zprd; - // loop over grid levels + // loop over grid levels and compute grid spacing for (int n=0; n (2.0*a*delxinv[0]); nxlo_direct = -nxhi_direct; nyhi_direct = static_cast (2.0*a*delyinv[0]); @@ -414,7 +414,9 @@ void MSM::setup() nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1); deallocate(); - deallocate_peratom(); + if (peratom_allocate_flag) deallocate_peratom(); + + // compute direct sum interaction weights if (!peratom_allocate_flag) { // Timestep 0 get_g_direct(); @@ -434,6 +436,8 @@ void MSM::setup() boxlo = domain->boxlo; + // ghost grid points depend on direct sum interaction limits, so need to recompute local grid + set_grid_local(); // allocate K-space dependent memory @@ -442,7 +446,12 @@ void MSM::setup() allocate(); peratom_allocate_flag = 0; + // setup commgrid + + cg_all->ghost_notify(); + cg_all->setup(); for (int n=0; nghost_notify(); cg[n]->setup(); } @@ -458,15 +467,19 @@ void MSM::compute(int eflag, int vflag) int i,j; // set energy/virial flags - // invoke allocate_peratom() if needed for first time if (eflag || vflag) ev_setup(eflag,vflag); else evflag = evflag_atom = eflag_global = vflag_global = eflag_atom = vflag_atom = eflag_either = vflag_either = 0; + // invoke allocate_peratom() if needed for first time + if (vflag_atom && !peratom_allocate_flag) { allocate_peratom(); + cg_peratom_all->ghost_notify(); + cg_peratom_all->setup(); for (int n=0; nghost_notify(); cg_peratom[n]->setup(); } @@ -487,28 +500,52 @@ void MSM::compute(int eflag, int vflag) particle_map(); make_rho(); - current_level = 0; - cg[0]->reverse_comm(this,REVERSE_RHO); + // all procs reverse communicate charge density values from their ghost grid points + // to fully sum contribution in their 3d grid - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks + current_level = 0; + cg_all->reverse_comm(this,REVERSE_RHO); + + // forward communicate charge density values to fill ghost grid points + // compute direct sum interaction and then restrict to coarser grid for (int n=0; n<=levels-2; n++) { + if (!active_flag[n]) continue; current_level = n; cg[n]->forward_comm(this,FORWARD_RHO); direct(n); restriction(n); } - - // top grid level - current_level = levels-1; - cg[levels-1]->forward_comm(this,FORWARD_RHO); - direct_top(levels-1); + + // compute direct interation for top grid level for nonperiodic + // and for second from top grid level for periodic + + if (active_flag[levels-1]) { + if (domain->nonperiodic) { + current_level = levels-1; + cg[levels-1]->forward_comm(this,FORWARD_RHO); + direct_top(levels-1); + cg[levels-1]->reverse_comm(this,REVERSE_AD); + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } else { + // Here using MPI_Allreduce is cheaper than using commgrid + grid_swap_forward(levels-1,qgrid[levels-1]); + direct(levels-1); + grid_swap_reverse(levels-1,egrid[levels-1]); + current_level = levels-1; + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } + } + + // prolongate energy/virial from coarser grid to finer grid + // reverse communicate from ghost grid points to get full sum for (int n=levels-2; n>=0; n--) { - + if (!active_flag[n]) continue; prolongation(n); current_level = n; @@ -524,26 +561,25 @@ void MSM::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks current_level = 0; - - cg[0]->forward_comm(this,FORWARD_AD); + cg_all->forward_comm(this,FORWARD_AD); // extra per-atom energy/virial communication if (vflag_atom) - cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM); + cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM); // calculate the force on my particles (interpolation) fieldforce(); - // calculate the per-atom energy for my particles + // calculate the per-atom energy/virial for my particles if (evflag_atom) fieldforce_peratom(); + // total long-range energy + const double qscale = force->qqrd2e * scale; - // Total long-range energy - if (eflag_global) { double energy_all; MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); @@ -554,7 +590,7 @@ void MSM::compute(int eflag, int vflag) energy *= 0.5*qscale; } - // Total long-range virial + // total long-range virial if (vflag_global) { double virial_all[6]; @@ -590,12 +626,24 @@ void MSM::compute(int eflag, int vflag) void MSM::allocate() { - // summation coeffs + // interpolation coeffs memory->create2d_offset(phi1d,3,-order,order,"msm:phi1d"); memory->create2d_offset(dphi1d,3,-order,order,"msm:dphi1d"); - // allocate grid levels + // commgrid using all processors for finest grid level + + int (*procneigh_all)[2] = comm->procneigh; + + + cg_all = new CommGrid(lmp,world,1,1, + nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0], + nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all, + nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0], + procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0], + procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]); + + // allocate memory for each grid level for (int n=0; ncreate3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n], @@ -604,25 +652,38 @@ void MSM::allocate() memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n], nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid"); - // create ghost grid object for rho and electric field communication + // create commgrid object for rho and electric field communication - int (*procneigh)[2] = comm->procneigh; - - cg[n] = new CommGrid(lmp,world,1,1, - nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n], - nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n], - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); + if (active_flag[n]) { + int **procneigh = procneigh_levels[n]; + cg[n] = new CommGrid(lmp,world_levels[n],1,1, + nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n], + nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n], + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } } } /* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of grid points + allocate per-atom virial memory that depends on # of grid points ------------------------------------------------------------------------- */ void MSM::allocate_peratom() { - // allocate grid levels + // create commgrid object for per-atom virial using all processors + + int (*procneigh_all)[2] = comm->procneigh; + + cg_peratom_all = + new CommGrid(lmp,world,6,6, + nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0], + nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all, + nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0], + procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0], + procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]); + + // allocate memory for each grid level for (int n=0; ncreate3d_offset(v0grid[n],nzlo_out[n],nzhi_out[n], @@ -638,16 +699,17 @@ void MSM::allocate_peratom() memory->create3d_offset(v5grid[n],nzlo_out[n],nzhi_out[n], nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v5grid"); - // create ghost grid object for per-atom energy/virial + // create commgrid object for per-atom virial - int (*procneigh)[2] = comm->procneigh; - - cg_peratom[n] = - new CommGrid(lmp,world,6,6, - nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n], - nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n], - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); + if (active_flag[n]) { + int **procneigh = procneigh_levels[n]; + cg_peratom[n] = + new CommGrid(lmp,world_levels[n],6,6, + nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n], + nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n], + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } } } @@ -660,7 +722,7 @@ void MSM::deallocate() memory->destroy2d_offset(phi1d,-order); memory->destroy2d_offset(dphi1d,-order); - // deallocate grid levels + if (cg_all) delete cg_all; for (int n=0; ndestroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]); + if (world_levels) + if (world_levels[n] != MPI_COMM_NULL) + MPI_Comm_free(&world_levels[n]); + if (cg) if (cg[n]) delete cg[n]; } } /* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of grid points + deallocate per-atom virial memory that depends on # of grid points ------------------------------------------------------------------------- */ void MSM::deallocate_peratom() { + if (cg_peratom_all) delete cg_peratom_all; + for (int n=0; ndestroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]); @@ -709,7 +777,11 @@ void MSM::allocate_levels() cg = new CommGrid*[levels]; cg_peratom = new CommGrid*[levels]; - + + memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels"); + world_levels = new MPI_Comm[levels]; + active_flag = new int[levels]; + alpha = new int[levels]; betax = new int[levels]; betay = new int[levels]; @@ -752,6 +824,7 @@ void MSM::allocate_levels() for (int n=0; ndestroy(procneigh_levels); + delete [] world_levels; + delete [] active_flag; delete [] cg; delete [] cg_peratom; @@ -820,7 +896,7 @@ void MSM::deallocate_levels() } /* ---------------------------------------------------------------------- - set size of MSM grids + set total size of MSM grids ------------------------------------------------------------------------- */ void MSM::set_grid_global() @@ -836,6 +912,8 @@ void MSM::set_grid_global() double hx,hy,hz; if (adjust_cutoff_flag && !gridflag) { + // seek to choose optimal Coulombic cutoff and number of grid levels + // (based on a cost estimate in Hardy's thesis) int p = order - 1; double hmin = 3072.0*(p+1)/(p-1)/ (448.0*MY_PI + 56.0*MY_PI*order/2 + 1701.0); @@ -848,6 +926,7 @@ void MSM::set_grid_global() ny_max = static_cast(yprd/hy); nz_max = static_cast(zprd/hz); } else if (!gridflag) { + // Coulombic cutoff is set by user, choose grid to give requested error nx_max = ny_max = nz_max = 2; hx = xprd/nx_max; hy = yprd/ny_max; @@ -875,12 +954,13 @@ void MSM::set_grid_global() z_error = estimate_1d_error(hz,zprd); } } else { + // cutoff and grid are set by user nx_max = nx_msm_max; ny_max = ny_msm_max; nz_max = nz_msm_max; } - // boost grid size until it is factorable + // boost grid size until it is factorable by 2 int flag = 0; int xlevels,ylevels,zlevels; @@ -892,23 +972,29 @@ void MSM::set_grid_global() if (flag && gridflag && me == 0) error->warning(FLERR,"Number of MSM mesh points increased to be a multiple of 2"); - // Find maximum number of levels + // find maximum number of levels levels = MAX(xlevels,ylevels); levels = MAX(levels,zlevels); if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels"); -// Need at least 2 MSM levels for periodic systems + // need at least 2 MSM levels for periodic systems if (levels <= 1) { levels = xlevels = ylevels = zlevels = 2; nx_max = ny_max = nz_max = 2; - if (gridflag) + if (gridflag) error->warning(FLERR, - "MSM mesh too small, increasing to 2 points in each direction"); + "MSM mesh too small, increasing to 2 points in each direction"); } + // omit top grid level for periodic systems + + if (!domain->nonperiodic) levels -= 1; + + // adjust Coulombic cutoff to give desired error (if requested) + if (adjust_cutoff_flag) { hx = xprd/nx_max; hy = yprd/ny_max; @@ -931,6 +1017,8 @@ void MSM::set_grid_global() allocate_levels(); + // find number of grid levels in each direction + for (int n = 0; n < levels; n++) { if (xlevels-n-1 > 0) @@ -951,7 +1039,22 @@ void MSM::set_grid_global() if (nx_msm[0] >= OFFSET || ny_msm[0] >= OFFSET || nz_msm[0] >= OFFSET) error->all(FLERR,"MSM grid is too large"); - + + // compute number of extra grid points needed for nonperiodic boundary conditions + + if (domain->nonperiodic) { + alpha[0] = -(order/2 - 1); + betax[0] = nx_msm[0] + (order/2 - 1); + betay[0] = ny_msm[0] + (order/2 - 1); + betaz[0] = nz_msm[0] + (order/2 - 1); + for (int n = 1; n < levels; n++) { + alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1); + betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1); + betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1); + betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1); + } + } + if (domain->nonperiodic) { alpha[0] = -(order/2 - 1); betax[0] = nx_msm[0] + (order/2 - 1); @@ -969,16 +1072,12 @@ void MSM::set_grid_global() /* ---------------------------------------------------------------------- set local subset of MSM grid that I own - n xyz lo/hi in = 3d brick that I own (inclusive) - n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) + n xyz lo/hi in = 3d grid that I own (inclusive) + n xyz lo/hi out = 3d grid + ghost cells in 6 directions (inclusive) ------------------------------------------------------------------------- */ void MSM::set_grid_local() { - // global indices of MSM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global MSM grid that I own without ghost cells - // loop over grid levels for (int n=0; n (comm->zsplit[comm->myloc[2]] * nz_msm[n]); nzhi_in[n] = static_cast (comm->zsplit[comm->myloc[2]+1] * nz_msm[n]) - 1; - // nlower,nupper = stencil size for mapping particles to MSM grid + // nlower,nupper = stencil size for mapping (interpolating) particles to MSM grid nlower = -(order-1)/2; nupper = order/2; + // lengths of box and processor sub-domains + double *prd,*sublo,*subhi,*boxhi; prd = domain->prd; boxlo = domain->boxlo; boxhi = domain->boxhi; + sublo = domain->sublo; + subhi = domain->subhi; + double xprd = prd[0]; double yprd = prd[1]; double zprd = prd[2]; @@ -1022,12 +1126,9 @@ void MSM::set_grid_local() // dist[3] = particle position bound = subbox + skin/2.0 // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - sublo = domain->sublo; - subhi = domain->subhi; - double dist[3]; double cuthalf = 0.0; - if (n == 0) cuthalf = 0.5*neighbor->skin; // Only applies to finest grid + if (n == 0) cuthalf = 0.5*neighbor->skin; // only applies to finest grid dist[0] = dist[1] = dist[2] = cuthalf; int nlo,nhi; @@ -1036,6 +1137,12 @@ void MSM::set_grid_local() nx_msm[n]/xprd + OFFSET) - OFFSET; nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * nx_msm[n]/xprd + OFFSET) - OFFSET; + if (n == 0) { + // use a smaller ghost region for interpolation + nxlo_out_all = nlo + nlower; + nxhi_out_all = nhi + nupper; + } + // a larger ghost region is needed for the direct sum and for restriction/prolongation nxlo_out[n] = nlo + MIN(-order,nxlo_direct); nxhi_out[n] = nhi + MAX(order,nxhi_direct); @@ -1043,6 +1150,10 @@ void MSM::set_grid_local() ny_msm[n]/yprd + OFFSET) - OFFSET; nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * ny_msm[n]/yprd + OFFSET) - OFFSET; + if (n == 0) { + nylo_out_all = nlo + nlower; + nyhi_out_all = nhi + nupper; + } nylo_out[n] = nlo + MIN(-order,nylo_direct); nyhi_out[n] = nhi + MAX(order,nyhi_direct); @@ -1050,10 +1161,16 @@ void MSM::set_grid_local() nz_msm[n]/zprd + OFFSET) - OFFSET; nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * nz_msm[n]/zprd + OFFSET) - OFFSET; - nzlo_out[n] = nlo + MIN(-order,nzlo_direct); + if (n == 0) { + nzlo_out_all = nlo + nlower; + nzhi_out_all = nhi + nupper; + } + // a hemisphere is used for direct sum interactions, + // so no ghosting is needed for direct sum in the -z direction + nzlo_out[n] = nlo - order; nzhi_out[n] = nhi + MAX(order,nzhi_direct); - - // Add extra grid points for nonperiodic boundary conditions + + // add extra grid points for nonperiodic boundary conditions if (domain->nonperiodic) { @@ -1061,22 +1178,26 @@ void MSM::set_grid_local() if (nxlo_in[n] == 0) nxlo_in[n] = alpha[n]; nxlo_out[n] = MAX(nxlo_out[n],alpha[n]); + if (n == 0) nxlo_out_all = MAX(nxlo_out_all,alpha[0]); if (nxhi_in[n] == nx_msm[n] - 1) nxhi_in[n] = betax[n]; nxhi_out[n] = MIN(nxhi_out[n],betax[n]); + if (n == 0) nxhi_out_all = MIN(nxhi_out_all,betax[0]); if (nxhi_in[n] < 0) nxhi_in[n] = alpha[n] - 1; } - + if (!domain->yperiodic) { if (nylo_in[n] == 0) nylo_in[n] = alpha[n]; nylo_out[n] = MAX(nylo_out[n],alpha[n]); - + if (n == 0) nylo_out_all = MAX(nylo_out_all,alpha[0]); + if (nyhi_in[n] == ny_msm[n] - 1) nyhi_in[n] = betay[n]; nyhi_out[n] = MIN(nyhi_out[n],betay[n]); + if (n == 0) nyhi_out_all = MIN(nyhi_out_all,betay[0]); if (nyhi_in[n] < 0) nyhi_in[n] = alpha[n] - 1; } @@ -1085,15 +1206,21 @@ void MSM::set_grid_local() if (nzlo_in[n] == 0) nzlo_in[n] = alpha[n]; nzlo_out[n] = MAX(nzlo_out[n],alpha[n]); - + if (n == 0) nzlo_out_all = MAX(nzlo_out_all,alpha[0]); + if (nzhi_in[n] == nz_msm[n] - 1) nzhi_in[n] = betaz[n]; nzhi_out[n] = MIN(nzhi_out[n],betaz[n]); + if (n == 0) nzhi_out_all = MIN(nzhi_out_all,betaz[0]); if (nzhi_in[n] < 0) nzhi_in[n] = alpha[n] - 1; } } + // prevent inactive processors from participating in MPI communication routines + + set_proc_grid(n); + // MSM grids for this proc, including ghosts ngrid[n] = (nxhi_out[n]-nxlo_out[n]+1) * (nyhi_out[n]-nylo_out[n]+1) * @@ -1101,6 +1228,82 @@ void MSM::set_grid_local() } } +/* ---------------------------------------------------------------------- + find active procs and neighbor procs for each grid level +------------------------------------------------------------------------- */ + +void MSM::set_proc_grid(int n) +{ + for (int i=0; i<3; i++) + myloc[i] = comm->myloc[i]; + + // size of inner MSM grid owned by this proc + + int nxgrid_in = nxhi_in[n]-nxlo_in[n]+1; + int nygrid_in = nyhi_in[n]-nylo_in[n]+1; + int nzgrid_in = nzhi_in[n]-nzlo_in[n]+1; + int ngrid_in = nxgrid_in * nygrid_in * nzgrid_in; + + // check to see if this proc owns any inner grid points on this grid level + // if not, deactivate by setting active_flag = 0 + + int cnt[3]; + + cnt[0] = 0; + if (myloc[1] == 0 && myloc[2] == 0) + if (nxgrid_in > 0) + cnt[0] = 1; + + cnt[1] = 0; + if (myloc[0] == 0 && myloc[2] == 0) + if (nygrid_in > 0) + cnt[1] = 1; + + cnt[2] = 0; + if (myloc[0] == 0 && myloc[1] == 0) + if (nzgrid_in > 0) + cnt[2] = 1; + + MPI_Allreduce(&cnt[0],&procgrid[0],3,MPI_INT,MPI_SUM,world); + + int color; + + if (ngrid_in > 0) { + active_flag[n] = 1; + color = 0; + } else { + active_flag[n] = 0; + color = MPI_UNDEFINED; + } + + // define a new MPI communicator for this grid level that only includes active procs + + MPI_Comm_split(world,color,me,&world_levels[n]); + + if (!active_flag[n]) return; + + int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right + + // map processor IDs to new 3d processor grid + // produces myloc, procneigh + + int periods[3]; + periods[0] = periods[1] = periods[2] = 1; + MPI_Comm cartesian; + + MPI_Cart_create(world_levels[n],3,procgrid,periods,0,&cartesian); + MPI_Cart_get(cartesian,3,procgrid,periods,myloc); + MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]); + MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]); + MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]); + + MPI_Comm_free(&cartesian); + + for (int i=0; i<3; i++) + for (int j=0; j<2; j++) + procneigh_levels[n][i][j] = procneigh[i][j]; +} + /* ---------------------------------------------------------------------- reset local grid arrays and communication stencils called by fix balance b/c it changed sizes of processor sub-domains @@ -1124,8 +1327,7 @@ void MSM::setup_grid() int MSM::factorable(int n, int &flag, int &levels) { - int i,norig; - norig = n; + int i; levels = 1; while (n > 1) { @@ -1185,10 +1387,7 @@ void MSM::particle_map() } /* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid + aninterpolation: interpolate charges from particles to grid ------------------------------------------------------------------------- */ void MSM::make_rho() @@ -1222,7 +1421,7 @@ void MSM::make_rho() dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; - compute_phis_and_dphis(dx,dy,dz); + compute_phis(dx,dy,dz); z0 = q[i]; for (n = nlower; n <= nupper; n++) { @@ -1242,7 +1441,8 @@ void MSM::make_rho() } /* ---------------------------------------------------------------------- - MSM direct part procedure for intermediate grid levels + MSM direct sum procedure for intermediate grid levels, solve Poisson's + equation to get energy, virial, etc. ------------------------------------------------------------------------- */ void MSM::direct(int n) @@ -1258,6 +1458,12 @@ void MSM::direct(int n) double ***v4gridn = v4grid[n]; double ***v5gridn = v5grid[n]; double *g_directn = g_direct[n]; + double *v0_directn = v0_direct[n]; + double *v1_directn = v1_direct[n]; + double *v2_directn = v2_direct[n]; + double *v3_directn = v3_direct[n]; + double *v4_directn = v4_direct[n]; + double *v5_directn = v5_direct[n]; // zero out electric potential @@ -1275,16 +1481,20 @@ void MSM::direct(int n) } int icx,icy,icz,ix,iy,iz,zk,zyk,k; - int jj,kk; + int ii,jj,kk; int imin,imax,jmin,jmax,kmin,kmax; - double qtmp; + double qtmp,qtmp2,gtmp; double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum; - + double **qk,**ek; + double *qkj,*ekj; + int nx = nxhi_direct - nxlo_direct + 1; int ny = nyhi_direct - nylo_direct + 1; + // loop over inner grid points + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) { - + if (domain->zperiodic) { kmin = nzlo_direct; kmax = nzhi_direct; @@ -1292,7 +1502,7 @@ void MSM::direct(int n) kmin = MAX(nzlo_direct,alpha[n] - icz); kmax = MIN(nzhi_direct,betaz[n] - icz); } - + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) { if (domain->yperiodic) { @@ -1302,7 +1512,7 @@ void MSM::direct(int n) jmin = MAX(nylo_direct,alpha[n] - icy); jmax = MIN(nyhi_direct,betay[n] - icy); } - + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) { if (domain->xperiodic) { @@ -1313,63 +1523,288 @@ void MSM::direct(int n) imax = MIN(nxhi_direct,betax[n] - icx); } + qtmp = qgridn[icz][icy][icx]; // charge on center grid point + esum = 0.0; if (vflag_either) v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0; - - for (iz = kmin; iz <= kmax; iz++) { + + // use hemisphere to avoid double computation of pair-wise + // interactions in direct sum (no computations in -z direction) + + for (iz = 1; iz <= kmax; iz++) { + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nzhi_direct)*ny; + for (iy = jmin; iy <= jmax; iy++) { + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + nyhi_direct)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; // charge on outer grid point + k = zyk + ix + nxhi_direct; + gtmp = g_directn[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_directn[k] * qtmp2; + v1sum += v1_directn[k] * qtmp2; + v2sum += v2_directn[k] * qtmp2; + v3sum += v3_directn[k] * qtmp2; + v4sum += v4_directn[k] * qtmp2; + v5sum += v5_directn[k] * qtmp2; + } + } + } + } + + // iz=0 + + iz = 0; + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nzhi_direct)*ny; + for (iy = 1; iy <= jmax; iy++) { + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + nyhi_direct)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; + k = zyk + ix + nxhi_direct; + gtmp = g_directn[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_directn[k] * qtmp2; + v1sum += v1_directn[k] * qtmp2; + v2sum += v2_directn[k] * qtmp2; + v3sum += v3_directn[k] * qtmp2; + v4sum += v4_directn[k] * qtmp2; + v5sum += v5_directn[k] * qtmp2; + } + } + } + + // iz=0, iy=0 + + iz = 0; + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nzhi_direct)*ny; + iy = 0; + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + nyhi_direct)*nx; + for (ix = 1; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; + k = zyk + ix + nxhi_direct; + gtmp = g_directn[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_directn[k] * qtmp2; + v1sum += v1_directn[k] * qtmp2; + v2sum += v2_directn[k] * qtmp2; + v3sum += v3_directn[k] * qtmp2; + v4sum += v4_directn[k] * qtmp2; + v5sum += v5_directn[k] * qtmp2; + } + } + + // iz=0, iy=0, ix=0 + + iz = 0; + zk = (iz + nzhi_direct)*ny; + iy = 0; + zyk = (zk + iy + nyhi_direct)*nx; + ix = 0; + k = zyk + ix + nxhi_direct; + gtmp = g_directn[k]; + esum += 0.5 * gtmp * qtmp; + egridn[icz][icy][icx] += 0.5 * gtmp * qtmp; + + // virial is zero for iz=0, iy=0, ix=0 + + // accumulate per-atom energy/virial + + egridn[icz][icy][icx] += esum; + + if (vflag_atom) { + v0gridn[icz][icy][icx] += v0sum; + v1gridn[icz][icy][icx] += v1sum; + v2gridn[icz][icy][icx] += v2sum; + v3gridn[icz][icy][icx] += v3sum; + v4gridn[icz][icy][icx] += v4sum; + v5gridn[icz][icy][icx] += v5sum; + } + + // accumulate total energy/virial + + if (evflag) { + qtmp = qgridn[icz][icy][icx]; + if (eflag_global) energy += 2.0 * esum * qtmp; + if (vflag_global) { + virial[0] += 2.0 * v0sum * qtmp; + virial[1] += 2.0 * v1sum * qtmp; + virial[2] += 2.0 * v2sum * qtmp; + virial[3] += 2.0 * v3sum * qtmp; + virial[4] += 2.0 * v4sum * qtmp; + virial[5] += 2.0 * v5sum * qtmp; + } + } + + } + } + } + + // compute per-atom virial (if requested) + + if (vflag_atom) + direct_peratom(n); +} + +/* ---------------------------------------------------------------------- + MSM direct sum procedure for intermediate grid levels, solve Poisson's + equation to get per-atom virial, separate method used for performance + reasons +------------------------------------------------------------------------- */ + +void MSM::direct_peratom(int n) +{ + //fprintf(screen,"Direct contribution on level %i\n\n",n); + + double ***qgridn = qgrid[n]; + double ***v0gridn = v0grid[n]; + double ***v1gridn = v1grid[n]; + double ***v2gridn = v2grid[n]; + double ***v3gridn = v3grid[n]; + double ***v4gridn = v4grid[n]; + double ***v5gridn = v5grid[n]; + + int icx,icy,icz,ix,iy,iz,zk,zyk,k; + int ii,jj,kk; + int imin,imax,jmin,jmax,kmin,kmax; + double qtmp; + + int nx = nxhi_direct - nxlo_direct + 1; + int ny = nyhi_direct - nylo_direct + 1; + + // loop over inner grid points + + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) { + + if (domain->zperiodic) { + kmin = nzlo_direct; + kmax = nzhi_direct; + } else { + kmin = MAX(nzlo_direct,alpha[n] - icz); + kmax = MIN(nzhi_direct,betaz[n] - icz); + } + + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) { + + if (domain->yperiodic) { + jmin = nylo_direct; + jmax = nyhi_direct; + } else { + jmin = MAX(nylo_direct,alpha[n] - icy); + jmax = MIN(nyhi_direct,betay[n] - icy); + } + + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) { + + if (domain->xperiodic) { + imin = nxlo_direct; + imax = nxhi_direct; + } else { + imin = MAX(nxlo_direct,alpha[n] - icx); + imax = MIN(nxhi_direct,betax[n] - icx); + } + + qtmp = qgridn[icz][icy][icx]; // center grid point + + // use hemisphere to avoid double computation of pair-wise + // interactions in direct sum (no computations in -z direction) + + for (iz = 1; iz <= kmax; iz++) { kk = icz+iz; zk = (iz + nzhi_direct)*ny; for (iy = jmin; iy <= jmax; iy++) { jj = icy+iy; zyk = (zk + iy + nyhi_direct)*nx; for (ix = imin; ix <= imax; ix++) { - qtmp = qgridn[kk][jj][icx+ix]; + ii = icx+ix; k = zyk + ix + nxhi_direct; - esum += g_directn[k] * qtmp; - - if (vflag_either) { - v0sum += v0_direct[n][k] * qtmp; - v1sum += v1_direct[n][k] * qtmp; - v2sum += v2_direct[n][k] * qtmp; - v3sum += v3_direct[n][k] * qtmp; - v4sum += v4_direct[n][k] * qtmp; - v5sum += v5_direct[n][k] * qtmp; - } + v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp; } } } - egridn[icz][icy][icx] = esum; - if (vflag_atom) { - v0gridn[icz][icy][icx] = v0sum; - v1gridn[icz][icy][icx] = v1sum; - v2gridn[icz][icy][icx] = v2sum; - v3gridn[icz][icy][icx] = v3sum; - v4gridn[icz][icy][icx] = v4sum; - v5gridn[icz][icy][icx] = v5sum; - } + // iz=0 - if (evflag) { - qtmp = qgridn[icz][icy][icx]; - if (eflag_global) energy += esum * qtmp; - if (vflag_global) { - virial[0] += v0sum * qtmp; - virial[1] += v1sum * qtmp; - virial[2] += v2sum * qtmp; - virial[3] += v3sum * qtmp; - virial[4] += v4sum * qtmp; - virial[5] += v5sum * qtmp; + iz = 0; + kk = icz+iz; + zk = (iz + nzhi_direct)*ny; + for (iy = 1; iy <= jmax; iy++) { + jj = icy+iy; + zyk = (zk + iy + nyhi_direct)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + k = zyk + ix + nxhi_direct; + v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp; } } + // iz=0, iy=0 + + iz = 0; + kk = icz+iz; + zk = (iz + nzhi_direct)*ny; + iy = 0; + jj = icy+iy; + zyk = (zk + iy + nyhi_direct)*nx; + for (ix = 1; ix <= imax; ix++) { + ii = icx+ix; + k = zyk + ix + nxhi_direct; + v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp; + } + + // virial is zero for iz=0, iy=0, ix=0 + } } } } /* ---------------------------------------------------------------------- - MSM direct part procedure for top grid level + MSM direct sum procedure for top grid level (nonperiodic systems only) ------------------------------------------------------------------------- */ void MSM::direct_top(int n) @@ -1400,88 +1835,345 @@ void MSM::direct_top(int n) memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double)); } - if (!domain->nonperiodic) return; // omit top grid level for periodic systems - int icx,icy,icz,ix,iy,iz,zk,zyk,k; - int jj,kk; + int ii,jj,kk; int imin,imax,jmin,jmax,kmin,kmax; - double qtmp; + double qtmp,qtmp2,gtmp; double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum; + double **qk,**ek; + double *qkj,*ekj; int nx_top = betax[n] - alpha[n]; int ny_top = betay[n] - alpha[n]; int nz_top = betaz[n] - alpha[n]; - + int nx = 2*nx_top + 1; int ny = 2*ny_top + 1; + // loop over inner grid points + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) { + + if (domain->zperiodic) { + kmin = 0; + kmax = nz_msm[n]-1; + } else { kmin = alpha[n] - icz; kmax = betaz[n] - icz; + } + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) { + + if (domain->yperiodic) { + jmin = 0; + jmax = ny_msm[n]-1; + } else { jmin = alpha[n] - icy; jmax = betay[n] - icy; + } + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) { + + if (domain->xperiodic) { + imin = 0; + imax = nx_msm[n]-1; + } else { imin = alpha[n] - icx; imax = betax[n] - icx; + } + + qtmp = qgridn[icz][icy][icx]; esum = 0.0; if (vflag_either) v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0; - for (iz = kmin; iz <= kmax; iz++) { + // use hemisphere to avoid double computation of pair-wise + // interactions in direct sum (no computations in -z direction) + + for (iz = 1; iz <= kmax; iz++) { + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nz_top)*ny; + for (iy = jmin; iy <= jmax; iy++) { + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + ny_top)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; + k = zyk + ix + nx_top; + gtmp = g_direct_top[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_direct_top[k] * qtmp2; + v1sum += v1_direct_top[k] * qtmp2; + v2sum += v2_direct_top[k] * qtmp2; + v3sum += v3_direct_top[k] * qtmp2; + v4sum += v4_direct_top[k] * qtmp2; + v5sum += v5_direct_top[k] * qtmp2; + } + } + } + } + + // iz=0 + + iz = 0; + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nz_top)*ny; + for (iy = 1; iy <= jmax; iy++) { + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + ny_top)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; + k = zyk + ix + nx_top; + gtmp = g_direct_top[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_direct_top[k] * qtmp2; + v1sum += v1_direct_top[k] * qtmp2; + v2sum += v2_direct_top[k] * qtmp2; + v3sum += v3_direct_top[k] * qtmp2; + v4sum += v4_direct_top[k] * qtmp2; + v5sum += v5_direct_top[k] * qtmp2; + } + } + } + + // iz=0, iy=0 + + iz = 0; + kk = icz+iz; + qk = qgridn[kk]; + ek = egridn[kk]; + zk = (iz + nz_top)*ny; + iy = 0; + jj = icy+iy; + qkj = qk[jj]; + ekj = ek[jj]; + zyk = (zk + iy + ny_top)*nx; + for (ix = 1; ix <= imax; ix++) { + ii = icx+ix; + qtmp2 = qkj[ii]; + k = zyk + ix + nx_top; + gtmp = g_direct_top[k]; + esum += gtmp * qtmp2; + ekj[ii] += gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_direct_top[k] * qtmp2; + v1sum += v1_direct_top[k] * qtmp2; + v2sum += v2_direct_top[k] * qtmp2; + v3sum += v3_direct_top[k] * qtmp2; + v4sum += v4_direct_top[k] * qtmp2; + v5sum += v5_direct_top[k] * qtmp2; + } + } + + // iz=0, iy=0, ix=0 + + iz = 0; + zk = (iz + nz_top)*ny; + iy = 0; + zyk = (zk + iy + ny_top)*nx; + ix = 0; + ii = icx+ix; + k = zyk + ix + nx_top; + gtmp = g_direct_top[k]; + esum += 0.5 * gtmp * qtmp; + egridn[icz][icy][icx] += 0.5 * gtmp * qtmp; + + if (vflag_either) { + v0sum += v0_direct_top[k] * qtmp; + v1sum += v1_direct_top[k] * qtmp; + v2sum += v2_direct_top[k] * qtmp; + v3sum += v3_direct_top[k] * qtmp; + v4sum += v4_direct_top[k] * qtmp; + v5sum += v5_direct_top[k] * qtmp; + } + + // accumulate per-atom energy/virial + + egridn[icz][icy][icx] += esum; + + if (vflag_atom) { + v0gridn[icz][icy][icx] += v0sum; + v1gridn[icz][icy][icx] += v1sum; + v2gridn[icz][icy][icx] += v2sum; + v3gridn[icz][icy][icx] += v3sum; + v4gridn[icz][icy][icx] += v4sum; + v5gridn[icz][icy][icx] += v5sum; + } + + // accumulate total energy/virial + + if (evflag) { + qtmp = qgridn[icz][icy][icx]; + if (eflag_global) energy += 2.0 * esum * qtmp; + if (vflag_global) { + virial[0] += 2.0 * v0sum * qtmp; + virial[1] += 2.0 * v1sum * qtmp; + virial[2] += 2.0 * v2sum * qtmp; + virial[3] += 2.0 * v3sum * qtmp; + virial[4] += 2.0 * v4sum * qtmp; + virial[5] += 2.0 * v5sum * qtmp; + } + } + + } + } + } + + // compute per-atom virial (if requested) + + if (vflag_atom) + direct_peratom_top(n); +} +/* ---------------------------------------------------------------------- + MSM direct sum procedure for top grid level, solve Poisson's + equation to get per-atom virial, separate method used for performance + reasons +------------------------------------------------------------------------- */ + +void MSM::direct_peratom_top(int n) +{ + double ***qgridn = qgrid[n]; + double ***v0gridn = v0grid[n]; + double ***v1gridn = v1grid[n]; + double ***v2gridn = v2grid[n]; + double ***v3gridn = v3grid[n]; + double ***v4gridn = v4grid[n]; + double ***v5gridn = v5grid[n]; + + int icx,icy,icz,ix,iy,iz,zk,zyk,k; + int ii,jj,kk; + int imin,imax,jmin,jmax,kmin,kmax; + double qtmp; + + int nx_top = betax[n] - alpha[n]; + int ny_top = betay[n] - alpha[n]; + int nz_top = betaz[n] - alpha[n]; + + int nx = 2*nx_top + 1; + int ny = 2*ny_top + 1; + + // loop over inner grid points + + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) { + + if (domain->zperiodic) { + kmin = 0; + kmax = nz_msm[n]-1; + } else { + kmin = alpha[n] - icz; + kmax = betaz[n] - icz; + } + + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) { + + if (domain->yperiodic) { + jmin = 0; + jmax = ny_msm[n]-1; + } else { + jmin = alpha[n] - icy; + jmax = betay[n] - icy; + } + + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) { + + if (domain->xperiodic) { + imin = 0; + imax = nx_msm[n]-1; + } else { + imin = alpha[n] - icx; + imax = betax[n] - icx; + } + + qtmp = qgridn[icz][icy][icx]; // center grid point + + // use hemisphere to avoid double computation of pair-wise + // interactions in direct sum (no computations in -z direction) + + for (iz = 1; iz <= kmax; iz++) { kk = icz+iz; zk = (iz + nz_top)*ny; for (iy = jmin; iy <= jmax; iy++) { jj = icy+iy; zyk = (zk + iy + ny_top)*nx; for (ix = imin; ix <= imax; ix++) { - qtmp = qgridn[kk][jj][icx+ix]; + ii = icx+ix; k = zyk + ix + nx_top; - esum += g_direct_top[k] * qtmp; - - if (vflag_either) { - v0sum += v0_direct_top[k] * qtmp; - v1sum += v1_direct_top[k] * qtmp; - v2sum += v2_direct_top[k] * qtmp; - v3sum += v3_direct_top[k] * qtmp; - v4sum += v4_direct_top[k] * qtmp; - v5sum += v5_direct_top[k] * qtmp; - } + v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp; } } } - egridn[icz][icy][icx] = esum; - if (vflag_atom) { - v0gridn[icz][icy][icx] = v0sum; - v1gridn[icz][icy][icx] = v1sum; - v2gridn[icz][icy][icx] = v2sum; - v3gridn[icz][icy][icx] = v3sum; - v4gridn[icz][icy][icx] = v4sum; - v5gridn[icz][icy][icx] = v5sum; - } + // iz=0 - if (evflag) { - qtmp = qgridn[icz][icy][icx]; - if (eflag_global) energy += esum * qtmp; - if (vflag_global) { - virial[0] += v0sum * qtmp; - virial[1] += v1sum * qtmp; - virial[2] += v2sum * qtmp; - virial[3] += v3sum * qtmp; - virial[4] += v4sum * qtmp; - virial[5] += v5sum * qtmp; + iz = 0; + kk = icz+iz; + zk = (iz + nz_top)*ny; + for (iy = 1; iy <= jmax; iy++) { + jj = icy+iy; + zyk = (zk + iy + ny_top)*nx; + for (ix = imin; ix <= imax; ix++) { + ii = icx+ix; + k = zyk + ix + nx_top; + v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp; } } + // iz=0, iy=0 + + iz = 0; + kk = icz+iz; + zk = (iz + nz_top)*ny; + iy = 0; + jj = icy+iy; + zyk = (zk + iy + ny_top)*nx; + for (ix = 1; ix <= imax; ix++) { + ii = icx+ix; + k = zyk + ix + nx_top; + v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp; + v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp; + v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp; + v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp; + v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp; + v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp; + } + + // virial is zero for iz=0, iy=0, ix=0 + } } } } /* ---------------------------------------------------------------------- - MSM restriction procedure for intermediate grid levels + MSM restriction procedure for intermediate grid levels, interpolate + charges from finer grid to coarser grid ------------------------------------------------------------------------- */ void MSM::restriction(int n) @@ -1493,10 +2185,8 @@ void MSM::restriction(int n) double ***qgrid1 = qgrid[n]; double ***qgrid2 = qgrid[n+1]; - //restrict grid (going from grid n to grid n+1, i.e. to a coarser grid) - int k = 0; - int index[p+1]; + int index[p+2]; for (int nu=-p; nu<=p; nu++) { if (nu%2 == 0 && nu != 0) continue; phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]); @@ -1508,7 +2198,7 @@ void MSM::restriction(int n) int ip,jp,kp,ic,jc,kc,i,j; int ii,jj,kk; - double phiz,phizy; + double phiz,phizy,q2sum; // zero out charge on coarser grid @@ -1522,6 +2212,8 @@ void MSM::restriction(int n) jc = jp * static_cast (delyinv[n]/delyinv[n+1]); kc = kp * static_cast (delzinv[n]/delzinv[n+1]); + q2sum = 0.0; + for (k=0; k<=p+1; k++) { kk = kc+index[k]; if (!domain->zperiodic) { @@ -1542,17 +2234,19 @@ void MSM::restriction(int n) if (ii < alpha[n]) continue; if (ii > betax[n]) break; } - qgrid2[kp][jp][ip] += qgrid1[kk][jj][ii] * + q2sum += qgrid1[kk][jj][ii] * phi1d[0][i]*phizy; } } } + qgrid2[kp][jp][ip] += q2sum; } } /* ---------------------------------------------------------------------- - MSM prolongation procedure for intermediate grid levels + MSM prolongation procedure for intermediate grid levels, interpolate + per-atom energy/virial from coarser grid to finer grid ------------------------------------------------------------------------- */ void MSM::prolongation(int n) @@ -1577,10 +2271,8 @@ void MSM::prolongation(int n) double ***v5grid1 = v5grid[n]; double ***v5grid2 = v5grid[n+1]; - //prolongate grid (going from grid n to grid n-1, i.e. to a finer grid) - int k = 0; - int index[p+1]; + int index[p+2]; for (int nu=-p; nu<=p; nu++) { if (nu%2 == 0 && nu != 0) continue; phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]); @@ -1593,6 +2285,7 @@ void MSM::prolongation(int n) int ip,jp,kp,ic,jc,kc,i,j; int ii,jj,kk; double phiz,phizy,phi3d; + double etmp2,v0tmp2,v1tmp2,v2tmp2,v3tmp2,v4tmp2,v5tmp2; for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++) for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++) @@ -1602,6 +2295,17 @@ void MSM::prolongation(int n) jc = jp * static_cast (delyinv[n]/delyinv[n+1]); kc = kp * static_cast (delzinv[n]/delzinv[n+1]); + etmp2 = egrid2[kp][jp][ip]; + + if (vflag_atom) { + v0tmp2 = v0grid2[kp][jp][ip]; + v1tmp2 = v1grid2[kp][jp][ip]; + v2tmp2 = v2grid2[kp][jp][ip]; + v3tmp2 = v3grid2[kp][jp][ip]; + v4tmp2 = v4grid2[kp][jp][ip]; + v5tmp2 = v5grid2[kp][jp][ip]; + } + for (k=0; k<=p+1; k++) { kk = kc+index[k]; if (!domain->zperiodic) { @@ -1624,27 +2328,126 @@ void MSM::prolongation(int n) } phi3d = phi1d[0][i]*phizy; - egrid1[kk][jj][ii] += egrid2[kp][jp][ip] * phi3d; + egrid1[kk][jj][ii] += etmp2 * phi3d; if (vflag_atom) { - v0grid1[kk][jj][ii] += v0grid2[kp][jp][ip] * phi3d; - v1grid1[kk][jj][ii] += v1grid2[kp][jp][ip] * phi3d; - v2grid1[kk][jj][ii] += v2grid2[kp][jp][ip] * phi3d; - v3grid1[kk][jj][ii] += v3grid2[kp][jp][ip] * phi3d; - v4grid1[kk][jj][ii] += v4grid2[kp][jp][ip] * phi3d; - v5grid1[kk][jj][ii] += v5grid2[kp][jp][ip] * phi3d; + v0grid1[kk][jj][ii] += v0tmp2 * phi3d; + v1grid1[kk][jj][ii] += v1tmp2 * phi3d; + v2grid1[kk][jj][ii] += v2tmp2 * phi3d; + v3grid1[kk][jj][ii] += v3tmp2 * phi3d; + v4grid1[kk][jj][ii] += v4tmp2 * phi3d; + v5grid1[kk][jj][ii] += v5tmp2 * phi3d; } - + } } } - + } } /* ---------------------------------------------------------------------- - pack own values to buf to send to another proc + Use MPI_Allreduce to fill ghost grid values, for coarse grids this may + be cheaper than using nearest-neighbor communication (commgrid), right + now only works for periodic boundary conditions +------------------------------------------------------------------------- */ +void MSM::grid_swap_forward(int n, double*** &gridn) +{ + double ***gridn_tmp; + memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp"); + + double ***gridn_all; + memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all"); + + int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n]; + + memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double)); + memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double)); + + // copy inner grid cell values from gridn into gridn_tmp + + int icx,icy,icz; + + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) + gridn_tmp[icz][icy][icx] = gridn[icz][icy][icx]; + + MPI_Allreduce(&(gridn_tmp[0][0][0]), + &(gridn_all[0][0][0]), + ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]); + + // bitmask for PBCs (only works for power of 2 numbers) + + int PBCx,PBCy,PBCz; + + PBCx = nx_msm[n]-1; + PBCy = ny_msm[n]-1; + PBCz = nz_msm[n]-1; + + // copy from gridn_all into gridn to fill ghost grid cell values + + for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++) + for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++) + for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++) + gridn[icz][icy][icx] = gridn_all[icz&PBCz][icy&PBCy][icx&PBCx]; + + memory->destroy(gridn_tmp); + memory->destroy(gridn_all); +} + +/* ---------------------------------------------------------------------- + Use MPI_Allreduce to get contribution from ghost grid cells, for coarse + grids this may be cheaper than using nearest-neighbor communication + (commgrid), right now only works for periodic boundary conditions +------------------------------------------------------------------------- */ +void MSM::grid_swap_reverse(int n, double*** &gridn) +{ + double ***gridn_tmp; + memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp"); + + double ***gridn_all; + memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all"); + + int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n]; + + memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double)); + memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double)); + + // bitmask for PBCs (only works for power of 2 numbers) + + int icx,icy,icz; + int PBCx,PBCy,PBCz; + + PBCx = nx_msm[n]-1; + PBCy = ny_msm[n]-1; + PBCz = nz_msm[n]-1; + + // copy ghost grid cell values from gridn into inner portion of gridn_tmp + + for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++) + for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++) + for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++) + gridn_tmp[icz&PBCz][icy&PBCy][icx&PBCx] += gridn[icz][icy][icx]; + + MPI_Allreduce(&(gridn_tmp[0][0][0]), + &(gridn_all[0][0][0]), + ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]); + + // copy inner grid cell values from gridn_all into gridn + + for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) + for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) + for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) + gridn[icz][icy][icx] = gridn_all[icz][icy][icx]; + + memory->destroy(gridn_tmp); + memory->destroy(gridn_all); +} + +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc (used by commgrid) ------------------------------------------------------------------------- */ void MSM::pack_forward(int flag, double *buf, int nlist, int *list) @@ -1845,7 +2648,7 @@ void MSM::fieldforce() double dx,dy,dz; double phi_x,phi_y,phi_z; double dphi_x,dphi_y,dphi_z; - double ekx,eky,ekz; + double ekx,eky,ekz,etmp; // loop over my charges, interpolate electric field from nearby grid points @@ -1883,9 +2686,10 @@ void MSM::fieldforce() mx = l+nx; phi_x = phi1d[0][l]; dphi_x = dphi1d[0][l]; - ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx]; - eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx]; - ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx]; + etmp = egridn[mz][my][mx]; + ekx += dphi_x*phi_y*phi_z*etmp; + eky += phi_x*dphi_y*phi_z*etmp; + ekz += phi_x*phi_y*dphi_z*etmp; } } } @@ -1978,7 +2782,27 @@ void MSM::fieldforce_peratom() } /* ---------------------------------------------------------------------- - charge assignment into phi1d + charge assignment into phi1d (interpolation coefficients) +------------------------------------------------------------------------- */ + +void MSM::compute_phis(const double &dx, const double &dy, + const double &dz) +{ + double delx,dely,delz; + + for (int nu = nlower; nu <= nupper; nu++) { + delx = dx + double(nu); + dely = dy + double(nu); + delz = dz + double(nu); + + phi1d[0][nu] = compute_phi(delx); + phi1d[1][nu] = compute_phi(dely); + phi1d[2][nu] = compute_phi(delz); + } +} + +/* ---------------------------------------------------------------------- + charge assignment into phi1d and dphi1d (interpolation coefficients) ------------------------------------------------------------------------- */ void MSM::compute_phis_and_dphis(const double &dx, const double &dy, @@ -2006,7 +2830,7 @@ void MSM::compute_phis_and_dphis(const double &dx, const double &dy, and Hardy's thesis ------------------------------------------------------------------------- */ -double MSM::compute_phi(const double &xi) +inline double MSM::compute_phi(const double &xi) { double phi; double abs_xi = fabs(xi); @@ -2085,7 +2909,7 @@ double MSM::compute_phi(const double &xi) and Hardy's thesis ------------------------------------------------------------------------- */ -double MSM::compute_dphi(const double &xi) +inline double MSM::compute_dphi(const double &xi) { double dphi; double abs_xi = fabs(xi); @@ -2129,30 +2953,22 @@ double MSM::compute_dphi(const double &xi) double xi2 = xi*xi; double xi4 = xi2*xi2; double xi6 = xi4*xi2; - double abs_xi2 = abs_xi*abs_xi; - double abs_xi3 = abs_xi2*abs_xi; - double abs_xi4 = abs_xi2*abs_xi2; - double abs_xi5 = abs_xi4*abs_xi; - double abs_xi6 = abs_xi5*abs_xi; + double abs_xi3 = xi2*abs_xi; + double abs_xi5 = xi4*abs_xi; if (abs_xi == 0.0) { dphi = 0.0; } else if (abs_xi <= 1) { - dphi = xi*(7*xi6 + 42*xi4*abs_xi2 - 134*xi4*abs_xi - 35*xi4 - - 16*xi2*abs_xi3 - 140*xi2*abs_xi2 + 604*xi2*abs_xi + 28*xi2 + - 40*abs_xi3 + 56*abs_xi2 - 560*abs_xi)/144.0/abs_xi; + dphi = xi*(49*xi6 - 175*xi4 + 84*xi2 - 150*abs_xi5 + + 644*abs_xi3 - 560*abs_xi)/144.0/abs_xi; } else if (abs_xi <= 2) { - dphi = xi*(126*xi4*abs_xi - 21*xi4*abs_xi2 - 182*xi4 - - 28*xi2*abs_xi4 + 300*xi2*abs_xi3 - 1001*xi2*abs_xi2 + - 990*xi2*abs_xi + 154*xi2 + 24*abs_xi5 - 182*abs_xi4 + - 270*abs_xi3 + 602*abs_xi2 - 1260*abs_xi + 28)/240.0/abs_xi; + dphi = xi*(-49*xi6 - 1365*xi4 + 756*xi2 + + 450*abs_xi5 + 1260*abs_xi3 - 1260*abs_xi + 28)/240.0/abs_xi; } else if (abs_xi <= 3) { - dphi = xi*(35*xi2*abs_xi4 - 420*xi2*abs_xi3 + - 1785*xi2*abs_xi2 - 3150*xi2*abs_xi + 1918*xi2 + - 14*abs_xi6 - 330*abs_xi5 + 2660*abs_xi4 - - 9590*abs_xi3 + 15806*abs_xi2 - 9940*abs_xi + 756)/720.0/abs_xi; + dphi = xi*(49*xi6 + 4445*xi4 + 17724*xi2 - + 750*abs_xi5 - 12740*abs_xi3 - 9940*abs_xi + 756)/720.0/abs_xi; } else if (abs_xi <= 4) { - dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*abs_xi4 + - 807*abs_xi3 - 2512*abs_xi2 + 3644*abs_xi - 1944)/720.0/abs_xi; + dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*xi4 + + 807*abs_xi3 - 2512*xi2 + 3644*abs_xi - 1944)/720.0/abs_xi; } else { dphi = 0.0; } @@ -2209,7 +3025,7 @@ double MSM::compute_dphi(const double &xi) } /* ---------------------------------------------------------------------- - Compute direct interaction for intermediate grid levels + Compute direct interaction (energy) weights for intermediate grid levels ------------------------------------------------------------------------- */ void MSM::get_g_direct() { @@ -2249,7 +3065,7 @@ void MSM::get_g_direct() } /* ---------------------------------------------------------------------- - Compute direct interaction for intermediate grid levels + Compute direct interaction (virial) weights for intermediate grid levels ------------------------------------------------------------------------- */ void MSM::get_virial_direct() { @@ -2319,20 +3135,21 @@ void MSM::get_virial_direct() } /* ---------------------------------------------------------------------- - Compute direct interaction for top grid level + Compute direct interaction (energy) weights for top grid level + (nonperiodic systems only) ------------------------------------------------------------------------- */ void MSM::get_g_direct_top(int n) { int nx_top = betax[n] - alpha[n]; int ny_top = betay[n] - alpha[n]; int nz_top = betaz[n] - alpha[n]; - + int nx = 2*nx_top + 1; int ny = 2*ny_top + 1; int nz = 2*nz_top + 1; - + int nmax_top = 8*(nx+1)*(ny*1)*(nz+1); - + if (g_direct_top) memory->destroy(g_direct_top); memory->create(g_direct_top,nmax_top,"msm:g_direct_top"); @@ -2362,20 +3179,21 @@ void MSM::get_g_direct_top(int n) } /* ---------------------------------------------------------------------- - Compute direct interaction for top grid level + Compute direct interaction (virial) weights for top grid level + (nonperiodic systems only) ------------------------------------------------------------------------- */ void MSM::get_virial_direct_top(int n) { int nx_top = betax[n] - alpha[n]; int ny_top = betay[n] - alpha[n]; int nz_top = betaz[n] - alpha[n]; - + int nx = 2*nx_top + 1; int ny = 2*ny_top + 1; int nz = 2*nz_top + 1; - + int nmax_top = 8*(nx+1)*(ny*1)*(nz+1); - + if (v0_direct_top) memory->destroy(v0_direct_top); memory->create(v0_direct_top,nmax_top,"msm:v0_direct_top"); if (v1_direct_top) memory->destroy(v1_direct_top); diff --git a/src/KSPACE/msm.h b/src/KSPACE/msm.h index eb2c5a4db7..781f91c4bd 100644 --- a/src/KSPACE/msm.h +++ b/src/KSPACE/msm.h @@ -51,8 +51,10 @@ class MSM : public KSpace { int *nxhi_in,*nyhi_in,*nzhi_in; int *nxlo_out,*nylo_out,*nzlo_out; int *nxhi_out,*nyhi_out,*nzhi_out; - int *ngrid; + int *ngrid,*active_flag; int *alpha,*betax,*betay,*betaz; + int nxlo_out_all,nylo_out_all,nzlo_out_all; + int nxhi_out_all,nyhi_out_all,nzhi_out_all; int nxlo_direct,nxhi_direct,nylo_direct; int nyhi_direct,nzlo_direct,nzhi_direct; int nmax_direct; @@ -108,14 +110,19 @@ class MSM : public KSpace { void particle_map(); void make_rho(); virtual void direct(int); + void direct_peratom(int); void direct_top(int); + void direct_peratom_top(int); void restriction(int); void prolongation(int); + void grid_swap_forward(int,double*** &); + void grid_swap_reverse(int,double*** &); void fieldforce(); void fieldforce_peratom(); + void compute_phis(const double &, const double &, const double &); void compute_phis_and_dphis(const double &, const double &, const double &); - double compute_phi(const double &); - double compute_dphi(const double &); + inline double compute_phi(const double &); + inline double compute_dphi(const double &); void get_g_direct(); void get_virial_direct(); void get_g_direct_top(int); @@ -153,9 +160,9 @@ E: Kspace style requires atom attribute q The atom style defined does not have these attributes. -E: Cannot use slab correction with MSM +E: Slab correction not needed for MSM -Slab correction can only be used with Ewald and PPPM, not MSM. +Slab correction can only be used with Ewald and PPPM and is not needed by MSM. E: MSM order must be 4, 6, 8, or 10 diff --git a/src/KSPACE/msm_cg.cpp b/src/KSPACE/msm_cg.cpp index d7b4c39a6e..878e3d1045 100644 --- a/src/KSPACE/msm_cg.cpp +++ b/src/KSPACE/msm_cg.cpp @@ -78,15 +78,19 @@ void MSMCG::compute(int eflag, int vflag) int i,j,n; // set energy/virial flags - // invoke allocate_peratom() if needed for first time if (eflag || vflag) ev_setup(eflag,vflag); else evflag = evflag_atom = eflag_global = vflag_global = eflag_atom = vflag_atom = eflag_either = vflag_either = 0; + // invoke allocate_peratom() if needed for first time + if (vflag_atom && !peratom_allocate_flag) { allocate_peratom(); - for (n=0; nghost_notify(); + cg_peratom_all->setup(); + for (int n=0; nghost_notify(); cg_peratom[n]->setup(); } @@ -161,28 +165,52 @@ void MSMCG::compute(int eflag, int vflag) particle_map(); make_rho(); + // all procs reverse communicate charge density values from their ghost grid points + // to fully sum contribution in their 3d grid + current_level = 0; - cg[0]->reverse_comm(this,REVERSE_RHO); + cg_all->reverse_comm(this,REVERSE_RHO); - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks + // forward communicate charge density values to fill ghost grid points + // compute direct sum interaction and then restrict to coarser grid - for (n=0; n<=levels-2; n++) { + for (int n=0; n<=levels-2; n++) { + if (!active_flag[n]) continue; current_level = n; cg[n]->forward_comm(this,FORWARD_RHO); direct(n); restriction(n); } - - // top grid level - current_level = levels-1; - cg[levels-1]->forward_comm(this,FORWARD_RHO); - direct_top(levels-1); - for (n=levels-2; n>=0; n--) { + // compute direct interation for top grid level for nonperiodic + // and for second from top grid level for periodic + if (active_flag[levels-1]) { + if (domain->nonperiodic) { + current_level = levels-1; + cg[levels-1]->forward_comm(this,FORWARD_RHO); + direct_top(levels-1); + cg[levels-1]->reverse_comm(this,REVERSE_AD); + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } else { + // Here using MPI_Allreduce is cheaper than using commgrid + grid_swap_forward(levels-1,qgrid[levels-1]); + direct(levels-1); + grid_swap_reverse(levels-1,egrid[levels-1]); + current_level = levels-1; + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } + } + + // prolongate energy/virial from coarser grid to finer grid + // reverse communicate from ghost grid points to get full sum + + for (int n=levels-2; n>=0; n--) { + if (!active_flag[n]) continue; prolongation(n); current_level = n; @@ -198,26 +226,25 @@ void MSMCG::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks current_level = 0; - - cg[0]->forward_comm(this,FORWARD_AD); + cg_all->forward_comm(this,FORWARD_AD); // extra per-atom energy/virial communication if (vflag_atom) - cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM); + cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM); // calculate the force on my particles (interpolation) fieldforce(); - // calculate the per-atom energy for my particles + // calculate the per-atom energy/virial for my particles if (evflag_atom) fieldforce_peratom(); + // total long-range energy + const double qscale = force->qqrd2e * scale; - // Total long-range energy - if (eflag_global) { double energy_all; MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); @@ -228,7 +255,7 @@ void MSMCG::compute(int eflag, int vflag) energy *= 0.5*qscale; } - // Total long-range virial + // total long-range virial if (vflag_global) { double virial_all[6]; @@ -338,7 +365,7 @@ void MSMCG::make_rho() dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; - compute_phis_and_dphis(dx,dy,dz); + compute_phis(dx,dy,dz); z0 = q[i]; for (n = nlower; n <= nupper; n++) { diff --git a/src/STUBS/mpi.h b/src/STUBS/mpi.h index 75a4cd7aac..752465e224 100644 --- a/src/STUBS/mpi.h +++ b/src/STUBS/mpi.h @@ -43,6 +43,9 @@ extern "C" { #define MPI_MINLOC 5 #define MPI_LOR 6 +#define MPI_UNDEFINED -1 +#define MPI_COMM_NULL -1 + #define MPI_ANY_SOURCE -1 #define MPI_Comm int diff --git a/src/USER-OMP/msm_cg_omp.cpp b/src/USER-OMP/msm_cg_omp.cpp index 5d56a42182..bc78414af6 100644 --- a/src/USER-OMP/msm_cg_omp.cpp +++ b/src/USER-OMP/msm_cg_omp.cpp @@ -79,15 +79,19 @@ void MSMCGOMP::compute(int eflag, int vflag) int i,j,n; // set energy/virial flags - // invoke allocate_peratom() if needed for first time if (eflag || vflag) ev_setup(eflag,vflag); else evflag = evflag_atom = eflag_global = vflag_global = eflag_atom = vflag_atom = eflag_either = vflag_either = 0; + // invoke allocate_peratom() if needed for first time + if (vflag_atom && !peratom_allocate_flag) { allocate_peratom(); - for (n=0; nghost_notify(); + cg_peratom_all->setup(); + for (int n=0; nghost_notify(); cg_peratom[n]->setup(); } @@ -162,13 +166,17 @@ void MSMCGOMP::compute(int eflag, int vflag) particle_map(); make_rho(); + // all procs reverse communicate charge density values from their ghost grid points + // to fully sum contribution in their 3d grid + current_level = 0; - cg[0]->reverse_comm(this,REVERSE_RHO); + cg_all->reverse_comm(this,REVERSE_RHO); - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks + // forward communicate charge density values to fill ghost grid points + // compute direct sum interaction and then restrict to coarser grid - for (n=0; n<=levels-2; n++) { + for (int n=0; n<=levels-2; n++) { + if (!active_flag[n]) continue; current_level = n; cg[n]->forward_comm(this,FORWARD_RHO); @@ -176,14 +184,34 @@ void MSMCGOMP::compute(int eflag, int vflag) restriction(n); } - // top grid level - current_level = levels-1; - cg[levels-1]->forward_comm(this,FORWARD_RHO); - direct_top(levels-1); + // compute direct interation for top grid level for nonperiodic + // and for second from top grid level for periodic - for (n=levels-2; n>=0; n--) { + if (active_flag[levels-1]) { + if (domain->nonperiodic) { + current_level = levels-1; + cg[levels-1]->forward_comm(this,FORWARD_RHO); + direct_top(levels-1); + cg[levels-1]->reverse_comm(this,REVERSE_AD); + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } else { + // Here using MPI_Allreduce is cheaper than using commgrid + grid_swap_forward(levels-1,qgrid[levels-1]); + direct(levels-1); + grid_swap_reverse(levels-1,egrid[levels-1]); + current_level = levels-1; + if (vflag_atom) + cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM); + } + } + // prolongate energy/virial from coarser grid to finer grid + // reverse communicate from ghost grid points to get full sum + + for (int n=levels-2; n>=0; n--) { + if (!active_flag[n]) continue; prolongation(n); current_level = n; @@ -199,25 +227,24 @@ void MSMCGOMP::compute(int eflag, int vflag) // to fill ghost cells surrounding their 3d bricks current_level = 0; - - cg[0]->forward_comm(this,FORWARD_AD); + cg_all->forward_comm(this,FORWARD_AD); // extra per-atom energy/virial communication if (vflag_atom) - cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM); + cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM); // calculate the force on my particles (interpolation) fieldforce(); - // calculate the per-atom energy for my particles + // calculate the per-atom energy/virial for my particles if (evflag_atom) fieldforce_peratom(); - const double qscale = force->qqrd2e * scale; + // total long-range energy - // Total long-range energy + const double qscale = force->qqrd2e * scale; if (eflag_global) { double energy_all; @@ -229,7 +256,7 @@ void MSMCGOMP::compute(int eflag, int vflag) energy *= 0.5*qscale; } - // Total long-range virial + // total long-range virial if (vflag_global) { double virial_all[6]; @@ -352,7 +379,7 @@ void MSMCGOMP::make_rho() dy = ny - (x[i][1]-boxlo[1])*delyinv[0]; dz = nz - (x[i][2]-boxlo[2])*delzinv[0]; - compute_phis_and_dphis(dx,dy,dz); + compute_phis(dx,dy,dz); z0 = q[i]; for (n = nlower; n <= nupper; n++) {