/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
------------------------------------------------------------------------- */

#include "lmptype.h"
#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "msm.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "memory.h"
#include "error.h"
#include "modify.h"
#include "fix.h"

#include "math_const.h"

using namespace LAMMPS_NS;
using namespace MathConst;

#define MAXORDER 7
#define MAX_LEVELS 10
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7

/* ---------------------------------------------------------------------- */

MSM::MSM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
  if (narg < 1) error->all(FLERR,"Illegal kspace_style msm command");

  accuracy_relative = atof(arg[0]);

  nfactors = 1;
  factors = new int[nfactors];
  factors[0] = 2;

  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);

  buf1 = buf2 = NULL;

  phi1d = dphi1d = NULL;

  nmax = 0;
  part2grid = NULL;

  g_direct = NULL;

  levels = 0;
}

/* ----------------------------------------------------------------------
   free all memory
------------------------------------------------------------------------- */

MSM::~MSM()
{
  delete [] factors;
  deallocate();
  memory->destroy(part2grid);
  memory->destroy(g_direct);
  deallocate_levels();
}
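/* ----------------------------------------------------------------------
   usage sketch (illustrative): the single constructor argument is the
   relative accuracy, so an input script would typically select this
   solver with something like

     pair_style   lj/cut/coul/msm 10.0
     kspace_style msm 1.0e-4

   the pair-style name above is only an example; init() below accepts any
   pair style whose extract("cut_msm",...) returns the Coulombic cutoff
------------------------------------------------------------------------- */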
/* ----------------------------------------------------------------------
   called once before run
------------------------------------------------------------------------- */

void MSM::init()
{
  if (me == 0) {
    if (screen) fprintf(screen,"MSM initialization ...\n");
    if (logfile) fprintf(logfile,"MSM initialization ...\n");
  }

  // error check

  if (domain->triclinic)
    error->all(FLERR,"Cannot (yet) use MSM with triclinic box");
  if (domain->dimension == 2)
    error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");

  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

  if (slabflag == 1)
    error->all(FLERR,"Cannot use slab correction with MSM");

  if (domain->nonperiodic > 0)
    error->all(FLERR,"Cannot (yet) use nonperiodic boundaries with MSM");

  for (int i = 0; i < modify->nfix; i++) {
    if ((strcmp(modify->fix[i]->style,"npt") == 0) ||
        (strcmp(modify->fix[i]->style,"nph") == 0)) {
      error->all(FLERR,"Cannot (yet) use MSM with npt/nph simulation");
    }
  }

  order = 4; // 4th order interpolation scheme has been implemented for MSM

  if (order > MAXORDER) {
    char str[128];
    sprintf(str,"MSM order cannot be greater than %d",MAXORDER);
    error->all(FLERR,str);
  }

  // free all arrays previously allocated

  deallocate();

  // extract short-range Coulombic cutoff from pair style

  qqrd2e = force->qqrd2e;
  scale = 1.0;

  if (force->pair == NULL)
    error->all(FLERR,"KSpace style is incompatible with Pair style");
  int itmp;
  double *p_cutoff = (double *) force->pair->extract("cut_msm",itmp);
  if (p_cutoff == NULL)
    error->all(FLERR,"KSpace style is incompatible with Pair style");
  cutoff = *p_cutoff;

  if ((strcmp(force->kspace_style,"pppm/tip4p") == 0) ||
      (strcmp(force->kspace_style,"pppm/tip4p/proxy") == 0)) {
    error->all(FLERR,"KSpace style is incompatible with Pair style");
  }

  // compute qsum & qsqsum and give error if not charge-neutral

  qsum = qsqsum = 0.0;
  for (int i = 0; i < atom->nlocal; i++) {
    qsum += atom->q[i];
    qsqsum += atom->q[i]*atom->q[i];
  }

  double tmp;
  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
  qsum = tmp;
  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
  qsqsum = tmp;

  q2 = qsqsum * force->qqrd2e / force->dielectric;

  if (qsqsum == 0.0)
    error->all(FLERR,"Cannot use kspace solver on system with no charge");
  if (fabs(qsum) > SMALL) {
    char str[128];
    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
    error->all(FLERR,str);
    // Not yet sure of the correction needed for non-neutral systems
  }

  // set accuracy (force units) from accuracy_relative or accuracy_absolute

  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
  else accuracy = accuracy_relative * two_charge_force;

  // setup grid resolution

  set_grid();

  int flag_global = 0;

  // loop over grid levels

  for (int n=0; n<levels; n++) {

    if (nx_msm[n] >= OFFSET || ny_msm[n] >= OFFSET || nz_msm[n] >= OFFSET)
      error->all(FLERR,"MSM grid is too large");

    // global indices of MSM grid range from 0 to N-1
    // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
    //   global MSM grid that I own without ghost cells

    if (n == 0) {

      nxlo_in_d[n] = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_msm[n]);
      nxhi_in_d[n] = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_msm[n]) - 1;

      nylo_in_d[n] = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_msm[n]);
      nyhi_in_d[n] = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_msm[n]) - 1;

      nzlo_in_d[n] = static_cast<int> (comm->zsplit[comm->myloc[2]] * nz_msm[n]);
      nzhi_in_d[n] = static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz_msm[n]) - 1;

    } else {

      nxlo_in_d[n] = 0;
      nxhi_in_d[n] = nx_msm[n] - 1;

      nylo_in_d[n] = 0;
      nyhi_in_d[n] = ny_msm[n] - 1;

      nzlo_in_d[n] = 0;
      nzhi_in_d[n] = nz_msm[n] - 1;
    }

    // Use simple method of parallel communication for now

    nxlo_in[n] = 0;
    nxhi_in[n] = nx_msm[n] - 1;

    nylo_in[n] = 0;
    nyhi_in[n] = ny_msm[n] - 1;

    nzlo_in[n] = 0;
    nzhi_in[n] = nz_msm[n] - 1;

    // nlower,nupper = stencil size for mapping particles to MSM grid

    nlower = -(order-1)/2;
    nupper = order/2;
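    // worked example (illustrative): with order = 4, nlower = -(4-1)/2 = -1
    // and nupper = 4/2 = 2, so each charge touches grid points at offsets
    // {-1,0,1,2} in each dimension, i.e. a 4x4x4 = 64-point stencil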
    // shift values for particle <-> grid mapping
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

    // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
    //   global MSM grid that my particles can contribute charge to
    //   effectively nlo_in,nhi_in + ghost cells
    // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
    //   position a particle in my box can be at
    // dist[3] = particle position bound = subbox + skin/2.0
    // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping

    double *prd,*sublo,*subhi;

    //prd = domain->prd;
    //boxlo = domain->boxlo;
    //sublo = domain->sublo;
    //subhi = domain->subhi;

    // Use only one partition for now

    prd = domain->prd;
    boxlo = domain->boxlo;
    sublo = boxlo;
    subhi = domain->boxhi;

    double xprd = prd[0];
    double yprd = prd[1];
    double zprd = prd[2];

    double dist[3];
    double cuthalf = 0.0;
    if (n == 0) cuthalf = 0.5*neighbor->skin; // Only applies to finest grid
    dist[0] = dist[1] = dist[2] = cuthalf;

    int nlo,nhi;

    nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
                            nx_msm[n]/xprd + OFFSET) - OFFSET;
    nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
                            nx_msm[n]/xprd + OFFSET) - OFFSET;
    nxlo_out[n] = nlo + nlower;
    nxhi_out[n] = nhi + nupper;

    nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
                            ny_msm[n]/yprd + OFFSET) - OFFSET;
    nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
                            ny_msm[n]/yprd + OFFSET) - OFFSET;
    nylo_out[n] = nlo + nlower;
    nyhi_out[n] = nhi + nupper;

    nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
                            nz_msm[n]/zprd + OFFSET) - OFFSET;
    nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
                            nz_msm[n]/zprd + OFFSET) - OFFSET;
    nzlo_out[n] = nlo + nlower;
    nzhi_out[n] = nhi + nupper;

    // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
    //   that overlay domain I own
    // proc in that direction tells me via sendrecv()
    // if no neighbor proc, value is from self since I have ghosts regardless

    int nplanes;
    MPI_Status status;

    nplanes = nxlo_in[n] - nxlo_out[n];
    if (comm->procneigh[0][0] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
                   &nxhi_ghost[n],1,MPI_INT,comm->procneigh[0][1],0,
                   world,&status);
    else nxhi_ghost[n] = nplanes;

    nplanes = nxhi_out[n] - nxhi_in[n];
    if (comm->procneigh[0][1] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
                   &nxlo_ghost[n],1,MPI_INT,comm->procneigh[0][0],0,
                   world,&status);
    else nxlo_ghost[n] = nplanes;

    nplanes = nylo_in[n] - nylo_out[n];
    if (comm->procneigh[1][0] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
                   &nyhi_ghost[n],1,MPI_INT,comm->procneigh[1][1],0,
                   world,&status);
    else nyhi_ghost[n] = nplanes;

    nplanes = nyhi_out[n] - nyhi_in[n];
    if (comm->procneigh[1][1] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
                   &nylo_ghost[n],1,MPI_INT,comm->procneigh[1][0],0,
                   world,&status);
    else nylo_ghost[n] = nplanes;

    nplanes = nzlo_in[n] - nzlo_out[n];
    if (comm->procneigh[2][0] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
                   &nzhi_ghost[n],1,MPI_INT,comm->procneigh[2][1],0,
                   world,&status);
    else nzhi_ghost[n] = nplanes;

    nplanes = nzhi_out[n] - nzhi_in[n];
    if (comm->procneigh[2][1] != me)
      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
                   &nzlo_ghost[n],1,MPI_INT,comm->procneigh[2][0],0,
                   world,&status);
    else nzlo_ghost[n] = nplanes;

    int flag = 0;

    if (n == 0) {

      // test that ghost overlap is not bigger than my sub-domain

      if (nxlo_ghost[n] > nxhi_in[n]-nxlo_in[n]+1) flag = 1;
      if (nxhi_ghost[n] > nxhi_in[n]-nxlo_in[n]+1) flag = 1;
      if (nylo_ghost[n] > nyhi_in[n]-nylo_in[n]+1) flag = 1;
      if (nyhi_ghost[n] > nyhi_in[n]-nylo_in[n]+1) flag = 1;
      if (nzlo_ghost[n] > nzhi_in[n]-nzlo_in[n]+1) flag = 1;
      if (nzhi_ghost[n] > nzhi_in[n]-nzlo_in[n]+1) flag = 1;
    }

    int flag_all;
    MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);

    if (flag_all != 0) {
      char str[128];
      sprintf(str,"MSM parallel communication error, try reducing number of procs");
      error->all(FLERR,str);
    }
  }

  // Largest MSM grid for this proc, including ghosts

  ngrid = (nxhi_out[0]-nxlo_out[0]+1) * (nyhi_out[0]-nylo_out[0]+1) *
    (nzhi_out[0]-nzlo_out[0]+1);
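  // note: ngrid is the finest-level brick size including ghost cells; it is
  // also used as the element count for the level-0 MPI_Allreduce performed
  // in charge_swap() and potential_swap() below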
  // buffer space for use in ghost_swap and fillbrick
  // idel = max # of ghost planes to send or recv in +/- dir of each dim
  // nx,ny,nz = owned planes (including ghosts) in each dim
  // nxx,nyy,nzz = max # of grid cells to send in each dim
  // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick

  int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;

  idelx = MAX(nxlo_ghost[0],nxhi_ghost[0]);
  idelx = MAX(idelx,nxhi_out[0]-nxhi_in[0]);
  idelx = MAX(idelx,nxlo_in[0]-nxlo_out[0]);

  idely = MAX(nylo_ghost[0],nyhi_ghost[0]);
  idely = MAX(idely,nyhi_out[0]-nyhi_in[0]);
  idely = MAX(idely,nylo_in[0]-nylo_out[0]);

  idelz = MAX(nzlo_ghost[0],nzhi_ghost[0]);
  idelz = MAX(idelz,nzhi_out[0]-nzhi_in[0]);
  idelz = MAX(idelz,nzlo_in[0]-nzlo_out[0]);

  nx = nxhi_out[0] - nxlo_out[0] + 1;
  ny = nyhi_out[0] - nylo_out[0] + 1;
  nz = nzhi_out[0] - nzlo_out[0] + 1;

  nxx = idelx * ny * nz;
  nyy = idely * nx * nz;
  nzz = idelz * nx * ny;

  nbuf = MAX(nxx,nyy);
  nbuf = MAX(nbuf,nzz);
  nbuf *= 3;

  double estimated_error = estimate_total_error();

  // print stats

  int ngrid_max,nbuf_max;

  // All processors have a copy of the complete grid at each level

  nbuf_max = nbuf;
  ngrid_max = ngrid;

  if (me == 0) {
    if (screen) {
      fprintf(screen,"  brick buffer size/proc = %d %d\n",
              ngrid_max,nbuf_max);
      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
              estimated_error);
      fprintf(screen,"  estimated relative force accuracy = %g\n",
              estimated_error/two_charge_force);
    }
    if (logfile) {
      fprintf(logfile,"  brick buffer size/proc = %d %d\n",
              ngrid_max,nbuf_max);
      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
              estimated_error);
      fprintf(logfile,"  estimated relative force accuracy = %g\n",
              estimated_error/two_charge_force);
    }
  }

  // allocate K-space dependent memory

  allocate();
}

/* ----------------------------------------------------------------------
   estimate 1d grid RMS force error for MSM from Sec 3.1 of Hardy's thesis
------------------------------------------------------------------------- */

double MSM::estimate_1d_error(double h, double prd)
{
  double a = cutoff;
  int p = order - 1;
  double error_1d = pow(h,(p-1))/pow(a,(p+1));
  error_1d *= 24.0*q2/sqrt(atom->natoms*a*prd*prd);
  return error_1d;
}
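/* ----------------------------------------------------------------------
   note (illustrative): with grid spacing h, cutoff a, and p = order-1,
   the estimate above is

     error_1d ~ 24 q2 h^(p-1) / ( a^(p+1) sqrt(natoms * a * prd^2) )

   so for the 4th-order scheme (p = 3) halving h reduces the estimated
   error by about 2^(p-1) = 4, which is how set_grid() below picks the
   finest grid size by repeated doubling
------------------------------------------------------------------------- */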
/* ----------------------------------------------------------------------
   estimate 3d grid RMS force error
------------------------------------------------------------------------- */

double MSM::estimate_3d_error()
{
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double error_x = estimate_1d_error(xprd/nx_msm[0],xprd);
  double error_y = estimate_1d_error(yprd/ny_msm[0],yprd);
  double error_z = estimate_1d_error(zprd/nz_msm[0],zprd);
  double error_3d =
    sqrt(error_x*error_x + error_y*error_y + error_z*error_z) / sqrt(3.0);
  return error_3d;
}

/* ----------------------------------------------------------------------
   estimate total RMS force error
------------------------------------------------------------------------- */

double MSM::estimate_total_error()
{
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  bigint natoms = atom->natoms;

  double grid_error = estimate_3d_error();
  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
  double short_range_error = 0.0;
  double table_error =
    estimate_table_accuracy(q2_over_sqrt,short_range_error);
  double estimated_total_error = sqrt(grid_error*grid_error +
    short_range_error*short_range_error + table_error*table_error);

  return estimated_total_error;
}

/* ----------------------------------------------------------------------
   adjust MSM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */

void MSM::setup()
{
  int i,j,k,l,m,n;
  double *prd;
  double a = cutoff;

  // volume-dependent factors

  prd = domain->prd;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  volume = xprd * yprd * zprd;

  // loop over grid levels

  for (int n=0; n<levels; n++) {
    delxinv[n] = nx_msm[n]/xprd;
    delyinv[n] = ny_msm[n]/yprd;
    delzinv[n] = nz_msm[n]/zprd;

    delvolinv[n] = delxinv[n]*delyinv[n]*delzinv[n];
  }

  nxhi_direct = static_cast<int> (2.0*a*delxinv[0]);
  nxlo_direct = -nxhi_direct;
  nyhi_direct = static_cast<int> (2.0*a*delyinv[0]);
  nylo_direct = -nyhi_direct;
  nzhi_direct = static_cast<int> (2.0*a*delzinv[0]);
  nzlo_direct = -nzhi_direct;

  nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);

  get_g_direct();

  boxlo = domain->boxlo;
}
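/* ----------------------------------------------------------------------
   illustrative summary of the call sequence in compute() below:
     particle_map()/make_rho()    - anterpolate point charges to finest grid
     ghost_swap(0)/charge_swap(0) - sum and replicate the charge grid
     direct(...,0)                - direct sum of smoothed kernel on level 0
     restrict()/direct()          - coarsen charge and repeat on upper levels
     prolongate()                 - carry potentials back down to level 0
     fillbrick(0)/fieldforce()    - fill ghosts and interpolate forces
   the self-energy qsqsum*gamma(0.0)/cutoff is subtracted at the end
------------------------------------------------------------------------- */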
/* ----------------------------------------------------------------------
   compute the MSM long-range force, energy, virial
------------------------------------------------------------------------- */

void MSM::compute(int eflag, int vflag)
{
  int i;

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
         eflag_atom = vflag_atom = 0;

  // extend size of per-atom arrays if necessary

  if (atom->nlocal > nmax) {
    memory->destroy(part2grid);
    nmax = atom->nmax;
    memory->create(part2grid,nmax,3,"msm:part2grid");
  }

  energy = 0.0;

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid (anterpolation)

  particle_map();
  make_rho();

  // all procs communicate density values from their ghost cells
  //   to fully sum contribution in their 3d bricks

  ghost_swap(0);

  charge_swap(0);

  // Direct sum on finest grid level is parallel

  direct(eflag_global,vflag_global,0);

  potential_swap(0);

  restrict(eflag_global,vflag_global,0);

  // compute potential gradient on my MSM grid and
  //   portion of e_long on this proc's MSM grid
  // return gradients (electric fields) in 3d brick decomposition

  // direct sum on each coarser grid level, restricting to the next
  //   coarser level except at the top of the hierarchy

  for (int n=1; n<levels; n++) {
    direct(eflag_global,vflag_global,n);
    if (n < levels-1) restrict(eflag_global,vflag_global,n);
  }

  // prolongate the potential back down to the finest grid level

  for (int n=levels-2; n>=0; n--)
    prolongate(eflag_global,vflag_global,n);

  // all procs communicate E-field values
  //   to fill ghost cells surrounding their 3d bricks

  fillbrick(0);

  // calculate the force on my particles (interpolation)

  fieldforce();

  // sum energy across procs and add in volume-dependent term

  if (eflag_global) {
    double e_self = qsqsum*gamma(0.0)/cutoff; // Self-energy term
    energy -= e_self;

    double energy_all;

    energy *= 0.5*qqrd2e*scale;
  }
}

/* ----------------------------------------------------------------------
   allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void MSM::allocate()
{
  memory->create(buf1,nbuf,"msm:buf1");
  memory->create(buf2,nbuf,"msm:buf2");

  // summation coeffs

  memory->create2d_offset(phi1d,3,nlower-2,nupper+2,"msm:phi1d");
  memory->create2d_offset(dphi1d,3,nlower-2,nupper+2,"msm:dphi1d");

  // allocate grid levels

  for (int n=0; n<levels; n++) {
    memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n],
            nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid");
    memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n],
            nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid");
  }
}

/* ----------------------------------------------------------------------
   deallocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void MSM::deallocate()
{
  memory->destroy(buf1);
  memory->destroy(buf2);

  memory->destroy2d_offset(phi1d,nlower-2);
  memory->destroy2d_offset(dphi1d,nlower-2);

  // deallocate grid levels

  for (int n=0; n<levels; n++) {
    if (qgrid[n])
      memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
    if (egrid[n])
      memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
  }
}

/* ----------------------------------------------------------------------
   allocate memory that depends on # of grid levels
------------------------------------------------------------------------- */

void MSM::allocate_levels()
{
  nx_msm = new int[levels];
  ny_msm = new int[levels];
  nz_msm = new int[levels];

  nxlo_in = new int[levels];
  nylo_in = new int[levels];
  nzlo_in = new int[levels];
  nxhi_in = new int[levels];
  nyhi_in = new int[levels];
  nzhi_in = new int[levels];

  nxlo_in_d = new int[levels];
  nylo_in_d = new int[levels];
  nzlo_in_d = new int[levels];
  nxhi_in_d = new int[levels];
  nyhi_in_d = new int[levels];
  nzhi_in_d = new int[levels];

  nxlo_out = new int[levels];
  nylo_out = new int[levels];
  nzlo_out = new int[levels];
  nxhi_out = new int[levels];
  nyhi_out = new int[levels];
  nzhi_out = new int[levels];

  nxlo_ghost = new int[levels];
  nylo_ghost = new int[levels];
  nzlo_ghost = new int[levels];
  nxhi_ghost = new int[levels];
  nyhi_ghost = new int[levels];
  nzhi_ghost = new int[levels];

  delxinv = new double[levels];
  delyinv = new double[levels];
  delzinv = new double[levels];
  delvolinv = new double[levels];

  qgrid = new double***[levels];
  egrid = new double***[levels];
}

/* ----------------------------------------------------------------------
   deallocate memory that depends on # of grid levels
------------------------------------------------------------------------- */

void MSM::deallocate_levels()
{
  delete [] nx_msm;
  delete [] ny_msm;
  delete [] nz_msm;

  delete [] nxlo_in;
  delete [] nylo_in;
  delete [] nzlo_in;
  delete [] nxhi_in;
  delete [] nyhi_in;
  delete [] nzhi_in;

  delete [] nxlo_in_d;
  delete [] nylo_in_d;
  delete [] nzlo_in_d;
  delete [] nxhi_in_d;
  delete [] nyhi_in_d;
  delete [] nzhi_in_d;

  delete [] nxlo_out;
  delete [] nylo_out;
  delete [] nzlo_out;
  delete [] nxhi_out;
  delete [] nyhi_out;
  delete [] nzhi_out;

  delete [] nxlo_ghost;
  delete [] nylo_ghost;
  delete [] nzlo_ghost;
  delete [] nxhi_ghost;
  delete [] nyhi_ghost;
  delete [] nzhi_ghost;

  delete [] delxinv;
  delete [] delyinv;
  delete [] delzinv;
  delete [] delvolinv;

  delete [] qgrid;
  delete [] egrid;
}
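/* ----------------------------------------------------------------------
   worked example (illustrative): for a cubic box whose error estimate
   calls for at least a 32^3 finest grid, factorable() reports
   xlevels = ylevels = zlevels = 6, so set_grid() builds the hierarchy
   nx_msm[0..5] = 32,16,8,4,2,1 (each level coarsens by a factor of 2)
------------------------------------------------------------------------- */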
/* ----------------------------------------------------------------------
   set size of MSM grids
------------------------------------------------------------------------- */

void MSM::set_grid()
{
  if (accuracy_relative <= 0.0)
    error->all(FLERR,"KSpace accuracy must be > 0");

  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;

  int nx_max,ny_max,nz_max;

  if (!gridflag) {
    nx_max = ny_max = nz_max = 2;
    double hx = xprd/nx_max;
    double hy = yprd/ny_max;
    double hz = zprd/nz_max;

    double x_error = 2.0*accuracy;
    double y_error = 2.0*accuracy;
    double z_error = 2.0*accuracy;

    while (x_error > accuracy) {
      nx_max *= 2;
      hx = xprd/nx_max;
      x_error = estimate_1d_error(hx,xprd);
    }

    while (y_error > accuracy) {
      ny_max *= 2;
      hy = yprd/ny_max;
      y_error = estimate_1d_error(hy,yprd);
    }

    while (z_error > accuracy) {
      nz_max *= 2;
      hz = zprd/nz_max;
      z_error = estimate_1d_error(hz,zprd);
    }

  } else {
    nx_max = nx_msm_max;
    ny_max = ny_msm_max;
    nz_max = nz_msm_max;
  }

  // boost grid size until it is factorable

  int flag = 0;
  int xlevels,ylevels,zlevels;

  while (!factorable(nx_max,flag,xlevels)) nx_max++;
  while (!factorable(ny_max,flag,ylevels)) ny_max++;
  while (!factorable(nz_max,flag,zlevels)) nz_max++;

  if (flag)
    error->warning(FLERR,"Number of MSM mesh points increased to be a multiple of 2");

  // Find maximum number of levels

  levels = MAX(xlevels,ylevels);
  levels = MAX(levels,zlevels);

  if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels");

  allocate_levels();

  for (int n = 0; n < levels; n++) {

    if (xlevels-n-1 > 0)
      nx_msm[n] = static_cast<int> (pow(2.0,xlevels-n-1));
    else
      nx_msm[n] = 1;

    if (ylevels-n-1 > 0)
      ny_msm[n] = static_cast<int> (pow(2.0,ylevels-n-1));
    else
      ny_msm[n] = 1;

    if (zlevels-n-1 > 0)
      nz_msm[n] = static_cast<int> (pow(2.0,zlevels-n-1));
    else
      nz_msm[n] = 1;
  }

  // Output grid stats

  if (me == 0) {
    if (screen) {
      fprintf(screen,"  grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
      fprintf(screen,"  stencil order = %d\n",order);
    }
    if (logfile) {
      fprintf(logfile,"  grid = %d %d %d\n",nx_msm[0],ny_msm[0],nz_msm[0]);
      fprintf(logfile,"  stencil order = %d\n",order);
    }
  }
}

/* ----------------------------------------------------------------------
   check if all factors of n are in list of factors
   return 1 if yes, 0 if no
------------------------------------------------------------------------- */

int MSM::factorable(int n, int &flag, int &levels)
{
  int i,norig;
  norig = n;
  levels = 1;

  while (n > 1) {
    for (i = 0; i < nfactors; i++) {
      if (n % factors[i] == 0) {
        n /= factors[i];
        levels++;
        break;
      }
    }
    if (i == nfactors) {
      flag = 1;
      return 0;
    }
  }

  return 1;
}

/* ----------------------------------------------------------------------
   MPI-Reduce so each processor has all the info it needs
------------------------------------------------------------------------- */

void MSM::charge_swap(int n)
{
  double ***qgridn = qgrid[n];
  double ***qgridn_all;
  memory->create3d_offset(qgridn_all,nzlo_out[n],nzhi_out[n],nylo_out[n],nyhi_out[n],
                          nxlo_out[n],nxhi_out[n],"msm:qgrid_all");
  memset(&(qgridn_all[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid*sizeof(double));

  MPI_Allreduce(&(qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),
                &(qgridn_all[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),
                ngrid,MPI_DOUBLE,MPI_SUM,world);

  // Swap pointers between qgridn and qgridn_all to avoid need of copy operation

  double ***tmp;
  tmp = qgridn;
  qgrid[n] = qgridn_all;
  qgridn_all = tmp;

  memory->destroy3d_offset(qgridn_all,nzlo_out[n],nylo_out[n],nxlo_out[n]);
}

/* ----------------------------------------------------------------------
   MPI-Reduce so each processor has all the info it needs
------------------------------------------------------------------------- */

void MSM::potential_swap(int n)
{
  double ***egridn = egrid[n];
  double ***egridn_all;
  memory->create3d_offset(egridn_all,nzlo_out[n],nzhi_out[n],nylo_out[n],nyhi_out[n],
                          nxlo_out[n],nxhi_out[n],"msm:egrid_all");
  memset(&(egridn_all[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid*sizeof(double));

  MPI_Allreduce(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),
                &(egridn_all[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),
                ngrid,MPI_DOUBLE,MPI_SUM,world);

  // Swap pointers between egridn and egridn_all to avoid need of copy operation

  double ***tmp;
  tmp = egridn;
  egrid[n] = egridn_all;
  egridn_all = tmp;

  memory->destroy3d_offset(egridn_all,nzlo_out[n],nylo_out[n],nxlo_out[n]);
}
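/* ----------------------------------------------------------------------
   note on the two communication patterns used here: charge_swap() and
   potential_swap() above simply MPI_Allreduce the whole brick so every
   processor holds a complete copy of that grid level, while ghost_swap()
   below exchanges only the ghost planes with the six face neighbors
   (pack ghost planes into buf1, sendrecv with the neighbor, then sum the
   received buf2 values into the owned cells)
------------------------------------------------------------------------- */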
/* ----------------------------------------------------------------------
   ghost-swap to accumulate full density in brick decomposition
------------------------------------------------------------------------- */

void MSM::ghost_swap(int n)
{
  double ***qgridn = qgrid[n];

  int i,k,ix,iy,iz;
  MPI_Request request;
  MPI_Status status;

  // pack my ghosts for +x processor
  // pass data to self or +x processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxhi_in[n]+1; ix <= nxhi_out[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[0][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[0][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_in[n]; ix < nxlo_in[n]+nxlo_ghost[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];

  // pack my ghosts for -x processor
  // pass data to self or -x processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_out[n]; ix < nxlo_in[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[0][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[0][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxhi_in[n]-nxhi_ghost[n]+1; ix <= nxhi_in[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];

  // pack my ghosts for +y processor
  // pass data to self or +y processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nyhi_in[n]+1; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[1][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[1][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_in[n]; iy < nylo_in[n]+nylo_ghost[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];

  // pack my ghosts for -y processor
  // pass data to self or -y processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy < nylo_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[1][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[1][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nyhi_in[n]-nyhi_ghost[n]+1; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];

  // pack my ghosts for +z processor
  // pass data to self or +z processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzhi_in[n]+1; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[2][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[2][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_in[n]; iz < nzlo_in[n]+nzlo_ghost[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];

  // pack my ghosts for -z processor
  // pass data to self or -z processor
  // unpack and sum recv data into my real cells

  k = 0;
  for (iz = nzlo_out[n]; iz < nzlo_in[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        buf1[k++] = qgridn[iz][iy][ix];

  if (comm->procneigh[2][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[2][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzhi_in[n]-nzhi_ghost[n]+1; iz <= nzhi_in[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++)
        qgridn[iz][iy][ix] += buf2[k++];
}
/* ----------------------------------------------------------------------
   find center grid pt for each of my particles
   check that full stencil for the particle will fit in my 3d brick
   store central grid pt indices in part2grid array
------------------------------------------------------------------------- */

void MSM::particle_map()
{
  int nx,ny,nz;

  double **x = atom->x;
  int nlocal = atom->nlocal;

  int flag = 0;
  for (int i = 0; i < nlocal; i++) {

    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv[0]+OFFSET) - OFFSET;
    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv[0]+OFFSET) - OFFSET;
    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv[0]+OFFSET) - OFFSET;

    part2grid[i][0] = nx;
    part2grid[i][1] = ny;
    part2grid[i][2] = nz;

    // check that entire stencil around nx,ny,nz will fit in my 3d brick

    if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
        ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
        nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0])
      flag = 1;
  }

  if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
}

/* ----------------------------------------------------------------------
   create discretized "density" on section of global grid due to my particles
   density(x,y,z) = charge "density" at grid points of my 3d brick
   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
   in global grid
------------------------------------------------------------------------- */

void MSM::make_rho()
{
  //fprintf(screen,"MSM anterpolation\n\n");

  int i,l,m,n,nn,nx,ny,nz,mx,my,mz;
  double dx,dy,dz,x0,y0,z0;

  // clear 3d density array

  double ***qgridn = qgrid[0];

  memset(&(qgridn[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid*sizeof(double));

  // loop over my charges, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt

  double *q = atom->q;
  double **x = atom->x;
  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
    dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
    dz = nz - (x[i][2]-boxlo[2])*delzinv[0];

    compute_phis_and_dphis(dx,dy,dz);

    z0 = q[i];
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      y0 = z0*phi1d[2][n];
      for (m = nlower; m <= nupper; m++) {
        my = m+ny;
        x0 = y0*phi1d[1][m];
        for (l = nlower; l <= nupper; l++) {
          mx = l+nx;
          qgridn[mz][my][mx] += x0*phi1d[0][l];
        }
      }
    }
  }
}
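/* ----------------------------------------------------------------------
   note (illustrative) on the "& PBC" indexing used by direct(), restrict()
   and prolongate() below: because every grid dimension is a power of 2,
   (i & (nx_msm[n]-1)) is equivalent to the periodic wrap i mod nx_msm[n],
   e.g. with nx_msm[n] = 8 an index of 9 maps to 9 & 7 = 1 and an index
   of -1 maps to -1 & 7 = 7
------------------------------------------------------------------------- */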
/* ----------------------------------------------------------------------
   MSM direct part procedure for intermediate grid levels
------------------------------------------------------------------------- */

void MSM::direct(int eflag, int vflag, int n)
{
  //fprintf(screen,"Direct contribution on level %i\n\n",n);

  double ***egridn = egrid[n];
  double ***qgridn = qgrid[n];

  // bitmask for PBCs (only works for power of 2 numbers)

  int PBCx,PBCy,PBCz;

  PBCx = nx_msm[n]-1;
  PBCy = ny_msm[n]-1;
  PBCz = nz_msm[n]-1;

  // zero out electric field brick

  for (int icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
    for (int icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
      for (int icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
        egridn[icz][icy][icx] = 0.0;

  // Simple parallelization of direct sum

  for (int icz = nzlo_in_d[n]; icz <= nzhi_in_d[n]; icz++) {
    for (int icy = nylo_in_d[n]; icy <= nyhi_in_d[n]; icy++) {
      for (int icx = nxlo_in_d[n]; icx <= nxhi_in_d[n]; icx++) {

        // do double loop over points on the intermediate grid level
        // for now, assume I own all points on the intermediate grid level

        int k = 0;
        for (int iz = nzlo_direct; iz <= nzhi_direct; iz++) {
          for (int iy = nylo_direct; iy <= nyhi_direct; iy++) {
            for (int ix = nxlo_direct; ix <= nxhi_direct; ix++) {
              egridn[icz][icy][icx] += g_direct[n][k++] *
                qgridn[(icz+iz) & PBCz][(icy+iy) & PBCy][(icx+ix) & PBCx];
            }
          }
        }

      }
    }
  }
}

/* ----------------------------------------------------------------------
   MSM restrict procedure for intermediate grid levels
------------------------------------------------------------------------- */

void MSM::restrict(int eflag, int vflag, int n)
{
  //fprintf(screen,"Restricting from level %i to %i\n\n",n,n+1);

  int p = order-1;

  double ***qgrid1 = qgrid[n];
  double ***qgrid2 = qgrid[n+1];

  // bitmask for PBCs (only works for power of 2 numbers)

  int PBCx,PBCy,PBCz;

  PBCx = nx_msm[n]-1;
  PBCy = ny_msm[n]-1;
  PBCz = nz_msm[n]-1;

  // restrict grid (going from grid n to grid n+1, i.e. to a coarser grid)

  for (int nu=-p; nu<=p; nu++) {
    phi1d[0][nu] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
    phi1d[1][nu] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
    phi1d[2][nu] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
  }

  for (int kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
    for (int jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
      for (int ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
        qgrid2[kp][jp][ip] = 0.0;

        int ic = static_cast<int> (ip*delxinv[n]/delxinv[n+1]);
        int jc = static_cast<int> (jp*delyinv[n]/delyinv[n+1]);
        int kc = static_cast<int> (kp*delzinv[n]/delzinv[n+1]);

        for (int k=-p; k<=p; k++) // Could make this faster by eliminating zeros
          for (int j=-p; j<=p; j++)
            for (int i=-p; i<=p; i++)
              qgrid2[kp][jp][ip] +=
                qgrid1[(kc+k)&PBCz][(jc+j)&PBCy][(ic+i)&PBCx] *
                phi1d[0][i]*phi1d[1][j]*phi1d[2][k];
      }
}
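/* ----------------------------------------------------------------------
   note: restrict() and prolongate() apply the same nodal-basis weights
   phi1d evaluated at the coarse/fine spacing ratio; restriction gathers
   fine-grid charge onto each coarse point, while prolongation scatters
   each coarse-grid potential back onto the surrounding fine points, so
   the two operators are transposes of one another
------------------------------------------------------------------------- */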
/* ----------------------------------------------------------------------
   MSM prolongate procedure for intermediate grid levels
------------------------------------------------------------------------- */

void MSM::prolongate(int eflag, int vflag, int n)
{
  //fprintf(screen,"Prolongating from level %i to %i\n\n",n+1,n);

  int p = order-1;

  double ***egrid1 = egrid[n];
  double ***egrid2 = egrid[n+1];

  // bitmask for PBCs (only works for power of 2 numbers)

  int PBCx,PBCy,PBCz;

  PBCx = nx_msm[n]-1;
  PBCy = ny_msm[n]-1;
  PBCz = nz_msm[n]-1;

  // prolongate grid (going from grid n+1 to grid n, i.e. to a finer grid)

  for (int nu=-p; nu<=p; nu++) {
    phi1d[0][nu] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
    phi1d[1][nu] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
    phi1d[2][nu] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
  }

  for (int kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
    for (int jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
      for (int ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {

        int ic = static_cast<int> (ip*delxinv[n]/delxinv[n+1]);
        int jc = static_cast<int> (jp*delyinv[n]/delyinv[n+1]);
        int kc = static_cast<int> (kp*delzinv[n]/delzinv[n+1]);

        for (int k=-p; k<=p; k++) // Could make this faster by eliminating zeros
          for (int j=-p; j<=p; j++)
            for (int i=-p; i<=p; i++)
              egrid1[(kc+k)&PBCz][(jc+j)&PBCy][(ic+i)&PBCx] +=
                egrid2[kp][jp][ip] * phi1d[0][i]*phi1d[1][j]*phi1d[2][k];
      }
}
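/* ----------------------------------------------------------------------
   note: fillbrick() is the inverse communication of ghost_swap(): owned
   field values are packed and copied (assigned, not summed) into the
   ghost cells of the neighboring bricks, so that fieldforce() can read a
   complete stencil around every particle it owns
------------------------------------------------------------------------- */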
/* ----------------------------------------------------------------------
   ghost-swap to fill ghost cells of my brick with field values
------------------------------------------------------------------------- */

void MSM::fillbrick(int n)
{
  double ***egridn = egrid[n];

  int i,k,ix,iy,iz;
  MPI_Request request;
  MPI_Status status;

  // pack my real cells for +z processor
  // pass data to self or +z processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzhi_in[n]-nzhi_ghost[n]+1; iz <= nzhi_in[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[2][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[2][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz < nzlo_in[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }

  // pack my real cells for -z processor
  // pass data to self or -z processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzlo_in[n]; iz < nzlo_in[n]+nzlo_ghost[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[2][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[2][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzhi_in[n]+1; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_in[n]; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }

  // pack my real cells for +y processor
  // pass data to self or +y processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nyhi_in[n]-nyhi_ghost[n]+1; iy <= nyhi_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[1][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[1][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy < nylo_in[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }

  // pack my real cells for -y processor
  // pass data to self or -y processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_in[n]; iy < nylo_in[n]+nylo_ghost[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[1][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[1][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nyhi_in[n]+1; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_in[n]; ix <= nxhi_in[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }

  // pack my real cells for +x processor
  // pass data to self or +x processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxhi_in[n]-nxhi_ghost[n]+1; ix <= nxhi_in[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[0][1] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[0][1],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_out[n]; ix < nxlo_in[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }

  // pack my real cells for -x processor
  // pass data to self or -x processor
  // unpack and copy recv data into my ghost cells

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxlo_in[n]; ix < nxlo_in[n]+nxlo_ghost[n]; ix++) {
        buf1[k++] = egridn[iz][iy][ix];
      }

  if (comm->procneigh[0][0] == me)
    for (i = 0; i < k; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
    MPI_Send(buf1,k,MPI_DOUBLE,comm->procneigh[0][0],0,world);
    MPI_Wait(&request,&status);
  }

  k = 0;
  for (iz = nzlo_out[n]; iz <= nzhi_out[n]; iz++)
    for (iy = nylo_out[n]; iy <= nyhi_out[n]; iy++)
      for (ix = nxhi_in[n]+1; ix <= nxhi_out[n]; ix++) {
        egridn[iz][iy][ix] = buf2[k++];
      }
}
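/* ----------------------------------------------------------------------
   note (illustrative): fieldforce() evaluates the E-field at each particle
   from the tensor-product basis, e.g. the x component accumulates
   dphi(x)*phi(y)*phi(z)*egrid over the stencil and is then scaled by
   delxinv[0] to convert the derivative from grid units to box units;
   the force added to the particle is qqrd2e*scale*q times that field
------------------------------------------------------------------------- */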
/* ----------------------------------------------------------------------
   interpolate from grid to get force on my particles
------------------------------------------------------------------------- */

void MSM::fieldforce()
{
  //fprintf(screen,"MSM interpolation\n\n");

  double ***egridn = egrid[0];
  double ***qgridn = qgrid[0];

  int i,l,m,n,nx,ny,nz,mx,my,mz;
  double dx,dy,dz;
  double phi_x,phi_y,phi_z;
  double dphi_x,dphi_y,dphi_z;
  double ekx,eky,ekz;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  double *q = atom->q;
  double **x = atom->x;
  double **f = atom->f;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
    dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
    dz = nz - (x[i][2]-boxlo[2])*delzinv[0];

    compute_phis_and_dphis(dx,dy,dz);

    ekx = eky = ekz = 0.0;
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      phi_z = phi1d[2][n];
      dphi_z = dphi1d[2][n];
      for (m = nlower; m <= nupper; m++) {
        my = m+ny;
        phi_y = phi1d[1][m];
        dphi_y = dphi1d[1][m];
        for (l = nlower; l <= nupper; l++) {
          mx = l+nx;
          phi_x = phi1d[0][l];
          dphi_x = dphi1d[0][l];
          ekx += dphi_x*phi_y*phi_z*egridn[mz][my][mx];
          eky += phi_x*dphi_y*phi_z*egridn[mz][my][mx];
          ekz += phi_x*phi_y*dphi_z*egridn[mz][my][mx];
        }
      }
    }

    ekx *= delxinv[0];
    eky *= delyinv[0];
    ekz *= delzinv[0];

    // convert E-field to force

    const double qfactor = force->qqrd2e*scale*q[i];
    f[i][0] += qfactor*ekx;
    f[i][1] += qfactor*eky;
    f[i][2] += qfactor*ekz;
  }

  // Sum total long-range energy

  for (int kp = 0; kp < nz_msm[0]; kp++)
    for (int jp = 0; jp < ny_msm[0]; jp++)
      for (int ip = 0; ip < nx_msm[0]; ip++)
        energy += egridn[kp][jp][ip]*qgridn[kp][jp][ip];
}

/* ---------------------------------------------------------------------- */

void MSM::get_g_direct()
{
  memory->destroy(g_direct);
  memory->create(g_direct,levels,nmax_direct,"msm:g_direct");

  double a = cutoff;

  for (int n=0; n