From cf16cc1dd2e72c0b008042e6cab202a73e7d0c83 Mon Sep 17 00:00:00 2001
From: pscrozi <pscrozi@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Sat, 7 Jul 2012 17:46:24 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8482
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/KSPACE/pppm_cg.cpp | 1224 ++++++++++++++++++++--------------------
 1 file changed, 612 insertions(+), 612 deletions(-)
diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp
index 36945ac987..a2ad349b48 100644
--- a/src/KSPACE/pppm_cg.cpp
+++ b/src/KSPACE/pppm_cg.cpp
@@ -1,612 +1,612 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: Axel Kohlmeyer (Temple U)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "math.h"
-#include "stdlib.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "memory.h"
-#include "pppm_cg.h"
-
-#include "math_const.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define OFFSET 16384
-#define SMALLQ 0.00001
-#if defined(FFT_SINGLE)
-#define ZEROF 0.0f
-#else
-#define ZEROF 0.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
-{
-  if ((narg < 1) || (narg > 2))
-    error->all(FLERR,"Illegal kspace_style pppm/cg command");
-
-  if (narg == 2)
-    smallq = atof(arg[1]);
-  else
-    smallq = SMALLQ;
-
-  num_charged = -1;
-  is_charged = NULL;
-}
-
-/* ----------------------------------------------------------------------
-   free all memory
-------------------------------------------------------------------------- */
-
-PPPMCG::~PPPMCG()
-{
-  memory->destroy(is_charged);
-}
-
-/* ----------------------------------------------------------------------
-   compute the PPPM long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void PPPMCG::compute(int eflag, int vflag)
-{
-  int i,j;
-
-  // set energy/virial flags
-  // invoke allocate_peratom() if needed for first time
-
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = evflag_atom = eflag_global = vflag_global =
-         eflag_atom = vflag_atom = 0;
-
-  if (evflag_atom && !peratom_allocate_flag) {
-    allocate_peratom();
-    peratom_allocate_flag = 1;
-  }
-
-  // convert atoms from box to lamda coords
-
-  if (triclinic == 0) boxlo = domain->boxlo;
-  else {
-    boxlo = domain->boxlo_lamda;
-    domain->x2lamda(atom->nlocal);
-  }
-
-  // extend size of per-atom arrays if necessary
-
-  if (atom->nlocal > nmax) {
-    memory->destroy(part2grid);
-    memory->destroy(is_charged);
-    nmax = atom->nmax;
-    memory->create(part2grid,nmax,3,"pppm:part2grid");
-    memory->create(is_charged,nmax,"pppm/cg:is_charged");
-  }
-
-  // one time setup message
-
-  if (num_charged < 0) {
-    bigint charged_all, charged_num;
-    double charged_frac, charged_fmax, charged_fmin;
-
-    num_charged=0;
-    for (i=0; i < atom->nlocal; ++i)
-      if (fabs(atom->q[i]) > smallq)
-        ++num_charged;
-
-    // get fraction of charged particles per domain
-
-    if (atom->nlocal > 0)
-      charged_frac = static_cast<double>(num_charged) * 100.0
-                   / static_cast<double>(atom->nlocal);
-    else
-      charged_frac = 0.0;
-    MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
-    MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
-
-    // get fraction of charged particles overall
-
-    charged_num = num_charged;
-    MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
-    charged_frac = static_cast<double>(charged_all) * 100.0
-                   / static_cast<double>(atom->natoms);
-
-    if (me == 0) {
-      if (screen)
-        fprintf(screen,
-                "  PPPM/cg optimization cutoff: %g\n"
-                "  Total charged atoms: %.1f%%\n"
-                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
-                smallq,charged_frac,charged_fmin,charged_fmax);
-      if (logfile)
-        fprintf(logfile,
-                "  PPPM/cg optimization cutoff: %g\n"
-                "  Total charged atoms: %.1f%%\n"
-                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
-                smallq,charged_frac,charged_fmin,charged_fmax);
-    }
-  }
-
-  num_charged = 0;
-  for (i = 0; i < atom->nlocal; ++i)
-    if (fabs(atom->q[i]) > smallq) {
-      is_charged[num_charged] = i;
-      ++num_charged;
-    }
-
-  // find grid points for all my particles
-  // map my particle charge onto my local 3d density grid
-
-  particle_map();
-  make_rho();
-
-  // all procs communicate density values from their ghost cells
-  //   to fully sum contribution in their 3d bricks
-  // remap from 3d decomposition to FFT decomposition
-
-  brick2fft();
-
-  // compute potential gradient on my FFT grid and
-  //   portion of e_long on this proc's FFT grid
-  // return gradients (electric fields) in 3d brick decomposition
-  // also performs per-atom calculations via poisson_peratom()
-
-  if (differentiation_flag == 1) {
-    poisson_ad();
-    fillbrick_ad();
-    fieldforce_ad();
-    if (vflag_atom) fillbrick_peratom_ad();
-  } else {
-    poisson_ik();
-    fillbrick_ik();
-    fieldforce_ik();
-    if (evflag_atom) fillbrick_peratom_ik();
-  }
-
-  if (evflag_atom) fieldforce_peratom();
-
-  // sum global energy across procs and add in volume-dependent term
-
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) {
-    double energy_all;
-    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
-    energy = energy_all;
-
-    energy *= 0.5*volume;
-    energy -= g_ewald*qsqsum/MY_PIS +
-      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
-    energy *= qscale;
-  }
-
-  // sum global virial across procs
-
-  if (vflag_global) {
-    double virial_all[6];
-    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
-    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
-  }
-
-  // per-atom energy/virial
-  // energy includes self-energy correction
-
-  if (evflag_atom) {
-    double *q = atom->q;
-    int nlocal = atom->nlocal;
-
-    if (eflag_atom) {
-      for (int j = 0; j < num_charged; j++) {
-        int i = is_charged[j];
-        eatom[i] *= 0.5;
-        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
-          (g_ewald*g_ewald*volume);
-        eatom[i] *= qscale;
-      }
-    }
-
-    if (vflag_atom) {
-      for (int j = 0; j < num_charged; j++) {
-        int i = is_charged[j];
-        for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale;
-      }
-    }
-  }
-
-  // 2d slab correction
-
-  if (slabflag == 1) slabcorr();
-
-  // convert atoms back from lamda to box coords
-
-  if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
-   find center grid pt for each of my particles
-   check that full stencil for the particle will fit in my 3d brick
-   store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPMCG::particle_map()
-{
-  int nx,ny,nz;
-
-  double **x = atom->x;
-
-  int flag = 0;
-  for (int j = 0; j < num_charged; j++) {
-    int i = is_charged[j];
-
-    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-    // current particle coord can be outside global and local box
-    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
-    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
-    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
-    part2grid[i][0] = nx;
-    part2grid[i][1] = ny;
-    part2grid[i][2] = nz;
-
-    // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
-    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
-        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
-        nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1;
-  }
-
-  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = charge "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid
-------------------------------------------------------------------------- */
-
-void PPPMCG::make_rho()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density array
-
-  FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
-  for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-
-  for (int j = 0; j < num_charged; j++) {
-    int i = is_charged[j];
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    z0 = delvolinv * q[i];
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        x0 = y0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          density_brick[mz][my][mx] += x0*rho1d[0][l];
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPMCG::fieldforce_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  for (int j = 0; j < num_charged; j++) {
-    i = is_charged[j];
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          ekx -= x0*vdx_brick[mz][my][mx];
-          eky -= x0*vdy_brick[mz][my][mx];
-          ekz -= x0*vdz_brick[mz][my][mx];
-        }
-      }
-    }
-
-    // convert E-field to force
-
-    const double qfactor = force->qqrd2e * scale * q[i];
-    f[i][0] += qfactor*ekx;
-    f[i][1] += qfactor*eky;
-    if (slabflag != 2) f[i][2] += qfactor*ekz;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPMCG::fieldforce_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0;
-  FFT_SCALAR ekx,eky,ekz;
-  double s1,s2,s3;
-  double sf = 0.0;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double hx_inv = nx_pppm/xprd;
-  double hy_inv = ny_pppm/yprd;
-  double hz_inv = nz_pppm/zprd;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (int j = 0; j < num_charged; j++) {
-    i = is_charged[j];
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-    compute_drho1d(dx,dy,dz);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
-        }
-      }
-    }
-    ekx *= hx_inv;
-    eky *= hy_inv;
-    ekz *= hz_inv;
-    // convert E-field to force and substract self forces
-    const double qfactor = force->qqrd2e * scale;
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-    sf = sf_coeff[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff[1]*sin(4*MY_PI*s1);
-    sf *= 2*q[i]*q[i];
-    f[i][0] += qfactor*(ekx*q[i] - sf);
-
-    sf = sf_coeff[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff[3]*sin(4*MY_PI*s2);
-    sf *= 2*q[i]*q[i];
-    f[i][1] += qfactor*(eky*q[i] - sf);
-
-
-    sf = sf_coeff[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff[5]*sin(4*MY_PI*s3);
-    sf *= 2*q[i]*q[i];
-    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
-  }
-}
-
-/* ----------------------------------------------------------------------
- interpolate from grid to get per-atom energy/virial
- ------------------------------------------------------------------------- */
-
-void PPPMCG::fieldforce_peratom()
-{
-    int i,l,m,n,nx,ny,nz,mx,my,mz;
-    FFT_SCALAR dx,dy,dz,x0,y0,z0;
-    FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
-
-    // loop over my charges, interpolate from nearby grid points
-    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-    // (dx,dy,dz) = distance to "lower left" grid pt
-    // (mx,my,mz) = global coords of moving stencil pt
-
-    double *q = atom->q;
-    double **x = atom->x;
-    double **f = atom->f;
-
-    for (int j = 0; j < num_charged; j++) {
-        i = is_charged[j];
-
-        nx = part2grid[i][0];
-        ny = part2grid[i][1];
-        nz = part2grid[i][2];
-        dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-        dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-        dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-        compute_rho1d(dx,dy,dz);
-
-        u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
-        for (n = nlower; n <= nupper; n++) {
-            mz = n+nz;
-            z0 = rho1d[2][n];
-            for (m = nlower; m <= nupper; m++) {
-                my = m+ny;
-                y0 = z0*rho1d[1][m];
-                for (l = nlower; l <= nupper; l++) {
-                    mx = l+nx;
-                    x0 = y0*rho1d[0][l];
-                    if (eflag_atom) u += x0*u_brick[mz][my][mx];
-                    if (vflag_atom) {
-                        v0 += x0*v0_brick[mz][my][mx];
-                        v1 += x0*v1_brick[mz][my][mx];
-                        v2 += x0*v2_brick[mz][my][mx];
-                        v3 += x0*v3_brick[mz][my][mx];
-                        v4 += x0*v4_brick[mz][my][mx];
-                        v5 += x0*v5_brick[mz][my][mx];
-                    }
-                }
-            }
-        }
-
-        if (eflag_atom) eatom[i] += q[i]*u;
-        if (vflag_atom) {
-            vatom[i][0] += v0;
-            vatom[i][1] += v1;
-            vatom[i][2] += v2;
-            vatom[i][3] += v3;
-            vatom[i][4] += v4;
-            vatom[i][5] += v5;
-        }
-    }
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane.
-------------------------------------------------------------------------- */
-
-void PPPMCG::slabcorr()
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-
-  double dipole = 0.0;
-  for (int j = 0; j < num_charged; j++) {
-    int i = is_charged[j];
-    dipole += q[i]*x[i][2];
-  }
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // compute corrections
-
-  const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy += qscale * e_slabcorr;
-
-  //per-atom energy
-
-  if (eflag_atom) {
-    double efact = 2.0*MY_PI*dipole_all/volume;
-    for (int j = 0; j < num_charged; j++) {
-      int i = is_charged[j];
-      eatom[i] += qscale * q[i]*x[i][2]*efact;
-    }
-  }
-
-  // add on force corrections
-
-  const double ffact = -4.0*MY_PI*dipole_all/volume * qscale;
-  double **f = atom->f;
-
-  for (int j = 0; j < num_charged; j++) {
-    int i = is_charged[j];
-    f[i][2] += q[i]*ffact;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPMCG::memory_usage()
-{
-  double bytes = PPPM::memory_usage();
-  bytes += nmax * sizeof(int);
-  return bytes;
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "stdlib.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "pppm_cg.h"
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define OFFSET 16384
+#define SMALLQ 0.00001
+#if defined(FFT_SINGLE)
+#define ZEROF 0.0f
+#else
+#define ZEROF 0.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMCG::PPPMCG(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
+{
+  if ((narg < 1) || (narg > 2))
+    error->all(FLERR,"Illegal kspace_style pppm/cg command");
+
+  if (narg == 2)
+    smallq = atof(arg[1]);
+  else
+    smallq = SMALLQ;
+
+  num_charged = -1;
+  is_charged = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   free all memory
+------------------------------------------------------------------------- */
+
+PPPMCG::~PPPMCG()
+{
+  memory->destroy(is_charged);
+}
+
+/* ----------------------------------------------------------------------
+   compute the PPPM long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void PPPMCG::compute(int eflag, int vflag)
+{
+  int i,j;
+
+  // set energy/virial flags
+  // invoke allocate_peratom() if needed for first time
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global =
+         eflag_atom = vflag_atom = 0;
+
+  if (evflag_atom && !peratom_allocate_flag) {
+    allocate_peratom();
+    peratom_allocate_flag = 1;
+  }
+
+  // convert atoms from box to lamda coords
+
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {
+    memory->destroy(part2grid);
+    memory->destroy(is_charged);
+    nmax = atom->nmax;
+    memory->create(part2grid,nmax,3,"pppm:part2grid");
+    memory->create(is_charged,nmax,"pppm/cg:is_charged");
+  }
+
+  // one time setup message
+
+  if (num_charged < 0) {
+    bigint charged_all, charged_num;
+    double charged_frac, charged_fmax, charged_fmin;
+
+    num_charged=0;
+    for (i=0; i < atom->nlocal; ++i)
+      if (fabs(atom->q[i]) > smallq)
+        ++num_charged;
+
+    // get fraction of charged particles per domain
+
+    if (atom->nlocal > 0)
+      charged_frac = static_cast<double>(num_charged) * 100.0
+                   / static_cast<double>(atom->nlocal);
+    else
+      charged_frac = 0.0;
+    MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
+    MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
+
+    // get fraction of charged particles overall
+
+    charged_num = num_charged;
+    MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
+    charged_frac = static_cast<double>(charged_all) * 100.0
+                   / static_cast<double>(atom->natoms);
+
+    if (me == 0) {
+      if (screen)
+        fprintf(screen,
+                "  PPPM/cg optimization cutoff: %g\n"
+                "  Total charged atoms: %.1f%%\n"
+                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
+                smallq,charged_frac,charged_fmin,charged_fmax);
+      if (logfile)
+        fprintf(logfile,
+                "  PPPM/cg optimization cutoff: %g\n"
+                "  Total charged atoms: %.1f%%\n"
+                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
+                smallq,charged_frac,charged_fmin,charged_fmax);
+    }
+  }
+
+  num_charged = 0;
+  for (i = 0; i < atom->nlocal; ++i)
+    if (fabs(atom->q[i]) > smallq) {
+      is_charged[num_charged] = i;
+      ++num_charged;
+    }
+
+  // find grid points for all my particles
+  // map my particle charge onto my local 3d density grid
+
+  particle_map();
+  make_rho();
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  brick2fft();
+
+  // compute potential gradient on my FFT grid and
+  //   portion of e_long on this proc's FFT grid
+  // return gradients (electric fields) in 3d brick decomposition
+  // also performs per-atom calculations via poisson_peratom()
+
+  if (differentiation_flag == 1) {
+    poisson_ad();
+    fillbrick_ad();
+    fieldforce_ad();
+    if (vflag_atom) fillbrick_peratom_ad();
+  } else {
+    poisson_ik();
+    fillbrick_ik();
+    fieldforce_ik();
+    if (evflag_atom) fillbrick_peratom_ik();
+  }
+
+  if (evflag_atom) fieldforce_peratom();
+
+  // sum global energy across procs and add in volume-dependent term
+
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) {
+    double energy_all;
+    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy = energy_all;
+
+    energy *= 0.5*volume;
+    energy -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qscale;
+  }
+
+  // sum global virial across procs
+
+  if (vflag_global) {
+    double virial_all[6];
+    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+  }
+
+  // per-atom energy/virial
+  // energy includes self-energy correction
+
+  if (evflag_atom) {
+    double *q = atom->q;
+    int nlocal = atom->nlocal;
+
+    if (eflag_atom) {
+      for (int j = 0; j < num_charged; j++) {
+        int i = is_charged[j];
+        eatom[i] *= 0.5;
+        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+          (g_ewald*g_ewald*volume);
+        eatom[i] *= qscale;
+      }
+    }
+
+    if (vflag_atom) {
+      for (int j = 0; j < num_charged; j++) {
+        int i = is_charged[j];
+        for (int n = 0; n < 6; n++) vatom[i][n] *= 0.5*q[i]*qscale;
+      }
+    }
+  }
+
+  // 2d slab correction
+
+  if (slabflag == 1) slabcorr();
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array
+------------------------------------------------------------------------- */
+
+void PPPMCG::particle_map()
+{
+  int nx,ny,nz;
+
+  double **x = atom->x;
+
+  int flag = 0;
+  for (int j = 0; j < num_charged; j++) {
+    int i = is_charged[j];
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
+    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
+    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[i][0] = nx;
+    part2grid[i][1] = ny;
+    part2grid[i][2] = nz;
+
+    // check that entire stencil around nx,ny,nz will fit in my 3d brick
+
+    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
+        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
+        nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag = 1;
+  }
+
+  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+------------------------------------------------------------------------- */
+
+void PPPMCG::make_rho()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+
+  // clear 3d density array
+
+  FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+
+  for (int j = 0; j < num_charged; j++) {
+    int i = is_charged[j];
+
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    z0 = delvolinv * q[i];
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      y0 = z0*rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        x0 = y0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          density_brick[mz][my][mx] += x0*rho1d[0][l];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+------------------------------------------------------------------------- */
+
+void PPPMCG::fieldforce_ik()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR ekx,eky,ekz;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  for (int j = 0; j < num_charged; j++) {
+    i = is_charged[j];
+
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];
+          ekx -= x0*vdx_brick[mz][my][mx];
+          eky -= x0*vdy_brick[mz][my][mx];
+          ekz -= x0*vdz_brick[mz][my][mx];
+        }
+      }
+    }
+
+    // convert E-field to force
+
+    const double qfactor = force->qqrd2e * scale * q[i];
+    f[i][0] += qfactor*ekx;
+    f[i][1] += qfactor*eky;
+    if (slabflag != 2) f[i][2] += qfactor*ekz;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+------------------------------------------------------------------------- */
+
+void PPPMCG::fieldforce_ad()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0;
+  FFT_SCALAR ekx,eky,ekz;
+  double s1,s2,s3;
+  double sf = 0.0;
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double hx_inv = nx_pppm/xprd;
+  double hy_inv = ny_pppm/yprd;
+  double hz_inv = nz_pppm/zprd;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  for (int j = 0; j < num_charged; j++) {
+    i = is_charged[j];
+
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+    compute_drho1d(dx,dy,dz);
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+        }
+      }
+    }
+    ekx *= hx_inv;
+    eky *= hy_inv;
+    ekz *= hz_inv;
+    // convert E-field to force and substract self forces
+    const double qfactor = force->qqrd2e * scale;
+
+    s1 = x[i][0]*hx_inv;
+    s2 = x[i][1]*hy_inv;
+    s3 = x[i][2]*hz_inv;
+    sf = sf_coeff[0]*sin(2*MY_PI*s1);
+    sf += sf_coeff[1]*sin(4*MY_PI*s1);
+    sf *= 2*q[i]*q[i];
+    f[i][0] += qfactor*(ekx*q[i] - sf);
+
+    sf = sf_coeff[2]*sin(2*MY_PI*s2);
+    sf += sf_coeff[3]*sin(4*MY_PI*s2);
+    sf *= 2*q[i]*q[i];
+    f[i][1] += qfactor*(eky*q[i] - sf);
+
+
+    sf = sf_coeff[4]*sin(2*MY_PI*s3);
+    sf += sf_coeff[5]*sin(4*MY_PI*s3);
+    sf *= 2*q[i]*q[i];
+    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
+  }
+}
+
+/* ----------------------------------------------------------------------
+ interpolate from grid to get per-atom energy/virial
+ ------------------------------------------------------------------------- */
+
+void PPPMCG::fieldforce_peratom()
+{
+    int i,l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
+
+    // loop over my charges, interpolate from nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    double *q = atom->q;
+    double **x = atom->x;
+    double **f = atom->f;
+
+    for (int j = 0; j < num_charged; j++) {
+        i = is_charged[j];
+
+        nx = part2grid[i][0];
+        ny = part2grid[i][1];
+        nz = part2grid[i][2];
+        dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+        dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+        dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+        compute_rho1d(dx,dy,dz);
+
+        u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+        for (n = nlower; n <= nupper; n++) {
+            mz = n+nz;
+            z0 = rho1d[2][n];
+            for (m = nlower; m <= nupper; m++) {
+                my = m+ny;
+                y0 = z0*rho1d[1][m];
+                for (l = nlower; l <= nupper; l++) {
+                    mx = l+nx;
+                    x0 = y0*rho1d[0][l];
+                    if (eflag_atom) u += x0*u_brick[mz][my][mx];
+                    if (vflag_atom) {
+                        v0 += x0*v0_brick[mz][my][mx];
+                        v1 += x0*v1_brick[mz][my][mx];
+                        v2 += x0*v2_brick[mz][my][mx];
+                        v3 += x0*v3_brick[mz][my][mx];
+                        v4 += x0*v4_brick[mz][my][mx];
+                        v5 += x0*v5_brick[mz][my][mx];
+                    }
+                }
+            }
+        }
+
+        if (eflag_atom) eatom[i] += q[i]*u;
+        if (vflag_atom) {
+            vatom[i][0] += v0;
+            vatom[i][1] += v1;
+            vatom[i][2] += v2;
+            vatom[i][3] += v3;
+            vatom[i][4] += v4;
+            vatom[i][5] += v5;
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane.
+------------------------------------------------------------------------- */
+
+void PPPMCG::slabcorr()
+{
+  // compute local contribution to global dipole moment
+
+  double *q = atom->q;
+  double **x = atom->x;
+
+  double dipole = 0.0;
+  for (int j = 0; j < num_charged; j++) {
+    int i = is_charged[j];
+    dipole += q[i]*x[i][2];
+  }
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // compute corrections
+
+  const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  //per-atom energy
+
+  if (eflag_atom) {
+    double efact = 2.0*MY_PI*dipole_all/volume;
+    for (int j = 0; j < num_charged; j++) {
+      int i = is_charged[j];
+      eatom[i] += qscale * q[i]*x[i][2]*efact;
+    }
+  }
+
+  // add on force corrections
+
+  const double ffact = -4.0*MY_PI*dipole_all/volume * qscale;
+  double **f = atom->f;
+
+  for (int j = 0; j < num_charged; j++) {
+    int i = is_charged[j];
+    f[i][2] += q[i]*ffact;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double PPPMCG::memory_usage()
+{
+  double bytes = PPPM::memory_usage();
+  bytes += nmax * sizeof(int);
+  return bytes;
+}