lammps/src/KSPACE/pppm_disp.cpp

/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Rolf Isele-Holder (Aachen University)
                         Paul Crozier (SNL)
------------------------------------------------------------------------- */

#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "pppm_disp.h"
#include "math_const.h"
#include "atom.h"
#include "comm.h"
#include "gridcomm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"

using namespace LAMMPS_NS;
using namespace MathConst;

#define MAXORDER   7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7

enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
     FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
     FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
     FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};


#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF  1.0f
#else
#define ZEROF 0.0
#define ONEF  1.0
#endif

/* ---------------------------------------------------------------------- */

PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
  // the single required argument is the relative accuracy

  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");

  triclinic_support = 0;
  dispersionflag = 1;
  pppmflag = 1;
  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));

  // table of factors: 2, 3, 5

  static const int factor_table[3] = {2, 3, 5};
  nfactors = 3;
  factors = new int[nfactors];
  for (int m = 0; m < nfactors; m++) factors[m] = factor_table[m];

  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);

  csumflag = 0;
  B = NULL;
  cii = NULL;
  csumi = NULL;
  peratom_allocate_flag = 0;

  // every grid pointer starts out as NULL
  // grids without suffix

  density_brick = NULL;
  vdx_brick = vdy_brick = vdz_brick = NULL;
  density_fft = NULL;
  u_brick = NULL;
  v0_brick = v1_brick = v2_brick = NULL;
  v3_brick = v4_brick = v5_brick = NULL;

  // grids with suffix _g

  density_brick_g = NULL;
  vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
  density_fft_g = NULL;
  u_brick_g = NULL;
  v0_brick_g = v1_brick_g = v2_brick_g = NULL;
  v3_brick_g = v4_brick_g = v5_brick_g = NULL;

  // grids with suffix _a0 ... _a6

  density_brick_a0 = NULL;
  vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
  density_fft_a0 = NULL;
  u_brick_a0 = NULL;
  v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = NULL;
  v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;

  density_brick_a1 = NULL;
  vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
  density_fft_a1 = NULL;
  u_brick_a1 = NULL;
  v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = NULL;
  v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;

  density_brick_a2 = NULL;
  vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
  density_fft_a2 = NULL;
  u_brick_a2 = NULL;
  v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = NULL;
  v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;

  density_brick_a3 = NULL;
  vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
  density_fft_a3 = NULL;
  u_brick_a3 = NULL;
  v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = NULL;
  v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;

  density_brick_a4 = NULL;
  vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
  density_fft_a4 = NULL;
  u_brick_a4 = NULL;
  v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = NULL;
  v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;

  density_brick_a5 = NULL;
  vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
  density_fft_a5 = NULL;
  u_brick_a5 = NULL;
  v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = NULL;
  v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;

  density_brick_a6 = NULL;
  vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
  density_fft_a6 = NULL;
  u_brick_a6 = NULL;
  v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = NULL;
  v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;

  // grids with suffix _none

  density_brick_none = NULL;
  vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
  density_fft_none = NULL;
  u_brick_none = NULL;
  v0_brick_none = v1_brick_none = v2_brick_none = NULL;
  v3_brick_none = v4_brick_none = v5_brick_none = NULL;

  // Green's functions, work arrays, virial coefficients, wave vectors

  greensfn = NULL;
  greensfn_6 = NULL;
  work1 = NULL;
  work2 = NULL;
  work1_6 = NULL;
  work2_6 = NULL;
  vg = NULL;
  vg2 = NULL;
  vg_6 = NULL;
  vg2_6 = NULL;
  fkx = NULL;
  fky = NULL;
  fkz = NULL;
  fkx2 = NULL;
  fky2 = NULL;
  fkz2 = NULL;
  fkx_6 = NULL;
  fky_6 = NULL;
  fkz_6 = NULL;
  fkx2_6 = NULL;
  fky2_6 = NULL;
  fkz2_6 = NULL;

  // sf_precoeff arrays

  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = NULL;
  sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = NULL;
  sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;

  // expansion coefficients, FFT and remap objects

  gf_b = NULL;
  gf_b_6 = NULL;
  rho1d = NULL;
  rho_coeff = NULL;
  drho1d = NULL;
  drho_coeff = NULL;
  rho1d_6 = NULL;
  rho_coeff_6 = NULL;
  drho1d_6 = NULL;
  drho_coeff_6 = NULL;
  fft1 = NULL;
  fft2 = NULL;
  fft1_6 = NULL;
  fft2_6 = NULL;
  remap = NULL;
  remap_6 = NULL;

  // per-atom grid mapping, sized on demand in compute()

  nmax = 0;
  part2grid = NULL;
  part2grid_6 = NULL;

  // grid communication objects

  cg = NULL;
  cg_peratom = NULL;
  cg_6 = NULL;
  cg_peratom_6 = NULL;

  // no potential is selected until init() fills in function[]

  memset(function, 0, sizeof(int)*EWALD_FUNCS);
}

/* ----------------------------------------------------------------------
   free all memory
------------------------------------------------------------------------- */

PPPMDisp::~PPPMDisp()
{
  // arrays owned directly via new[]

  delete [] factors;

  delete [] B;
  delete [] cii;
  delete [] csumi;
  B = NULL;
  cii = NULL;
  csumi = NULL;

  // grid-dependent and per-atom storage

  deallocate();
  deallocate_peratom();

  // particle-to-grid maps

  memory->destroy(part2grid);
  memory->destroy(part2grid_6);
  part2grid = NULL;
  part2grid_6 = NULL;
}

/* ----------------------------------------------------------------------
   called once before run
   - checks dimension, comm style, boundaries and stencil orders
   - queries the pair style for ewald_order, ewald_mix and the cutoffs
     and fills function[] with the potentials to be handled here
   - chooses grid, stencil order and Ewald parameter for the Coulomb
     and/or dispersion part, reducing the order while the stencil
     overlaps beyond the neighbor processor (unless overlap is allowed)
   - prints the resulting parameters and accuracy estimates on proc 0
   - allocates the grid arrays and pre-computes order-dependent coeffs
------------------------------------------------------------------------- */

void PPPMDisp::init()
{
  if (me == 0) {
    if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
    if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
  }

  triclinic_check();
  if (domain->dimension == 2)
    error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
  if (comm->style != 0)
    error->universe_all(FLERR,"PPPMDisp can only currently be used with "
                        "comm_style brick");

  if (slabflag == 0 && domain->nonperiodic > 0)
    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
  if (slabflag == 1) {
    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
      error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
  }

  // NOTE(review): the message names only the coulomb order although
  // the test also covers the dispersion order (order_6)

  if (order > MAXORDER || order_6 > MAXORDER) {
    char str[128];
    sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
    error->all(FLERR,str);
  }

  // free all arrays previously allocated

  deallocate();
  deallocate_peratom();

  // check whether cutoff and pair style are set

  triclinic = domain->triclinic;
  pair_check();

  int tmp;
  Pair *pair = force->pair;
  int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
  double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
  double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;

  // both cutoffs are dereferenced below, so each of them must be provided
  // ewald_order is optional, a default is used further down if it is absent

  if (!p_cutoff || !p_cutoff_lj)
    error->all(FLERR,"KSpace style is incompatible with Pair style");
  cutoff = *p_cutoff;
  cutoff_lj = *p_cutoff_lj;

  // NOTE(review): the reduced value tmp2 is not used anywhere below,
  //   the collective call is kept as is

  double tmp2;
  MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world);

  // check out which types of potentials will have to be calculated
  // function[0] = coulomb, function[1..3] = dispersion variants

  int ewald_order = ptr ? *((int *) ptr) : 1<<1;
  int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
  memset(function, 0, EWALD_FUNCS*sizeof(int));
  for (int i=0; i<=EWALD_MAXORDER; ++i)                 // transcribe order
    if (ewald_order&(1<<i)) {                           // from pair_style
      int  k=0;
      char str[128];
      switch (i) {
        case 1:
          k = 0; break;
        case 6:
          if ((ewald_mix==GEOMETRIC || ewald_mix==SIXTHPOWER ||
               mixflag == 1) && mixflag!= 2) { k = 1; break; }
          else if (ewald_mix==ARITHMETIC && mixflag!=2) { k = 2; break; }
          else if (mixflag == 2) { k = 3; break; }
          // intentional fall through to the error if nothing matched
        default:
          sprintf(str, "Unsupported order in kspace_style "
                  "pppm/disp, pair_style %s", force->pair_style);
          error->all(FLERR,str);
      }
      function[k] = 1;
    }


  // warn, if function[0] is not set but charge attribute is set!

  if (!function[0] && atom->q_flag && me == 0) {
    char str[128];
    sprintf(str, "Charges are set, but coulombic solver is not used");
    error->warning(FLERR, str);
  }

  // show error message if pppm/disp is not used correctly

  if (function[1] || function[2] || function[3]) {
    if (!gridflag_6 && !gewaldflag_6 && accuracy_real_6 < 0
        && accuracy_kspace_6 < 0 && !auto_disp_flag) {
      error->all(FLERR, "PPPMDisp used but no parameters set, "
              "for further information please see the pppm/disp "
              "documentation");
    }
  }

  // compute qsum & qsqsum, if function[0] is set, warn if not charge-neutral

  scale = 1.0;
  qqrd2e = force->qqrd2e;
  natoms_original = atom->natoms;

  if (function[0]) qsum_qsq();

  // if kspace is TIP4P, extract TIP4P params from pair style
  // bond/angle are not yet init(), so insure equilibrium request is valid

  qdist = 0.0;

  if (tip4pflag) {
    int itmp;
    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
      error->all(FLERR,"KSpace style is incompatible with Pair style");
    qdist = *p_qdist;
    typeO = *p_typeO;
    typeH = *p_typeH;
    int typeA = *p_typeA;
    int typeB = *p_typeB;

    if (force->angle == NULL || force->bond == NULL)
      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
    if (typeA < 1 || typeA > atom->nangletypes ||
        force->angle->setflag[typeA] == 0)
      error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
    if (typeB < 1 || typeB > atom->nbondtypes ||
        force->bond->setflag[typeB] == 0)
      error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
    double theta = force->angle->equilibrium_angle(typeA);
    double blen = force->bond->equilibrium_distance(typeB);
    alpha = qdist / (cos(0.5*theta) * blen);
  }

  // initialize the pair style to get the coefficients
  // neighrequest_flag is cleared only for the duration of pair->init()

  neighrequest_flag = 0;
  pair->init();
  neighrequest_flag = 1;
  init_coeffs();

  //if g_ewald and g_ewald_6 have not been specified, set some initial value
  //  to avoid problems when calculating the energies!

  if (!gewaldflag) g_ewald = 1;
  if (!gewaldflag_6) g_ewald_6 = 1;

  // set accuracy (force units) from accuracy_relative or accuracy_absolute

  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
  else accuracy = accuracy_relative * two_charge_force;

  int (*procneigh)[2] = comm->procneigh;

  // Coulomb part: pick grid and stencil order

  int iteration = 0;
  if (function[0]) {
    GridComm *cgtmp = NULL;
    while (order >= minorder) {

      if (iteration && me == 0)
          error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
                         "b/c stencil extends beyond neighbor processor");
      iteration++;

      // set grid for dispersion interaction and coulomb interactions

      set_grid();

      if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
      error->all(FLERR,"PPPMDisp Coulomb grid is too large");

      set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                         nxlo_fft, nylo_fft, nzlo_fft,
                         nxhi_fft, nyhi_fft, nzhi_fft,
                         nxlo_in, nylo_in, nzlo_in,
                         nxhi_in, nyhi_in, nzhi_in,
                         nxlo_out, nylo_out, nzlo_out,
                         nxhi_out, nyhi_out, nzhi_out,
                         nlower, nupper,
                         ngrid, nfft, nfft_both,
                         shift, shiftone, order);

      if (overlap_allowed) break;

      // temporary GridComm is used only to test for ghost overlap

      cgtmp = new GridComm(lmp, world,1,1,
                           nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                           nxlo_out,nxhi_out,nylo_out,nyhi_out,
                           nzlo_out,nzhi_out,
                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
      cgtmp->ghost_notify();
      if (!cgtmp->ghost_overlap()) break;

      // reset the pointer so the delete after the loop never sees
      // an already freed object

      delete cgtmp;
      cgtmp = NULL;

      order--;
    }

    if (order < minorder)
      error->all(FLERR,
                 "Coulomb PPPMDisp order has been reduced below minorder");
    if (cgtmp) delete cgtmp;

    // adjust g_ewald

    if (!gewaldflag) adjust_gewald();

    // calculate the final accuracy

    double acc = final_accuracy();

    // print stats

    int ngrid_max,nfft_both_max;
    MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
    MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);

    if (me == 0) {
    #ifdef FFT_SINGLE
      const char fft_prec[] = "single";
    #else
      const char fft_prec[] = "double";
    #endif

      if (screen) {
        fprintf(screen,"  Coulomb G vector (1/distance)= %g\n",g_ewald);
        fprintf(screen,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
        fprintf(screen,"  Coulomb stencil order = %d\n",order);
        fprintf(screen,"  Coulomb estimated absolute RMS force accuracy = %g\n",
                acc);
        fprintf(screen,"  Coulomb estimated relative force accuracy = %g\n",
                acc/two_charge_force);
        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
        fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
                ngrid_max, nfft_both_max);
      }
      if (logfile) {
        fprintf(logfile,"  Coulomb G vector (1/distance) = %g\n",g_ewald);
        fprintf(logfile,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
        fprintf(logfile,"  Coulomb stencil order = %d\n",order);
        fprintf(logfile,
                "  Coulomb estimated absolute RMS force accuracy = %g\n",
                acc);
        fprintf(logfile,"  Coulomb estimated relative force accuracy = %g\n",
                acc/two_charge_force);
        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
        fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
                ngrid_max, nfft_both_max);
      }
    }
  }

  // dispersion part: same procedure with the _6 quantities

  iteration = 0;
  if (function[1] + function[2] + function[3]) {
    GridComm *cgtmp = NULL;
    while (order_6 >= minorder) {

      if (iteration && me == 0)
          error->warning(FLERR,"Reducing PPPMDisp dispersion order "
                         "b/c stencil extends beyond neighbor processor");
      iteration++;

      set_grid_6();

      if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
      error->all(FLERR,"PPPMDisp Dispersion grid is too large");

      set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                         nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                         nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                         nxlo_in_6, nylo_in_6, nzlo_in_6,
                         nxhi_in_6, nyhi_in_6, nzhi_in_6,
                         nxlo_out_6, nylo_out_6, nzlo_out_6,
                         nxhi_out_6, nyhi_out_6, nzhi_out_6,
                         nlower_6, nupper_6,
                         ngrid_6, nfft_6, nfft_both_6,
                         shift_6, shiftone_6, order_6);

      if (overlap_allowed) break;

      cgtmp = new GridComm(lmp,world,1,1,
                           nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
                           nzlo_in_6,nzhi_in_6,
                           nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
                           nzlo_out_6,nzhi_out_6,
                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
      cgtmp->ghost_notify();
      if (!cgtmp->ghost_overlap()) break;

      // reset the pointer so the delete after the loop never sees
      // an already freed object

      delete cgtmp;
      cgtmp = NULL;

      order_6--;
    }

    if (order_6 < minorder)
      error->all(FLERR,"Dispersion PPPMDisp order has been "
                 "reduced below minorder");
    if (cgtmp) delete cgtmp;

    // adjust g_ewald_6

    if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6)
      adjust_gewald_6();

    // calculate the final accuracy

    double acc, acc_real, acc_kspace;
    final_accuracy_6(acc, acc_real, acc_kspace);


    // print stats

    int ngrid_max,nfft_both_max;
    MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
    MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);

    if (me == 0) {
    #ifdef FFT_SINGLE
      const char fft_prec[] = "single";
    #else
      const char fft_prec[] = "double";
    #endif

      if (screen) {
        fprintf(screen,"  Dispersion G vector (1/distance)= %g\n",g_ewald_6);
        fprintf(screen,"  Dispersion grid = %d %d %d\n",
                nx_pppm_6,ny_pppm_6,nz_pppm_6);
        fprintf(screen,"  Dispersion stencil order = %d\n",order_6);
        fprintf(screen,"  Dispersion estimated absolute "
                "RMS force accuracy = %g\n",acc);
        fprintf(screen,"  Dispersion estimated absolute "
                "real space RMS force accuracy = %g\n",acc_real);
        fprintf(screen,"  Dispersion estimated absolute "
                "kspace RMS force accuracy = %g\n",acc_kspace);
        fprintf(screen,"  Dispersion estimated relative force accuracy = %g\n",
                acc/two_charge_force);
        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
        fprintf(screen,"  3d grid and FFT values/proc dispersion = %d %d\n",
                          ngrid_max,nfft_both_max);
      }
      if (logfile) {
        fprintf(logfile,"  Dispersion G vector (1/distance) = %g\n",g_ewald_6);
        fprintf(logfile,"  Dispersion grid = %d %d %d\n",
                nx_pppm_6,ny_pppm_6,nz_pppm_6);
        fprintf(logfile,"  Dispersion stencil order = %d\n",order_6);
        fprintf(logfile,"  Dispersion estimated absolute "
                "RMS force accuracy = %g\n",acc);
        fprintf(logfile,"  Dispersion estimated absolute "
                "real space RMS force accuracy = %g\n",acc_real);
        fprintf(logfile,"  Dispersion estimated absolute "
                "kspace RMS force accuracy = %g\n",acc_kspace);
        fprintf(logfile,"  Dispersion estimated relative force accuracy = %g\n",
                acc/two_charge_force);
        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
        fprintf(logfile,"  3d grid and FFT values/proc dispersion = %d %d\n",
                           ngrid_max,nfft_both_max);
      }
    }
  }

  // allocate K-space dependent memory

  allocate();

  // pre-compute Green's function denominator expansion
  // pre-compute 1d charge distribution coefficients

  if (function[0]) {
    compute_gf_denom(gf_b, order);
    compute_rho_coeff(rho_coeff, drho_coeff, order);
    cg->ghost_notify();
    cg->setup();
    if (differentiation_flag == 1)
      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                          nxlo_fft, nylo_fft, nzlo_fft,
                          nxhi_fft, nyhi_fft, nzhi_fft,
                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
  }
  if (function[1] + function[2] + function[3]) {
    compute_gf_denom(gf_b_6, order_6);
    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
    cg_6->ghost_notify();
    cg_6->setup();
    if (differentiation_flag == 1)
      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
  }

}

/* ----------------------------------------------------------------------
   adjust PPPM coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */

void PPPMDisp::setup()
{

  if (slabflag == 0 && domain->nonperiodic > 0)
    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
  if (slabflag == 1) {
    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
      error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
  }

  double *prd;

  // volume-dependent factors
  // adjust z dimension for 2d slab PPPM
  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;
  volume = xprd * yprd * zprd_slab;

 // compute fkx,fky,fkz for my FFT grid pts

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  //compute the virial coefficients and green functions
  if (function[0]){

    delxinv = nx_pppm/xprd;
    delyinv = ny_pppm/yprd;
    delzinv = nz_pppm/zprd_slab;

    delvolinv = delxinv*delyinv*delzinv;

    double per;
    int i, j, k, n;

    for (i = nxlo_fft; i <= nxhi_fft; i++) {
      per = i - nx_pppm*(2*i/nx_pppm);
      fkx[i] = unitkx*per;
      j = (nx_pppm - i) % nx_pppm;
      per = j - nx_pppm*(2*j/nx_pppm);
      fkx2[i] = unitkx*per;
    }

    for (i = nylo_fft; i <= nyhi_fft; i++) {
      per = i - ny_pppm*(2*i/ny_pppm);
      fky[i] = unitky*per;
      j = (ny_pppm - i) % ny_pppm;
      per = j - ny_pppm*(2*j/ny_pppm);
      fky2[i] = unitky*per;
    }

    for (i = nzlo_fft; i <= nzhi_fft; i++) {
      per = i - nz_pppm*(2*i/nz_pppm);
      fkz[i] = unitkz*per;
      j = (nz_pppm - i) % nz_pppm;
      per = j - nz_pppm*(2*j/nz_pppm);
      fkz2[i] = unitkz*per;
    }

    double sqk,vterm;
    double gew2inv = 1/(g_ewald*g_ewald);
    n = 0;
    for (k = nzlo_fft; k <= nzhi_fft; k++) {
      for (j = nylo_fft; j <= nyhi_fft; j++) {
        for (i = nxlo_fft; i <= nxhi_fft; i++) {
	  sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
	  if (sqk == 0.0) {
	    vg[n][0] = 0.0;
	    vg[n][1] = 0.0;
	    vg[n][2] = 0.0;
	    vg[n][3] = 0.0;
	    vg[n][4] = 0.0;
	    vg[n][5] = 0.0;
	  } else {
	    vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
	    vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
	    vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
	    vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
	    vg[n][3] = vterm*fkx[i]*fky[j];
	    vg[n][4] = vterm*fkx[i]*fkz[k];
	    vg[n][5] = vterm*fky[j]*fkz[k];
            vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
            vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
            vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
  	  }
	  n++;
        }
      }
    }
    compute_gf();
    if (differentiation_flag == 1) compute_sf_coeff();
  }

  if (function[1] + function[2] + function[3]) {
    delxinv_6 = nx_pppm_6/xprd;
    delyinv_6 = ny_pppm_6/yprd;
    delzinv_6 = nz_pppm_6/zprd_slab;
    delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;

    double per;
    int i, j, k, n;
    for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
      per = i - nx_pppm_6*(2*i/nx_pppm_6);
      fkx_6[i] = unitkx*per;
      j = (nx_pppm_6 - i) % nx_pppm_6;
      per = j - nx_pppm_6*(2*j/nx_pppm_6);
      fkx2_6[i] = unitkx*per;
    }
    for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
      per = i - ny_pppm_6*(2*i/ny_pppm_6);
      fky_6[i] = unitky*per;
      j = (ny_pppm_6 - i) % ny_pppm_6;
      per = j - ny_pppm_6*(2*j/ny_pppm_6);
      fky2_6[i] = unitky*per;
    }
    for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
      per = i - nz_pppm_6*(2*i/nz_pppm_6);
      fkz_6[i] = unitkz*per;
      j = (nz_pppm_6 - i) % nz_pppm_6;
      per = j - nz_pppm_6*(2*j/nz_pppm_6);
      fkz2_6[i] = unitkz*per;
    }
    double sqk,vterm;
    long double erft, expt,nom, denom;
    long double b, bs, bt;
    double rtpi = sqrt(MY_PI);
    double gewinv = 1/g_ewald_6;
    n = 0;
    for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
      for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
        for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
	  sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
	  if (sqk == 0.0) {
	    vg_6[n][0] = 0.0;
	    vg_6[n][1] = 0.0;
	    vg_6[n][2] = 0.0;
	    vg_6[n][3] = 0.0;
	    vg_6[n][4] = 0.0;
	    vg_6[n][5] = 0.0;
	  } else {
            b = 0.5*sqrt(sqk)*gewinv;
            bs = b*b;
            bt = bs*b;
            erft = 2*bt*rtpi*erfc((double) b);
            expt = exp(-bs);
            nom = erft - 2*bs*expt;
            denom = nom + expt;
            if (denom == 0) vterm = 3.0/sqk;
            else vterm = 3.0*nom/(sqk*denom);
	    vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
	    vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
	    vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
	    vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
	    vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
	    vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
            vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
            vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
            vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
	  }
	  n++;
        }
      }
    }
    compute_gf_6();
    if (differentiation_flag == 1) compute_sf_coeff_6();
  }
}

/* ----------------------------------------------------------------------
   reset local grid arrays and communication stencils
   called by fix balance b/c it changed sizes of processor sub-domains
   sequence: free old arrays, recompute owned grid portions, reallocate,
   re-check ghost overlap, redo order-dependent coeffs, then setup()
------------------------------------------------------------------------- */

void PPPMDisp::setup_grid()
{
  // drop everything that was sized for the old decomposition

  deallocate();
  deallocate_peratom();

  // recompute the portion of each global grid owned by this proc

  if (function[0]) {
    set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
                       nxlo_fft, nylo_fft, nzlo_fft,
                       nxhi_fft, nyhi_fft, nzhi_fft,
                       nxlo_in, nylo_in, nzlo_in,
                       nxhi_in, nyhi_in, nzhi_in,
                       nxlo_out, nylo_out, nzlo_out,
                       nxhi_out, nyhi_out, nzhi_out,
                       nlower, nupper,
                       ngrid, nfft, nfft_both,
                       shift, shiftone, order);
  }

  if (function[1] || function[2] || function[3]) {
    set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
                       nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                       nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                       nxlo_in_6, nylo_in_6, nzlo_in_6,
                       nxhi_in_6, nyhi_in_6, nzhi_in_6,
                       nxlo_out_6, nylo_out_6, nzlo_out_6,
                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
                       nlower_6, nupper_6,
                       ngrid_6, nfft_6, nfft_both_6,
                       shift_6, shiftone_6, order_6);
  }

  // get fresh K-space dependent memory
  // per-atom arrays are not allocated here, compute() does that on demand

  allocate();

  // overlapping grid communication is an error unless explicitly allowed

  if (function[0]) {
    cg->ghost_notify();
    if (!overlap_allowed && cg->ghost_overlap()) {
      error->all(FLERR,
                 "PPPM grid stencil extends beyond nearest neighbor processor");
    }
    cg->setup();
  }

  if (function[1] || function[2] || function[3]) {
    cg_6->ghost_notify();
    if (!overlap_allowed && cg_6->ghost_overlap()) {
      error->all(FLERR,
                 "PPPM grid stencil extends beyond nearest neighbor processor");
    }
    cg_6->setup();
  }

  // redo the Green's function denominator expansion
  // and the 1d charge distribution coefficients

  if (function[0]) {
    compute_gf_denom(gf_b, order);
    compute_rho_coeff(rho_coeff, drho_coeff, order);
    if (differentiation_flag == 1) {
      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
                          nxlo_fft, nylo_fft, nzlo_fft,
                          nxhi_fft, nyhi_fft, nzhi_fft,
                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
    }
  }

  if (function[1] || function[2] || function[3]) {
    compute_gf_denom(gf_b_6, order_6);
    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
    if (differentiation_flag == 1) {
      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
    }
  }

  // finally refresh the volume-dependent coeffs

  setup();
}

/* ----------------------------------------------------------------------
   compute the PPPM long-range force, energy, virial

   eflag,vflag = energy/virial request flags; they are handed to
                 ev_setup() (and eflag to slabcorr()); when both are zero
                 all tally flags are cleared instead

   each enabled entry of function[] runs the same sequence of steps on
   its own grid data:
     function[0] = Coulomb
     function[1] = dispersion, geometric mixing
     function[2] = dispersion, arithmetic mixing
     function[3] = dispersion, no mixing rule
   map particles to grid -> build density -> reverse comm -> brick2fft ->
   Poisson solve (poisson_*_ad if differentiation_flag == 1, else
   poisson_*_ik) -> forward comm -> fieldforce_* -> optional per-atom part

   afterwards energy and virial are reduced over all procs and the
   correction terms coded below are applied
------------------------------------------------------------------------- */

void PPPMDisp::compute(int eflag, int vflag)
{

  int i;

  // set the energy/virial tally flags via ev_setup(), or clear all of
  // them when neither energy nor virial is requested

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
	 eflag_atom = vflag_atom = 0;

  // per-atom grids are allocated lazily, the first time per-atom
  // energy/virial is requested; their ghost communication is set up here

  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    if (function[0]) {
      cg_peratom->ghost_notify();
      cg_peratom->setup();
    }
    if (function[1] + function[2] + function[3]) {
      cg_peratom_6->ghost_notify();
      cg_peratom_6->setup();
    }
    peratom_allocate_flag = 1;
  }

  // select the box origin; for triclinic boxes
  // convert atoms from box to lamda coords (undone at the end of compute)

  if (triclinic == 0) boxlo = domain->boxlo;
  else {
    boxlo = domain->boxlo_lamda;
    domain->x2lamda(atom->nlocal);
  }
  // extend size of per-atom arrays if necessary
  // (part2grid for Coulomb, part2grid_6 for any dispersion variant)

  if (atom->nlocal > nmax) {

    if (function[0]) memory->destroy(part2grid);
    if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
    nmax = atom->nmax;
    if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
    if (function[1] + function[2] + function[3])
      memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
  }


  // reset the energy accumulators; the virial accumulators are only
  // reset when vflag is set

  energy = 0.0;
  energy_1 = 0.0;
  energy_6 = 0.0;
  if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;

  // find grid points for all my particles
  // distribute particles' charges/dispersion coefficients on the grid
  // communication between processors and remapping to fft
  // Solution of poissons equation in k-space and backtransformation
  // communication between processors
  // calculation of forces

  if (function[0]) {

    // perform calculations for coulomb interactions only

    particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
                 nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);

    make_rho_c();

    cg->reverse_comm(this,REVERSE_RHO);

    brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
	      density_brick, density_fft, work1,remap);

    // differentiation_flag == 1 selects the *_ad routines,
    // anything else the *_ik routines

    if (differentiation_flag == 1) {

      poisson_ad(work1, work2, density_fft, fft1, fft2,
                 nx_pppm, ny_pppm, nz_pppm, nfft,
                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                 energy_1, greensfn,
                 virial_1, vg,vg2,
                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);

      cg->forward_comm(this,FORWARD_AD);

      fieldforce_c_ad();

      // NOTE(review): gated on vflag_atom here, while the ik branch below
      // and the arithmetic-mixing ad branch use evflag_atom -- confirm
      // that the difference is intended

      if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);

    } else {
      poisson_ik(work1, work2, density_fft, fft1, fft2,
                 nx_pppm, ny_pppm, nz_pppm, nfft,
                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
                 energy_1, greensfn,
	         fkx, fky, fkz,fkx2, fky2, fkz2,
                 vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);

      cg->forward_comm(this, FORWARD_IK);

      fieldforce_c_ik();

      if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
    }
    if (evflag_atom) fieldforce_c_peratom();
  }

  if (function[1]) {
    // perform calculations for geometric mixing
    // (uses the dispersion grid, i.e. the *_6 members)
    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
    make_rho_g();


    cg_6->reverse_comm(this, REVERSE_RHO_G);

    brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
	      density_brick_g, density_fft_g, work1_6,remap_6);

    if (differentiation_flag == 1) {

      poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                 energy_6, greensfn_6,
                 virial_6, vg_6, vg2_6,
                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

      cg_6->forward_comm(this,FORWARD_AD_G);

      fieldforce_g_ad();

      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);

    } else {
      poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                 energy_6, greensfn_6,
	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                 vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

      cg_6->forward_comm(this,FORWARD_IK_G);

      fieldforce_g_ik();


      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
    }
    if (evflag_atom) fieldforce_g_peratom();
  }

  if (function[2]) {
    // perform calculations for arithmetic mixing
    // seven density grids (a0..a6) are involved: a3 is solved on its own,
    // the pairs (a0,a6), (a1,a5), (a2,a4) are solved two at a time
    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
    make_rho_a();

    cg_6->reverse_comm(this, REVERSE_RHO_A);

    brick2fft_a();

    if ( differentiation_flag == 1) {

      poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                 energy_6, greensfn_6,
                 virial_6, vg_6, vg2_6,
                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
      poisson_2s_ad(density_fft_a0, density_fft_a6,
                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
      poisson_2s_ad(density_fft_a1, density_fft_a5,
                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
      poisson_2s_ad(density_fft_a2, density_fft_a4,
                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);

      cg_6->forward_comm(this, FORWARD_AD_A);

      fieldforce_a_ad();

      // NOTE(review): evflag_atom here, vflag_atom in the other ad branches

      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);

    }  else {

      poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
                 energy_6, greensfn_6,
	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
                 vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
      poisson_2s_ik(density_fft_a0, density_fft_a6,
                    vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
                    vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
      poisson_2s_ik(density_fft_a1, density_fft_a5,
                    vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
                    vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
      poisson_2s_ik(density_fft_a2, density_fft_a4,
                    vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
                    vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);

      cg_6->forward_comm(this, FORWARD_IK_A);

      fieldforce_a_ik();

      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
    }
    if (evflag_atom) fieldforce_a_peratom();
  }

  if (function[3]) {
    // perform calculations if no mixing rule applies
    // the density grids are processed in pairs (n, n+1), nsplit_alloc/2
    // pairs in total
    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);

    make_rho_none();

    cg_6->reverse_comm(this, REVERSE_RHO_NONE);

    brick2fft_none();

    if (differentiation_flag == 1) {

      int n = 0;
      for (int k = 0; k<nsplit_alloc/2; k++) {
        poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1],
                        u_brick_none[n],u_brick_none[n+1],
                        v0_brick_none, v1_brick_none, v2_brick_none,
                        v3_brick_none, v4_brick_none, v5_brick_none);
        n += 2;
      }

      cg_6->forward_comm(this,FORWARD_AD_NONE);

      fieldforce_none_ad();

      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);

    } else {
      int n = 0;
      for (int k = 0; k<nsplit_alloc/2; k++) {

        poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1],
                        vdx_brick_none[n], vdy_brick_none[n], vdz_brick_none[n],
                        vdx_brick_none[n+1], vdy_brick_none[n+1], vdz_brick_none[n+1],
                        u_brick_none, v0_brick_none, v1_brick_none, v2_brick_none,
                        v3_brick_none, v4_brick_none, v5_brick_none);
        n += 2;
      }

      cg_6->forward_comm(this,FORWARD_IK_NONE);

      fieldforce_none_ik();

      if (evflag_atom)
        cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
    }
    if (evflag_atom) fieldforce_none_peratom();
  }

  // update qsum and qsqsum, if atom count has changed and energy needed

  if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
    qsum_qsq();
    natoms_original = atom->natoms;
  }

  // sum energy across procs and add in volume-dependent term
  // only the Coulomb energy (energy_1) is multiplied by qscale

  const double qscale = force->qqrd2e * scale;
  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy_1 = energy_all;
    MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy_6 = energy_all;

    energy_1 *= 0.5*volume;
    energy_6 *= 0.5*volume;

    energy_1 -= g_ewald*qsqsum/MY_PIS +
      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
    energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
      1.0/12.0*pow(g_ewald_6,6)*csum;
    energy_1 *= qscale;
  }

  // sum virial across procs
  // the Coulomb part overwrites virial[], the dispersion part is added to it

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
    MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
    if (function[1]+function[2]+function[3]){
      // same term as in energy_6 above, removed from the first three
      // virial components only
      double a =  MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
      virial[0] -= a;
      virial[1] -= a;
      virial[2] -= a;
    }
  }

  // per-atom energy corrections, mirroring the global terms above

  if (eflag_atom) {
    if (function[0]) {
      double *q = atom->q;
      for (i = 0; i < atom->nlocal; i++) {
        eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
      }
    }
    if (function[1] + function[2] + function[3]) {
      int tmp;
      for (i = 0; i < atom->nlocal; i++) {
        tmp = atom->type[i];
        eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
                      1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
      }
    }
  }

  // per-atom virial correction, applied to the first three components

  if (vflag_atom) {
    if (function[1] + function[2] + function[3]) {
      int tmp;
      for (i = 0; i < atom->nlocal; i++) {
        tmp = atom->type[i];
        for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
      }
    }
  }


  // 2d slab correction
  // the total energy is assembled only after slabcorr() has run

  if (slabflag) slabcorr(eflag);
  if (function[0]) energy += energy_1;
  if (function[1] + function[2] + function[3]) energy += energy_6;

  // convert atoms back from lamda to box coords

  if (triclinic) domain->lamda2x(atom->nlocal);
}

/* ----------------------------------------------------------------------
   initialize coefficients needed for the dispersion density on the grids

   frees and rebuilds the array B; its layout depends on which entry of
   function[] is active when this routine finishes:
     function[1]: n+1 entries, B[i] = sqrt(|b[i][i]|)
     function[2]: 7 entries per type index 0..n
     function[3]: nsplit eigenvalues followed by n blocks of nsplit
                  eigenvector components
   for function[2]/function[3] the coefficient matrix is first
   diagonalized with qr_alg(); depending on the resulting nsplit the
   routine may switch function[] to a cheaper variant
------------------------------------------------------------------------- */

void PPPMDisp::init_coeffs()				// local pair coeffs
{
  int tmp;
  int n = atom->ntypes;
  int converged;
  // drop coefficients from a previous call
  delete [] B;
  B = NULL;
  if (function[3] + function[2]) {                     // no mixing rule or arithmetic
    if (function[2] && me == 0) {
      if (screen) fprintf(screen,"  Optimizing splitting of Dispersion coefficients\n");
      if (logfile) fprintf(logfile,"  Optimizing splitting of Dispersion coefficients\n");
    }

    // allocate data for eigenvalue decomposition
    // A and Q stay NULL when there is only one atom type
    double **A=NULL;
    double **Q=NULL;
    if ( n > 1 ) {
      // get dispersion coefficients
      // (read with 1-based type indices below)
      double **b = (double **) force->pair->extract("B",tmp);
      memory->create(A,n,n,"pppm/disp:A");
      memory->create(Q,n,n,"pppm/disp:Q");
      // fill coefficients to matrix a
      for (int i = 1; i <= n; i++)
        for (int j = 1; j <= n; j++)
          A[i-1][j-1] = b[i][j];
      // initialize Q to the identity matrix
      for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
          Q[i][j] = 0.0;
      for (int i = 0; i < n; i++)
        Q[i][i] = 1.0;
      // perform eigenvalue decomposition with QR algorithm
      // afterwards the diagonal of A is used as the eigenvalues and the
      // columns of Q as the matching eigenvectors
      converged = qr_alg(A,Q,n);
      // NOTE(review): a failed factorization is only fatal for function[3];
      // with function[2] the unconverged result is still used below
      if (function[3] && !converged) {
        error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
      }
      // determine number of used eigenvalues
      //   based on maximum allowed number or cutoff criterion
      //   sort eigenvalues according to their size with bubble sort
      //   (descending in magnitude; columns of Q are swapped along)
      double t;
      for (int i = 0; i < n; i++) {
        for (int j = 0; j < n-1-i; j++) {
          if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
            t = A[j][j];
            A[j][j] = A[j+1][j+1];
            A[j+1][j+1] = t;
            for (int k = 0; k < n; k++) {
              t = Q[k][j];
              Q[k][j] = Q[k][j+1];
              Q[k][j+1] = t;
            }
          }
        }
      }

      //   check which eigenvalue is the first that is smaller
      //   than a specified tolerance
      //   nsplit counts the leading eigenvalues with magnitude above
      //   splittol times the largest one; bmax is the first one dropped
      double amax = fabs(A[0][0]);
      double acrit = amax*splittol;
      double bmax = 0;
      double err = 0;
      nsplit = 0;
      for (int i = 0; i < n; i++) {
        if (fabs(A[i][i]) > acrit) nsplit++;
        else {
          bmax = fabs(A[i][i]);
          break;
        }
      }

      // warn if the first neglected eigenvalue is not small relative to
      // the largest one
      err =  bmax/amax;
      if (err > 1.0e-4) {
        char str[128];
        sprintf(str,"Estimated error in splitting of dispersion coeffs is %g",err);
        error->warning(FLERR, str);
      }
      // set B
      //   B[i] = eigenvalue i                      for i < nsplit
      //   B[nsplit*(j+1) + i] = Q[j][i]            for j < n
      B = new double[nsplit*n+nsplit];
      for (int i = 0; i< nsplit; i++) {
        B[i] = A[i][i];
        for (int j = 0; j < n; j++) {
          B[nsplit*(j+1) + i] = Q[j][i];
        }
      }

      // nsplit_alloc is nsplit rounded up to an even number
      nsplit_alloc = nsplit;
      if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
    } else
        nsplit = 1; // use geometric mixing

    // check if the function should preferably be [1] or [2] or [3]
    // the tests below run in order and each one sees the flags as left by
    // the previous one

    // a single term: fall back to geometric mixing; B is rebuilt in the
    // function[1] branch further down
    if (nsplit == 1) {
      if ( B ) delete [] B;
      function[3] = 0;
      function[2] = 0;
      function[1] = 1;
      if (me == 0) {
        if (screen) fprintf(screen,"  Using geometric mixing for reciprocal space\n");
        if (logfile) fprintf(logfile,"  Using geometric mixing for reciprocal space\n");
      }
    }
    // arithmetic mixing requested but at most 6 terms are needed: use the
    // split (function[3]) representation and keep B as computed above
    if (function[2] && nsplit <= 6) {
      if (me == 0) {
        if (screen) fprintf(screen,"  Using %d instead of 7 structure factors\n",nsplit);
        if (logfile) fprintf(logfile,"  Using %d instead of 7 structure factors\n",nsplit);
      }
      function[3] = 1;
      function[2] = 0;
    }
    // arithmetic mixing stays: discard the split coefficients, B is rebuilt
    // in the function[2] branch further down
    if (function[2] && (nsplit > 6)) {
      if (me == 0) {
        if (screen) fprintf(screen,"  Using 7 structure factors\n");
        if (logfile) fprintf(logfile,"  Using 7 structure factors\n");
      }
      if ( B ) delete [] B;
    }
    if (function[3]) {
      if (me == 0) {
        if (screen) fprintf(screen,"  Using %d structure factors\n",nsplit);
        if (logfile) fprintf(logfile,"  Using %d structure factors\n",nsplit);
      }
      if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
    }

    memory->destroy(A);
    memory->destroy(Q);
  }
  if (function[1]) {					// geometric 1/r^6
    // one coefficient per type index 0..n, taken from the diagonal of b
    double **b = (double **) force->pair->extract("B",tmp);
    B = new double[n+1];
    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
  }
  if (function[2]) {					// arithmetic 1/r^6
    //cannot use epsilon, because this has not been set yet
    double **epsilon = (double **) force->pair->extract("epsilon",tmp);
    //cannot use sigma, because this has not been set yet
    double **sigma = (double **) force->pair->extract("sigma",tmp);
    if (!(epsilon&&sigma))
      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
    // bi walks through B; for every type index i = 0..n seven values
    // 0.25 * c[j] * sqrt(epsilon[i][i]) * sigma[i][i]^j, j = 0..6, are stored
    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
    double c[7] = {
      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
    for (int i=0; i<=n; ++i) {
      eps_i = sqrt(epsilon[i][i]);
      sigma_i = sigma[i][i];
      sigma_n = 1.0;
      for (int j=0; j<7; ++j) {
        *(bi++) = sigma_n*eps_i*c[j]*0.25;
        sigma_n *= sigma_i;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   Eigenvalue decomposition of a real, symmetric matrix with the QR
   method (includes transformation to tridiagonal matrix + Wilkinson
   shift)

   A = n x n matrix, overwritten in place by the iteration
   Q = n x n matrix that accumulates all rotations applied to A; the
       caller in init_coeffs() passes in the identity
   n = matrix dimension

   returns 1 if check_convergence() accepted the factorization,
           0 if the iteration limit was reached first
   for n < 2 nothing is done and 1 is returned
------------------------------------------------------------------------- */

int PPPMDisp::qr_alg(double **A, double **Q, int n)
{
  // the Wilkinson shift reads the trailing 2x2 block of A, which does not
  // exist for n < 2 (A[n-2] would be out of bounds); such a matrix is
  // already diagonal, so there is nothing to iterate on
  if (n < 2) return 1;

  int converged = 0;
  double an1, an, bn1, d, mue;
  // allocate some memory for the required operations
  double **A0,**Qi,**C,**D,**E;
  // make a copy of A for convergence check
  memory->create(A0,n,n,"pppm/disp:A0");
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++)
      A0[i][j] = A[i][j];

  // allocate an auxiliary matrix Qi (rotation of the current sweep)
  memory->create(Qi,n,n,"pppm/disp:Qi");

  // allocate auxiliary matrices for the matrix multiplications
  memory->create(C,n,n,"pppm/disp:C");
  memory->create(D,n,n,"pppm/disp:D");
  memory->create(E,n,n,"pppm/disp:E");

  // transform matrix A to tridiagonal form
  hessenberg(A,Q,n);

  // matrix factorization loop, at most countmax sweeps
  const int countmax = 100000;
  for (int count = 0; count < countmax; count++) {
    // make a Wilkinson shift, computed from the trailing 2x2 block of A
    an1 = A[n-2][n-2];
    an = A[n-1][n-1];
    bn1 = A[n-2][n-1];
    d = (an1-an)/2;
    mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
    for (int i = 0; i < n; i++)
      A[i][i] -= mue;

    // perform a QR factorization for a tridiagonal matrix A
    qr_tri(Qi,A,n);

    // update the matrices: A <- A*Qi, Q <- Q*Qi
    mmult(A,Qi,C,n);
    mmult(Q,Qi,C,n);

    // backward Wilkinson shift
    for (int i = 0; i < n; i++)
      A[i][i] += mue;

    // check the convergence
    converged = check_convergence(A,Q,A0,C,D,E,n);
    if (converged) break;
  }

  // free allocated memory
  memory->destroy(Qi);
  memory->destroy(A0);
  memory->destroy(C);
  memory->destroy(D);
  memory->destroy(E);

  return converged;
}

/* ----------------------------------------------------------------------
   Transform a Matrix to Hessenberg form (for symmetric Matrices, the
   result will be a tridiagonal matrix)

   A = n x n matrix, transformed in place by plane rotations applied
       from the left and from the right
   Q = n x n matrix, receives the same rotations from the right
   n = matrix dimension

   a rotation whose two defining entries are both zero is skipped
------------------------------------------------------------------------- */

void PPPMDisp::hessenberg(double **A, double **Q, int n)
{
  double r,a,b,c,s,x1,x2;
  for (int i = 0; i < n-1; i++) {
    for (int j = i+2; j < n; j++) {
      // compute coeffs for the rotation matrix that zeroes A[j][i]
      // against A[i+1][i]
      a = A[i+1][i];
      b = A[j][i];
      r = sqrt(a*a + b*b);
      // both entries are already zero: there is nothing to eliminate, and
      // a/r, b/r would be 0/0 = NaN and contaminate all of A and Q
      if (r == 0.0) continue;
      c = a/r;
      s = b/r;
      // update the entries of A with multiplication from the left
      // (rows i+1 and j)
      for (int k = 0; k < n; k++) {
        x1 = A[i+1][k];
        x2 = A[j][k];
        A[i+1][k] = c*x1 + s*x2;
        A[j][k] = -s*x1 + c*x2;
      }
      // update the entries of A and Q with a multiplication from the right
      // (columns i+1 and j)
      for (int k = 0; k < n; k++) {
        x1 = A[k][i+1];
        x2 = A[k][j];
        A[k][i+1] = c*x1 + s*x2;
        A[k][j] = -s*x1 + c*x2;
        x1 = Q[k][i+1];
        x2 = Q[k][j];
        Q[k][i+1] = c*x1 + s*x2;
        Q[k][j] = -s*x1 + c*x2;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   QR factorization for a tridiagonal matrix; Result of the factorization
   is stored in A and Qi

   Qi = n x n matrix, reset to the identity and then updated with every
        rotation (columns i and i+1)
   A  = n x n matrix, rows i and i+1 are rotated in place; only the
        columns max(i-1,0) .. min(i+3,n)-1 are touched
   n  = matrix dimension

   a rotation whose two defining entries are both zero is skipped
------------------------------------------------------------------------- */

void PPPMDisp::qr_tri(double** Qi,double** A,int n)
{
  double r,a,b,c,s,x1,x2;
  int j,k,k0,kmax;
  // make Qi a unity matrix
  for (int row = 0; row < n; row++) {
    for (int col = 0; col < n; col++)
      Qi[row][col] = 0.0;
    Qi[row][row] = 1.0;
  }
  // loop over main diagonal and first off-diagonal of A
  for (int i = 0; i < n-1; i++) {
    j = i+1;
    // coefficients of the rotation matrix that zeroes A[j][i]
    a = A[i][i];
    b = A[j][i];
    r = sqrt(a*a + b*b);
    // diagonal and sub-diagonal entry are both zero: nothing to eliminate,
    // and a/r, b/r would be 0/0 = NaN and contaminate A and Qi
    if (r == 0.0) continue;
    c = a/r;
    s = b/r;
    // update the entries of A and Q
    k0 = (i-1>0)?i-1:0;    // max(i-1,0)
    kmax = (i+3<n)?i+3:n;  // min(i+3,n)
    for (k = k0; k < kmax; k++) {
      x1 = A[i][k];
      x2 = A[j][k];
      A[i][k] = c*x1 + s*x2;
      A[j][k] = -s*x1 + c*x2;
    }
    for (k = 0; k < n; k++) {
      x1 = Qi[k][i];
      x2 = Qi[k][j];
      Qi[k][i] = c*x1 + s*x2;
      Qi[k][j] = -s*x1 + c*x2;
    }
  }
}

/* ----------------------------------------------------------------------
   In-place matrix product A <- A*B for n x n matrices

   A = left factor, overwritten with the product
   B = right factor, only read
   C = n x n scratch matrix; holds the product when the routine returns
   n = matrix dimension
------------------------------------------------------------------------- */

void PPPMDisp::mmult(double** A, double** B, double** C, int n)
{
  // clear the scratch matrix
  for (int row = 0; row < n; ++row) {
    double *crow = C[row];
    for (int col = 0; col < n; ++col) crow[col] = 0.0;
  }

  // accumulate C = A*B; for every C[row][col] the terms are still added
  // in order of increasing inner index
  for (int row = 0; row < n; ++row) {
    double *crow = C[row];
    for (int inner = 0; inner < n; ++inner) {
      const double *brow = B[inner];
      for (int col = 0; col < n; ++col)
        crow[col] += A[row][inner] * brow[col];
    }
  }

  // hand the finished product back to the caller in A
  for (int row = 0; row < n; ++row) {
    double *arow = A[row];
    const double *crow = C[row];
    for (int col = 0; col < n; ++col) arow[col] = crow[col];
  }
}

/* ----------------------------------------------------------------------
   Check if the factorization has converged by comparing all elements of the
   original matrix and the new matrix

   A     = current iterate, only its diagonal is used
   Q     = accumulated transformation
   A0    = copy of the original matrix
   C,D,E = n x n scratch matrices, overwritten
   n     = matrix dimension

   the matrix Q * diag(A) * Q^T is formed and compared with A0;
   returns 1 if every element agrees within 1.0e-8 times the largest
   magnitude entry of A0, otherwise 0 (also if an element is NaN)
------------------------------------------------------------------------- */

int PPPMDisp::check_convergence(double** A,double** Q,double** A0,
                                double** C,double** D,double** E,int n)
{
  const double eps = 1.0e-8;

  // scale the tolerance with the largest magnitude entry of the original
  // matrix; using magnitudes keeps the tolerance non-zero when the matrix
  // has no positive entry
  double refmax = 0.0;
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++) {
      const double mag = fabs(A0[i][j]);
      if (mag > refmax) refmax = mag;
    }
  const double epsabs = eps*refmax;

  // reconstruct the original matrix
  // store the diagonal elements of A in D and a copy of Q in E
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {
      D[i][j] = 0.0;
      E[i][j] = Q[i][j];
    }
    D[i][i] = A[i][i];
  }
  // E = Q*diag(A)
  mmult(E,D,C,n);
  // store transpose of Q in D
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++)
      D[i][j] = Q[j][i];
  // E = Q*diag(A)*Q.t
  mmult(E,D,C,n);

  // compare the original matrix and the reconstructed matrix
  // the test is written as !(x <= tol) so that a NaN difference counts as
  // "not converged"; x > tol is false for NaN and would let it pass
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {
      const double diff = fabs(A0[i][j] - E[i][j]);
      if (!(diff <= epsabs)) return 0;
    }
  }
  return 1;
}

/* ----------------------------------------------------------------------
   allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void PPPMDisp::allocate()
{

  int (*procneigh)[2] = comm->procneigh;

  if (function[0]) {
    memory->create(work1,2*nfft_both,"pppm/disp:work1");
    memory->create(work2,2*nfft_both,"pppm/disp:work2");

    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");

    memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
    memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
    memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");


    memory->create(gf_b,order,"pppm/disp:gf_b");
    memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
    memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
    memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d");
    memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");

    memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
    memory->create(vg,nfft_both,6,"pppm/disp:vg");
    memory->create(vg2,nfft_both,3,"pppm/disp:vg2");

    memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
  			    nxlo_out,nxhi_out,"pppm/disp:density_brick");
    if ( differentiation_flag == 1) {
      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
  		  	      nxlo_out,nxhi_out,"pppm/disp:u_brick");
      memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
      memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
      memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
      memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
      memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
      memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");

    } else {
      memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
  			      nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
      memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
			      nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
      memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
			      nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
    }
    memory->create(density_fft,nfft_both,"pppm/disp:density_fft");

    int tmp;

    fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
		     0,0,&tmp,collective_flag);

    fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
		     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
		     0,0,&tmp,collective_flag);

    remap = new Remap(lmp,world,
		      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
		      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
		      1,0,0,FFT_PRECISION,collective_flag);

  // create ghost grid object for rho and electric field communication

  if (differentiation_flag == 1)
    cg = new GridComm(lmp,world,1,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
  else
    cg = new GridComm(lmp,world,3,1,
                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
  }

  if (function[1]) {
    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");

    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");

    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");

    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");

    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");

    memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
    if ( differentiation_flag == 1) {
      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");

      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");

    }  else {
      memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
      memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
      memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
    }
    memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");


    int tmp;

    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     0,0,&tmp,collective_flag);

    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		     0,0,&tmp,collective_flag);

    remap_6 = new Remap(lmp,world,
		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		      1,0,0,FFT_PRECISION,collective_flag);

    // create ghost grid object for rho and electric field communication

    if (differentiation_flag == 1)
      cg_6 = new GridComm(lmp,world,1,1,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
    else
      cg_6 = new GridComm(lmp,world,3,1,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
  }

  if (function[2]) {
    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");

    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");

    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");

    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");

    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");

    memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
    memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
    memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
    memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
    memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
    memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
    memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");

    memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
    memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
    memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
    memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
    memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
    memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
    memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");


    if ( differentiation_flag == 1 ) {
      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");

      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");

    } else {

      memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
      memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
      memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");

      memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
      memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
      memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");

      memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
      memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
      memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");

      memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
      memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
      memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");

      memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
      memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
      memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");

      memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
      memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
      memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");

      memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
      memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
      memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
    }


    int tmp;

    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     0,0,&tmp,collective_flag);

    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		     0,0,&tmp,collective_flag);

    remap_6 = new Remap(lmp,world,
		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		      1,0,0,FFT_PRECISION,collective_flag);

    // create ghost grid object for rho and electric field communication


    if (differentiation_flag == 1)
      cg_6 = new GridComm(lmp,world,7,7,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
    else
      cg_6 = new GridComm(lmp,world,21,7,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
  }

  if (function[3]) {
    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");

    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");

    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");

    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");

    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");

    memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
    if ( differentiation_flag == 1) {
      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");

      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");

    }  else {
      memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
      memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
      memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
    }
    memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");


    int tmp;

    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     0,0,&tmp,collective_flag);

    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		     0,0,&tmp,collective_flag);

    remap_6 = new Remap(lmp,world,
		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
		      1,0,0,FFT_PRECISION,collective_flag);

    // create ghost grid object for rho and electric field communication

    if (differentiation_flag == 1)
      cg_6 = new GridComm(lmp,world,nsplit_alloc,nsplit_alloc,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
    else
      cg_6 = new GridComm(lmp,world,3*nsplit_alloc,nsplit_alloc,
                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
  }

}

/* ----------------------------------------------------------------------
   allocate memory that depends on # of K-vectors and order
   for per atom calculations
   for every enabled entry of function[] this creates the v0 ... v5
   bricks, the u bricks unless differentiation_flag == 1, and a GridComm
   object for the per-atom grids
------------------------------------------------------------------------- */

void PPPMDisp::allocate_peratom()
{
  int (*procneigh)[2] = comm->procneigh;

  // with differentiation_flag == 1 the u bricks are not created here
  // (allocate() creates them in that case) and the ghost grid objects
  // below are built with 6 instead of 7 values per grid

  const int have_u = (differentiation_flag == 1);

  if (function[0]) {

    if (!have_u) {
      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,
                              nyhi_out,nxlo_out,nxhi_out,"pppm/disp:u_brick");
    }

    memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v0_brick");
    memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v1_brick");
    memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v2_brick");
    memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v3_brick");
    memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v4_brick");
    memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,
                            nyhi_out,nxlo_out,nxhi_out,"pppm/disp:v5_brick");

    // create ghost grid object for rho and electric field communication

    const int nforward = have_u ? 6 : 7;
    cg_peratom =
      new GridComm(lmp,world,nforward,1,
                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                   nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                   procneigh[0][0],procneigh[0][1],
                   procneigh[1][0],procneigh[1][1],
                   procneigh[2][0],procneigh[2][1]);
  }

  if (function[1]) {

    if (!have_u) {
      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
    }

    memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
    memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
    memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
    memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
    memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
    memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");

    // create ghost grid object for rho and electric field communication

    const int nforward = have_u ? 6 : 7;
    cg_peratom_6 =
      new GridComm(lmp,world,nforward,1,
                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                   procneigh[0][0],procneigh[0][1],
                   procneigh[1][0],procneigh[1][1],
                   procneigh[2][0],procneigh[2][1]);
  }

  if (function[2]) {

    // seven grids (a0 ... a6) of every brick type

    if (!have_u) {
      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
    }

    memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
    memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
    memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
    memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
    memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
    memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");

    memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
    memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
    memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
    memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
    memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
    memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");

    memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
    memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
    memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
    memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
    memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
    memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");

    memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
    memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
    memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
    memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
    memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
    memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");

    memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
    memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
    memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
    memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
    memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
    memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");

    memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
    memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
    memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
    memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
    memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
    memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");

    memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
    memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
    memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
    memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
    memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
    memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");

    // create ghost grid object for rho and electric field communication
    // seven times the count of the single-grid branches

    const int nforward = have_u ? 42 : 49;
    cg_peratom_6 =
      new GridComm(lmp,world,nforward,1,
                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                   procneigh[0][0],procneigh[0][1],
                   procneigh[1][0],procneigh[1][1],
                   procneigh[2][0],procneigh[2][1]);
  }

  if (function[3]) {

    // nsplit_alloc grids per brick type, stored in 4d arrays

    if (!have_u) {
      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                              nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
    }

    memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
    memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
    memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
    memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
    memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
    memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,
                            nyhi_out_6,nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");

    // create ghost grid object for rho and electric field communication
    // nsplit_alloc times the count of the single-grid branches

    const int nforward = (have_u ? 6 : 7)*nsplit_alloc;
    cg_peratom_6 =
      new GridComm(lmp,world,nforward,1,
                   nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
                   nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
                   procneigh[0][0],procneigh[0][1],
                   procneigh[1][0],procneigh[1][1],
                   procneigh[2][0],procneigh[2][1]);
  }
}


/* ----------------------------------------------------------------------
   deallocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void PPPMDisp::deallocate()
{
  // Releases the grid arrays, coefficient tables and FFT/remap/GridComm
  // objects listed below and resets every pointer to NULL afterwards.
  // Offset-allocated arrays are destroyed with the same lower-bound
  // offsets that are passed here; the *_out values belong to the Coulomb
  // grid, the *_out_6 values to the dispersion grid(s).

  // Coulomb grid: density brick, vdx/vdy/vdz bricks and FFT density

  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
  memory->destroy(density_fft);
  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
  density_fft = NULL;

  // dispersion grid with suffix _g
  // (presumably the geometric-mixing case, cf. enum GEOMETRIC - confirm)

  memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_g);
  density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
  density_fft_g = NULL;

  // seven dispersion grids with suffixes _a0 ... _a6
  // (presumably the arithmetic-mixing case, cf. enum ARITHMETIC - confirm)

  memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a0);
  density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
  density_fft_a0 = NULL;

  memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a1);
  density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
  density_fft_a1 = NULL;

  memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a2);
  density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
  density_fft_a2 = NULL;

  memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a3);
  density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
  density_fft_a3 = NULL;

  memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a4);
  density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
  density_fft_a4 = NULL;

  memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a5);
  density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
  density_fft_a5 = NULL;

  memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_a6);
  density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
  density_fft_a6 = NULL;

  // 4d dispersion grids with suffix _none: the offsets given here apply
  // to the three grid dimensions

  memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
  memory->destroy(density_fft_none);
  density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
  density_fft_none = NULL;

  // sf_precoeff tables, first the Coulomb set and then the _6 set

  memory->destroy(sf_precoeff1);
  memory->destroy(sf_precoeff2);
  memory->destroy(sf_precoeff3);
  memory->destroy(sf_precoeff4);
  memory->destroy(sf_precoeff5);
  memory->destroy(sf_precoeff6);
  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;

  memory->destroy(sf_precoeff1_6);
  memory->destroy(sf_precoeff2_6);
  memory->destroy(sf_precoeff3_6);
  memory->destroy(sf_precoeff4_6);
  memory->destroy(sf_precoeff5_6);
  memory->destroy(sf_precoeff6_6);
  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;

  // Green's function, work and vg arrays of both grids

  memory->destroy(greensfn);
  memory->destroy(greensfn_6);
  memory->destroy(work1);
  memory->destroy(work2);
  memory->destroy(work1_6);
  memory->destroy(work2_6);
  memory->destroy(vg);
  memory->destroy(vg2);
  memory->destroy(vg_6);
  memory->destroy(vg2_6);
  greensfn = greensfn_6 = NULL;
  work1 = work2 = work1_6 = work2_6 = NULL;
  vg = vg2 = vg_6 = vg2_6 = NULL;

  // 1d fk* arrays, offset by the lower FFT index of each dimension

  memory->destroy1d_offset(fkx,nxlo_fft);
  memory->destroy1d_offset(fky,nylo_fft);
  memory->destroy1d_offset(fkz,nzlo_fft);
  fkx = fky = fkz = NULL;

  memory->destroy1d_offset(fkx2,nxlo_fft);
  memory->destroy1d_offset(fky2,nylo_fft);
  memory->destroy1d_offset(fkz2,nzlo_fft);
  fkx2 = fky2 = fkz2 = NULL;

  memory->destroy1d_offset(fkx_6,nxlo_fft_6);
  memory->destroy1d_offset(fky_6,nylo_fft_6);
  memory->destroy1d_offset(fkz_6,nzlo_fft_6);
  fkx_6 = fky_6 = fkz_6 = NULL;

  memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
  memory->destroy1d_offset(fky2_6,nylo_fft_6);
  memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
  fkx2_6 = fky2_6 = fkz2_6 = NULL;


  // order-dependent tables: the offsets are derived from the current
  // value of order (Coulomb) and order_6 (dispersion)

  memory->destroy(gf_b);
  memory->destroy2d_offset(rho1d,-order/2);
  memory->destroy2d_offset(rho_coeff,(1-order)/2);
  memory->destroy2d_offset(drho1d,-order/2);
  memory->destroy2d_offset(drho_coeff, (1-order)/2);
  gf_b = NULL;
  rho1d = rho_coeff = drho1d = drho_coeff = NULL;

  memory->destroy(gf_b_6);
  memory->destroy2d_offset(rho1d_6,-order_6/2);
  memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
  memory->destroy2d_offset(drho1d_6,-order_6/2);
  memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
  gf_b_6 = NULL;
  rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;

  // FFT, remap and ghost-communication objects of both grids

  delete fft1;
  delete fft2;
  delete remap;
  delete cg;
  fft1 = fft2 = NULL;
  remap = NULL;
  cg = NULL;

  delete fft1_6;
  delete fft2_6;
  delete remap_6;
  delete cg_6;
  fft1_6 = fft2_6 = NULL;
  remap_6 = NULL;
  cg_6 = NULL;
}


/* ----------------------------------------------------------------------
   deallocate memory that depends on # of K-vectors and order
   for per atom calculations
------------------------------------------------------------------------- */

void PPPMDisp::deallocate_peratom()
{
  // Releases the u/v0..v5 bricks of every grid variant and the two
  // per-atom GridComm objects, resetting all pointers to NULL.
  // The flag is cleared first to record that the per-atom arrays are gone.

  peratom_allocate_flag = 0;

  // Coulomb grid bricks use the nxlo_out/nylo_out/nzlo_out offsets

  memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
  memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
  u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;

  // all dispersion bricks below use the *_out_6 offsets

  // suffix _g

  memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;

  // suffixes _a0 ... _a6

  memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;

  memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;

  memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;

  memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;

  memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;

  memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;

  memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;

  // suffix _none: 4d arrays

  memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
  u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;

  // ghost-grid communication objects for the per-atom bricks

  delete cg_peratom;
  delete cg_peratom_6;
  cg_peratom = cg_peratom_6 = NULL;
}

/* ----------------------------------------------------------------------
   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
   for Coulomb interactions
------------------------------------------------------------------------- */

void PPPMDisp::set_grid()
{
  double q2 = qsqsum * force->qqrd2e;

  // use xprd,yprd,zprd even if triclinic so grid size is the same
  // adjust z dimension for 2d slab PPPM
  // 3d PPPM just uses zprd since slab_volfactor = 1.0

  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double zprd_slab = zprd*slab_volfactor;

  // make initial g_ewald estimate
  // based on desired accuracy and real space cutoff
  // fluid-occupied volume used to estimate real-space error
  // zprd used rather than zprd_slab

  double h, h_x,h_y,h_z;
  bigint natoms = atom->natoms;

  if (!gewaldflag) {
    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
    if (g_ewald >= 1.0)
      error->all(FLERR,"KSpace accuracy too large to estimate G vector");
    g_ewald = sqrt(-log(g_ewald)) / cutoff;
  }

  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
  // nz_pppm uses extended zprd_slab instead of zprd
  // reduce it until accuracy target is met

  if (!gridflag) {
    h = h_x = h_y = h_z = 4.0/g_ewald;
    int count = 0;
    while (1) {

      // set grid dimension
      nx_pppm = static_cast<int> (xprd/h_x);
      ny_pppm = static_cast<int> (yprd/h_y);
      nz_pppm = static_cast<int> (zprd_slab/h_z);

      if (nx_pppm <= 1) nx_pppm = 2;
      if (ny_pppm <= 1) ny_pppm = 2;
      if (nz_pppm <= 1) nz_pppm = 2;

      //set local grid dimension
      int npey_fft,npez_fft;
      if (nz_pppm >= nprocs) {
        npey_fft = 1;
        npez_fft = nprocs;
      } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);

      int me_y = me % npey_fft;
      int me_z = me / npey_fft;

      nxlo_fft = 0;
      nxhi_fft = nx_pppm - 1;
      nylo_fft = me_y*ny_pppm/npey_fft;
      nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
      nzlo_fft = me_z*nz_pppm/npez_fft;
      nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;

      double qopt = compute_qopt();

      double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);

      count++;

      // break loop if the accuracy has been reached or too many loops have been performed
      if (dfkspace <= accuracy) break;
      if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
      h *= 0.95;
      h_x = h_y = h_z = h;
    }
  }

  // boost grid size until it is factorable

  while (!factorable(nx_pppm)) nx_pppm++;
  while (!factorable(ny_pppm)) ny_pppm++;
  while (!factorable(nz_pppm)) nz_pppm++;
}

/* ----------------------------------------------------------------------
   set the FFT parameters
------------------------------------------------------------------------- */

void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
                                   int& nxlo_f,int& nylo_f,int& nzlo_f,
                                   int& nxhi_f,int& nyhi_f,int& nzhi_f,
                                   int& nxlo_i,int& nylo_i,int& nzlo_i,
                                   int& nxhi_i,int& nyhi_i,int& nzhi_i,
                                   int& nxlo_o,int& nylo_o,int& nzlo_o,
                                   int& nxhi_o,int& nyhi_o,int& nzhi_o,
		                   int& nlow, int& nupp,
                                   int& ng, int& nf, int& nfb,
		                   double& sft,double& sftone, int& ord)
{
  // global indices of PPPM grid range from 0 to N-1
  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
  //   global PPPM grid that I own without ghost cells
  // for slab PPPM, assign z grid as if it were not extended

  nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
  nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;

  nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
  nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;

  nzlo_i = static_cast<int>
      (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
  nzhi_i = static_cast<int>
      (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;


  // nlow,nupp = stencil size for mapping particles to PPPM grid

  nlow = -(ord-1)/2;
  nupp = ord/2;

  // sft values for particle <-> grid mapping
  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

  if (ord % 2) sft = OFFSET + 0.5;
  else sft = OFFSET;
  if (ord % 2) sftone = 0.0;
  else sftone = 0.5;

  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
  //   global PPPM grid that my particles can contribute charge to
  // effectively nlo_in,nhi_in + ghost cells
  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
  //           position a particle in my box can be at
  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
  //   qdist = offset due to TIP4P fictitious charge
  //   convert to triclinic if necessary
  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
  // for slab PPPM, assign z grid as if it were not extended

  double *prd,*sublo,*subhi;

  if (triclinic == 0) {
    prd = domain->prd;
    boxlo = domain->boxlo;
    sublo = domain->sublo;
    subhi = domain->subhi;
  } else {
    prd = domain->prd_lamda;
    boxlo = domain->boxlo_lamda;
    sublo = domain->sublo_lamda;
    subhi = domain->subhi_lamda;
  }

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  double dist[3];
  double cuthalf = 0.5*neighbor->skin + qdist;
  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
  else {
    dist[0] = cuthalf/domain->prd[0];
    dist[1] = cuthalf/domain->prd[1];
    dist[2] = cuthalf/domain->prd[2];
  }

  int nlo,nhi;

  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
                            nx_p/xprd + sft) - OFFSET;
  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
                            nx_p/xprd + sft) - OFFSET;
  nxlo_o = nlo + nlow;
  nxhi_o = nhi + nupp;

  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
                            ny_p/yprd + sft) - OFFSET;
  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
                            ny_p/yprd + sft) - OFFSET;
  nylo_o = nlo + nlow;
  nyhi_o = nhi + nupp;

  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
                            nz_p/zprd_slab + sft) - OFFSET;
  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
                            nz_p/zprd_slab + sft) - OFFSET;
  nzlo_o = nlo + nlow;
  nzhi_o = nhi + nupp;

  // for slab PPPM, change the grid boundary for processors at +z end
  //   to include the empty volume between periodically repeating slabs
  // for slab PPPM, want charge data communicated from -z proc to +z proc,
  //   but not vice versa, also want field data communicated from +z proc to
  //   -z proc, but not vice versa
  // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)

  if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
    nzhi_i = nz_p - 1;
    nzhi_o = nz_p - 1;
  }

  // decomposition of FFT mesh
  // global indices range from 0 to N-1
  // proc owns entire x-dimension, clump of columns in y,z dimensions
  // npey_fft,npez_fft = # of procs in y,z dims
  // if nprocs is small enough, proc can own 1 or more entire xy planes,
  //   else proc owns 2d sub-blocks of yz plane
  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
  // nlo_fft,nhi_fft = lower/upper limit of the section
  //   of the global FFT mesh that I own

  int npey_fft,npez_fft;
  if (nz_p >= nprocs) {
    npey_fft = 1;
    npez_fft = nprocs;
  } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);

  int me_y = me % npey_fft;
  int me_z = me / npey_fft;

  nxlo_f = 0;
  nxhi_f = nx_p - 1;
  nylo_f = me_y*ny_p/npey_fft;
  nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
  nzlo_f = me_z*nz_p/npez_fft;
  nzhi_f = (me_z+1)*nz_p/npez_fft - 1;

  // PPPM grid for this proc, including ghosts

  ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
    (nzhi_o-nzlo_o+1);

  // FFT arrays on this proc, without ghosts
  // nfft = FFT points in FFT decomposition on this proc
  // nfft_brick = FFT points in 3d brick-decomposition on this proc
  // nfft_both = greater of 2 values

  nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
    (nzhi_f-nzlo_f+1);
  int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
    (nzhi_i-nzlo_i+1);
  nfb = MAX(nf,nfft_brick);

}

/* ----------------------------------------------------------------------
   check if all factors of n are in list of factors
   return 1 if yes, 0 if no
------------------------------------------------------------------------- */

int PPPMDisp::factorable(int n)
{
  // Repeatedly strip one entry of factors[] (the first that divides n)
  // from n. Returns 1 if n can be reduced to 1 (or is already <= 1),
  // 0 as soon as no listed factor divides the remainder.

  while (n > 1) {
    int divisor = 0;
    for (int idx = 0; idx < nfactors && !divisor; idx++)
      if (n % factors[idx] == 0) divisor = factors[idx];

    if (!divisor) return 0;
    n /= divisor;
  }

  return 1;
}

/* ----------------------------------------------------------------------
   adjust g_ewald with a Newton iteration until |f()| < SMALL
------------------------------------------------------------------------- */
void PPPMDisp::adjust_gewald()
{

  // Use Newton solver to find g_ewald

  double dx;

  // Begin algorithm

  for (int i = 0; i < LARGE; i++) {
    dx = f() / derivf();
    g_ewald -= dx; //Update g_ewald
    if (fabs(f()) < SMALL) return;
  }

  // Failed to converge

  char str[128];
  sprintf(str, "Could not compute g_ewald");
  error->all(FLERR, str);

}

/* ----------------------------------------------------------------------
 Calculate f(g_ewald) = real-space error estimate - k-space error estimate
 for the Coulomb interaction
 ------------------------------------------------------------------------- */

double PPPMDisp::f()
{
  double df_rspace, df_kspace;
  double q2 = qsqsum * force->qqrd2e;
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double zprd_slab = zprd*slab_volfactor;
  bigint natoms = atom->natoms;

  df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
       sqrt(natoms*cutoff*xprd*yprd*zprd);

  double qopt = compute_qopt();
  df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);

  return df_rspace - df_kspace;
}

/* ----------------------------------------------------------------------
 Calculate numerical derivative f'(x) using forward difference
 [f(x + h) - f(x)] / h
 ------------------------------------------------------------------------- */

double PPPMDisp::derivf()
{
  double h = 0.000001;  //Derivative step-size
  double df,f1,f2,g_ewald_old;

  f1 = f();
  g_ewald_old = g_ewald;
  g_ewald += h;
  f2 = f();
  g_ewald = g_ewald_old;
  df = (f2 - f1)/h;

  return df;
}

/* ----------------------------------------------------------------------
   Calculate the final estimator for the accuracy
------------------------------------------------------------------------- */

double PPPMDisp::final_accuracy()
{
  double df_rspace, df_kspace;
  double q2 = qsqsum * force->qqrd2e;
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double zprd_slab = zprd*slab_volfactor;
  bigint natoms = atom->natoms;
  df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
             sqrt(natoms*cutoff*xprd*yprd*zprd);

  double qopt = compute_qopt();

  df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);

  double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
  return acc;
}

/* ----------------------------------------------------------------------
   Calculate the final estimator for the Dispersion accuracy
------------------------------------------------------------------------- */

void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
{
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double zprd_slab = zprd*slab_volfactor;
  bigint natoms = atom->natoms;
  acc_real = lj_rspace_error();

  double qopt = compute_qopt_6();

  acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);

  acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
  return;
}

/* ----------------------------------------------------------------------
   Compute qopt for Coulomb interactions
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt()
{
  // Local contribution comes from the ad routine when
  // differentiation_flag == 1, otherwise from the ik routine;
  // the contributions of all procs in world are then summed.

  double qopt_local = (differentiation_flag == 1) ? compute_qopt_ad()
                                                  : compute_qopt_ik();
  double qopt_global;
  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
  return qopt_global;
}

/* ----------------------------------------------------------------------
   Compute qopt for Dispersion interactions
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt_6()
{
  // Local contribution comes from the ad routine when
  // differentiation_flag == 1, otherwise from the ik routine;
  // the contributions of all procs in world are then summed.

  double qopt_local = (differentiation_flag == 1) ? compute_qopt_6_ad()
                                                  : compute_qopt_6_ik();
  double qopt_global;
  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
  return qopt_global;
}

/* ----------------------------------------------------------------------
   Compute qopt for the ik differentiation scheme and Coulomb interaction
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt_ik()
{
  // Returns the sum over the FFT mesh points owned by this proc
  // (nxlo_fft..nxhi_fft etc.) of sum1 - sum2/sum3, see below.
  // The result is a per-proc partial sum; compute_qopt() adds up the
  // values of all procs.

  double qopt = 0.0;
  int k,l,m;
  double *prd;

  // box lengths, in lamda coords for triclinic boxes

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  // wave vector increment per mesh index in each direction

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  int nx,ny,nz,kper,lper,mper;
  double sqk, u2;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2, sum3,dot1,dot2;

  // the inner sums run over shifts -nb..nb of the wave vector by whole
  // multiples of the mesh size in each direction

  int nbx = 2;
  int nby = 2;
  int nbz = 2;

  // kper,lper,mper: integer division makes the bracket 0 for indices in
  // the lower half of the mesh and 1 for the upper half, so upper-half
  // indices are shifted down by one mesh length

  for (m = nzlo_fft; m <= nzhi_fft; m++) {
    mper = m - nz_pppm*(2*m/nz_pppm);

    for (l = nylo_fft; l <= nyhi_fft; l++) {
      lper = l - ny_pppm*(2*l/ny_pppm);

      for (k = nxlo_fft; k <= nxhi_fft; k++) {
        kper = k - nx_pppm*(2*k/nx_pppm);

        // squared length of the unshifted wave vector

        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
          pow(unitkz*mper,2.0);

        // the zero wave vector contributes nothing

        if (sqk != 0.0) {
          sum1 = 0.0;
          sum2 = 0.0;
          sum3 = 0.0;

          // per direction: q = shifted wave vector component,
          //   s = exp(-q^2/(4 g_ewald^2)),
          //   w = (sin(arg)/arg)^order with w = 1 when arg is exactly 0

          for (nx = -nbx; nx <= nbx; nx++) {
            qx = unitkx*(kper+nx_pppm*nx);
            sx = exp(-0.25*pow(qx/g_ewald,2.0));
            wx = 1.0;
            argx = 0.5*qx*xprd/nx_pppm;
            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
            for (ny = -nby; ny <= nby; ny++) {
              qy = unitky*(lper+ny_pppm*ny);
              sy = exp(-0.25*pow(qy/g_ewald,2.0));
              wy = 1.0;
              argy = 0.5*qy*yprd/ny_pppm;
              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
              for (nz = -nbz; nz <= nbz; nz++) {
                qz = unitkz*(mper+nz_pppm*nz);
                sz = exp(-0.25*pow(qz/g_ewald,2.0));
                wz = 1.0;
                argz = 0.5*qz*zprd_slab/nz_pppm;
                if (argz != 0.0) wz = pow(sin(argz)/argz,order);

                // dot1 = (unshifted k) . (shifted q), dot2 = |shifted q|^2
                // u2 = squared product of the three w factors

                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                dot2 = qx*qx+qy*qy+qz*qz;
                u2 =  pow(wx*wy*wz,2.0);
                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
		sum3 += u2;
              }
            }
          }

          // contribution of this mesh point: sum1 - sum2^2/(sum3^2 * sqk)

	  sum2 *= sum2;
	  sum3 *= sum3*sqk;
          qopt += sum1 -sum2/sum3;
        }
      }
    }
  }
  return qopt;
}

/* ----------------------------------------------------------------------
   Compute qopt for the ad differentiation scheme and Coulomb interaction
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt_ad()
{
  // Returns the sum over the FFT mesh points owned by this proc
  // (nxlo_fft..nxhi_fft etc.) of sum1 - sum2^2/(sum3*sum4), see below.
  // The result is a per-proc partial sum; compute_qopt() adds up the
  // values of all procs.

  double qopt = 0.0;
  int k,l,m;
  double *prd;

  // box lengths, in lamda coords for triclinic boxes

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;


  // wave vector increment per mesh index in each direction

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  int nx,ny,nz,kper,lper,mper;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double u2, sqk;
  double sum1,sum2,sum3,sum4,dot2;

  // the inner sums run over shifts -nb..nb of the wave vector by whole
  // multiples of the mesh size in each direction

  int nbx = 2;
  int nby = 2;
  int nbz = 2;

  // kper,lper,mper: integer division makes the bracket 0 for indices in
  // the lower half of the mesh and 1 for the upper half, so upper-half
  // indices are shifted down by one mesh length

  for (m = nzlo_fft; m <= nzhi_fft; m++) {
    mper = m - nz_pppm*(2*m/nz_pppm);

    for (l = nylo_fft; l <= nyhi_fft; l++) {
      lper = l - ny_pppm*(2*l/ny_pppm);

      for (k = nxlo_fft; k <= nxhi_fft; k++) {
        kper = k - nx_pppm*(2*k/nx_pppm);

        // squared length of the unshifted wave vector; here it only
        // serves to skip the zero wave vector

        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
          pow(unitkz*mper,2.0);

        if (sqk != 0.0) {

          sum1 = 0.0;
          sum2 = 0.0;
          sum3 = 0.0;
          sum4 = 0.0;

          // per direction: q = shifted wave vector component,
          //   s = exp(-q^2/(4 g_ewald^2)),
          //   w = (sin(arg)/arg)^order with w = 1 when arg is exactly 0

          for (nx = -nbx; nx <= nbx; nx++) {
            qx = unitkx*(kper+nx_pppm*nx);
            sx = exp(-0.25*pow(qx/g_ewald,2.0));
            wx = 1.0;
            argx = 0.5*qx*xprd/nx_pppm;
            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
            for (ny = -nby; ny <= nby; ny++) {
              qy = unitky*(lper+ny_pppm*ny);
              sy = exp(-0.25*pow(qy/g_ewald,2.0));
              wy = 1.0;
              argy = 0.5*qy*yprd/ny_pppm;
              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
              for (nz = -nbz; nz <= nbz; nz++) {
                qz = unitkz*(mper+nz_pppm*nz);
                sz = exp(-0.25*pow(qz/g_ewald,2.0));
                wz = 1.0;
                argz = 0.5*qz*zprd_slab/nz_pppm;
                if (argz != 0.0) wz = pow(sin(argz)/argz,order);

                // dot2 = |shifted q|^2
                // u2 = squared product of the three w factors

                dot2 = qx*qx+qy*qy+qz*qz;
                u2 =  pow(wx*wy*wz,2.0);
                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
                sum2 += sx*sy*sz * u2*4.0*MY_PI;
                sum3 += u2;
                sum4 += dot2*u2;
              }
            }
          }

          // contribution of this mesh point: sum1 - sum2^2/(sum3*sum4)

          sum2 *= sum2;
          qopt += sum1 - sum2/(sum3*sum4);
        }
      }
    }
  }
  return qopt;
}

/* ----------------------------------------------------------------------
   Compute qopt for the ik differentiation scheme and Dispersion interaction
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt_6_ik()
{
  // Accumulates qopt over the FFT grid points
  // (nxlo_fft_6:nxhi_fft_6, nylo_fft_6:nyhi_fft_6, nzlo_fft_6:nzhi_fft_6)
  // and returns the accumulated value.
  // No reduction across processors is performed inside this function.

  double qopt = 0.0;
  int k,l,m;
  double *prd;

  // box lengths: domain->prd for triclinic == 0, domain->prd_lamda otherwise

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  // z length is scaled by slab_volfactor
  double zprd_slab = zprd*slab_volfactor;

  // wavevector spacing 2*pi/L along each axis

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  int nx,ny,nz,kper,lper,mper;
  double sqk, u2;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double sum1,sum2, sum3;
  double dot1,dot2, rtdot2, term;
  // inv2ew = 1/(2*g_ewald_6)
  double inv2ew = 2*g_ewald_6;
  inv2ew = 1.0/inv2ew;
  double rtpi = sqrt(MY_PI);

  // the image sums below run over -nb..nb in each direction

  int nbx = 2;
  int nby = 2;
  int nbz = 2;

  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
    // integer division folds the grid index: for 0 <= m < nz_pppm_6 this
    // gives m in the lower half and m - nz_pppm_6 in the upper half
    mper = m - nz_pppm_6*(2*m/nz_pppm_6);

    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
      lper = l - ny_pppm_6*(2*l/ny_pppm_6);

      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
        kper = k - nx_pppm_6*(2*k/nx_pppm_6);

        // squared length of the wavevector of this grid point
        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
          pow(unitkz*mper,2.0);

        // the zero wavevector contributes nothing to qopt
        if (sqk != 0.0) {
          sum1 = 0.0;
          sum2 = 0.0;
          sum3 = 0.0;
          // per direction: q* = wavevector component shifted by nx grid
          // periods, s* = exp(-(q* / (2*g_ewald_6))^2),
          // w* = (sin(arg)/arg)^order_6, with w* = 1 when arg == 0
          for (nx = -nbx; nx <= nbx; nx++) {
            qx = unitkx*(kper+nx_pppm_6*nx);
            sx = exp(-qx*qx*inv2ew*inv2ew);
            wx = 1.0;
            argx = 0.5*qx*xprd/nx_pppm_6;
            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
            for (ny = -nby; ny <= nby; ny++) {
              qy = unitky*(lper+ny_pppm_6*ny);
              sy = exp(-qy*qy*inv2ew*inv2ew);
              wy = 1.0;
              argy = 0.5*qy*yprd/ny_pppm_6;
              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
              for (nz = -nbz; nz <= nbz; nz++) {
                qz = unitkz*(mper+nz_pppm_6*nz);
                sz = exp(-qz*qz*inv2ew*inv2ew);
                wz = 1.0;
                argz = 0.5*qz*zprd_slab/nz_pppm_6;
                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);

                // dot1 = (unshifted wavevector) . (shifted wavevector)
                // dot2 = squared length of the shifted wavevector
                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
                dot2 = qx*qx+qy*qy+qz*qz;
                rtdot2 = sqrt(dot2);
                // same Gaussian + erfc expression that compute_gf_6()
                // evaluates, here at the shifted wavevector and multiplied
                // by g_ewald_6^3
                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                term *= g_ewald_6*g_ewald_6*g_ewald_6;
                // u2 = squared product of the three w factors
                u2 =  pow(wx*wy*wz,2.0);
                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
		sum3 += u2;
              }
            }
          }
          // contribution of this grid point: sum1 - sum2^2/(sum3^2 * sqk)
	  sum2 *= sum2;
	  sum3 *= sum3*sqk;
          qopt += sum1 -sum2/sum3;
        }
      }
    }
  }
  return qopt;
}

/* ----------------------------------------------------------------------
   Compute qopt for the ad differentiation scheme and Dispersion interaction
------------------------------------------------------------------------- */

double PPPMDisp::compute_qopt_6_ad()
{
  // Accumulates qopt over the FFT grid points
  // (nxlo_fft_6:nxhi_fft_6, nylo_fft_6:nyhi_fft_6, nzlo_fft_6:nzhi_fft_6)
  // and returns the accumulated value.
  // No reduction across processors is performed inside this function.

  double qopt = 0.0;
  int k,l,m;
  double *prd;

  // box lengths: domain->prd for triclinic == 0, domain->prd_lamda otherwise

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  // z length is scaled by slab_volfactor
  double zprd_slab = zprd*slab_volfactor;

  // wavevector spacing 2*pi/L along each axis

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  int nx,ny,nz,kper,lper,mper;
  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
  double u2, sqk;
  double sum1,sum2,sum3,sum4;
  double dot2, rtdot2, term;
  // inv2ew = 1/(2*g_ewald_6)
  double inv2ew = 2*g_ewald_6;
  inv2ew = 1/inv2ew;
  double rtpi = sqrt(MY_PI);

  // the image sums below run over -nb..nb in each direction

  int nbx = 2;
  int nby = 2;
  int nbz = 2;

  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
    // integer division folds the grid index: for 0 <= m < nz_pppm_6 this
    // gives m in the lower half and m - nz_pppm_6 in the upper half
    mper = m - nz_pppm_6*(2*m/nz_pppm_6);

    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
      lper = l - ny_pppm_6*(2*l/ny_pppm_6);

      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
        kper = k - nx_pppm_6*(2*k/nx_pppm_6);

        // squared length of the wavevector of this grid point
        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
          pow(unitkz*mper,2.0);

        // the zero wavevector contributes nothing to qopt
        if (sqk != 0.0) {

          sum1 = 0.0;
          sum2 = 0.0;
          sum3 = 0.0;
          sum4 = 0.0;
          // per direction: q* = wavevector component shifted by nx grid
          // periods, s* = exp(-(q* / (2*g_ewald_6))^2),
          // w* = (sin(arg)/arg)^order_6, with w* = 1 when arg == 0
          for (nx = -nbx; nx <= nbx; nx++) {
            qx = unitkx*(kper+nx_pppm_6*nx);
            sx = exp(-qx*qx*inv2ew*inv2ew);
            wx = 1.0;
            argx = 0.5*qx*xprd/nx_pppm_6;
            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
            for (ny = -nby; ny <= nby; ny++) {
              qy = unitky*(lper+ny_pppm_6*ny);
              sy = exp(-qy*qy*inv2ew*inv2ew);
              wy = 1.0;
              argy = 0.5*qy*yprd/ny_pppm_6;
              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
              for (nz = -nbz; nz <= nbz; nz++) {
                qz = unitkz*(mper+nz_pppm_6*nz);
                sz = exp(-qz*qz*inv2ew*inv2ew);
                wz = 1.0;
                argz = 0.5*qz*zprd_slab/nz_pppm_6;
                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);

                // dot2 = squared length of the shifted wavevector
                dot2 = qx*qx+qy*qy+qz*qz;
                rtdot2 = sqrt(dot2);
                // same Gaussian + erfc expression that compute_gf_6()
                // evaluates, here at the shifted wavevector and multiplied
                // by g_ewald_6^3
                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
                term *= g_ewald_6*g_ewald_6*g_ewald_6;
                // u2 = squared product of the three w factors
                u2 =  pow(wx*wy*wz,2.0);
                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
                sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
                sum3 += u2;
                sum4 += dot2*u2;
              }
            }
          }
          // contribution of this grid point: sum1 - sum2^2/(sum3*sum4)
          sum2 *= sum2;
          qopt += sum1 - sum2/(sum3*sum4);
        }
      }
    }
  }
  return qopt;
}

/* ----------------------------------------------------------------------
   set size of FFT grid  and g_ewald_6
   for Dispersion interactions
------------------------------------------------------------------------- */

void PPPMDisp::set_grid_6()
{
  // each setup step is skipped when its flag is already set

  if (csumflag == 0) calc_csum();
  if (gewaldflag_6 == 0) set_init_g6();
  if (gridflag_6 == 0) set_n_pppm_6();

  // enlarge each grid dimension one point at a time
  // until factorable() accepts it

  for ( ; !factorable(nx_pppm_6); nx_pppm_6++) {}
  for ( ; !factorable(ny_pppm_6); ny_pppm_6++) {}
  for ( ; !factorable(nz_pppm_6); nz_pppm_6++) {}
}

/* ----------------------------------------------------------------------
   Calculate the sum of the squared dispersion coefficients and other
   related quantities required for the calculations
------------------------------------------------------------------------- */

void PPPMDisp::calc_csum()
{
  const int ntypes = atom->ntypes;
  const int nlocal = atom->nlocal;
  int *type = atom->type;
  int it,jt,ia,ks;

  csumij = 0.0;
  csum = 0.0;

  // (re)allocate the per-type arrays (index 0..ntypes) and zero them

  delete [] cii;
  delete [] csumi;
  cii = new double[ntypes+1];
  csumi = new double[ntypes+1];
  int *neach = new int[ntypes+1];
  for (it = 0; it <= ntypes; it++) {
    cii[it] = 0.0;
    csumi[it] = 0.0;
    neach[it] = 0;
  }

  // per-type self terms cii, local type counts neach and the local
  // part of csum; the layout of B depends on which function[] is set

  if (function[1]) {
    for (it = 1; it <= ntypes; it++) cii[it] = B[it]*B[it];
    for (ia = 0; ia < nlocal; ia++) {
      const int t = type[ia];
      neach[t]++;
      csum += B[t]*B[t];
    }
  }

  if (function[2]) {
    for (it = 1; it <= ntypes; it++)
      cii[it] = 64.0/20.0*B[7*it+3]*B[7*it+3];
    for (ia = 0; ia < nlocal; ia++) {
      const int t = type[ia];
      neach[t]++;
      csum += 64.0/20.0*B[7*t+3]*B[7*t+3];
    }
  }

  if (function[3]) {
    for (it = 1; it <= ntypes; it++)
      for (ks = 0; ks < nsplit; ks++)
        cii[it] += B[ks]*B[nsplit*it + ks]*B[nsplit*it + ks];
    for (ia = 0; ia < nlocal; ia++) {
      const int t = type[ia];
      neach[t]++;
      for (ks = 0; ks < nsplit; ks++)
        csum += B[ks]*B[nsplit*t + ks]*B[nsplit*t + ks];
    }
  }

  // sum csum over all procs

  double csum_all;
  MPI_Allreduce(&csum,&csum_all,1,MPI_DOUBLE,MPI_SUM,world);
  csum = csum_all;
  csumflag = 1;

  // sum the per-type atom counts over all procs

  int *neach_all = new int[ntypes+1];
  MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);

  // compute csumij and csumi from the global type counts

  if (function[1]) {
    for (it = 1; it <= ntypes; it++)
      for (jt = 1; jt <= ntypes; jt++) {
        csumi[it] += neach_all[jt]*B[it]*B[jt];
        const double wi = neach_all[it]*B[it];
        const double wj = neach_all[jt]*B[jt];
        csumij += wi*wj;
      }
  }

  if (function[2]) {
    for (it = 1; it <= ntypes; it++)
      for (jt = 1; jt <= ntypes; jt++)
        for (ks = 0; ks <= 6; ks++) {
          csumi[it] += neach_all[jt]*B[7*it + ks]*B[7*(jt+1)-ks-1];
          const double wi = neach_all[it]*B[7*it + ks];
          const double wj = neach_all[jt]*B[7*(jt+1)-ks-1];
          csumij += wi*wj;
        }
  }

  if (function[3]) {
    for (it = 1; it <= ntypes; it++)
      for (jt = 1; jt <= ntypes; jt++)
        for (ks = 0; ks < nsplit; ks++) {
          csumi[it] += neach_all[jt]*B[ks]*B[nsplit*it+ks]*B[nsplit*jt+ks];
          const double wi = neach_all[it]*B[nsplit*it+ks];
          const double wj = neach_all[jt]*B[nsplit*jt+ks];
          csumij += B[ks]*wi*wj;
        }
  }

  delete [] neach;
  delete [] neach_all;
}

/* ----------------------------------------------------------------------
   adjust g_ewald_6 to the new grid size
------------------------------------------------------------------------- */

void PPPMDisp::adjust_gewald_6()
{
  // Use Newton solver to find g_ewald_6
  double dx;

  // Start loop

  for (int i = 0; i <  LARGE; i++) {
    dx = f_6() / derivf_6();
    g_ewald_6 -= dx; //update g_ewald_6
    if (fabs(f_6()) < SMALL) return;
  }

  // Failed to converge

  char str[128];
  sprintf(str, "Could not adjust g_ewald_6");
  error->all(FLERR, str);

}

/* ----------------------------------------------------------------------
 Calculate f(x) for Dispersion interaction
 ------------------------------------------------------------------------- */

double PPPMDisp::f_6()
{
  double df_rspace, df_kspace;
  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;
  bigint natoms = atom->natoms;

  df_rspace = lj_rspace_error();

  double qopt = compute_qopt_6();
  df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);

  return df_rspace - df_kspace;
}

/* ----------------------------------------------------------------------
 Calculate numerical derivative f'(x) using forward difference
 [f(x + h) - f(x)] / h
 ------------------------------------------------------------------------- */

double PPPMDisp::derivf_6()
{
  double h = 0.000001;  //Derivative step-size
  double df,f1,f2,g_ewald_old;

  f1 = f_6();
  g_ewald_old = g_ewald_6;
  g_ewald_6 += h;
  f2 = f_6();
  g_ewald_6 = g_ewald_old;
  df = (f2 - f1)/h;

  return df;
}


/* ----------------------------------------------------------------------
   calculate an initial value for g_ewald_6
   ---------------------------------------------------------------------- */

void PPPMDisp::set_init_g6()
{
  // use xprd,yprd,zprd even if triclinic so grid size is the same
  // adjust z dimension for 2d slab PPPM
  // 3d PPPM just uses zprd since slab_volfactor = 1.0

  // make initial g_ewald estimate
  // based on desired error and real space cutoff

  // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj
  // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0
  // else, repeat multiply g_ewald_6 by 2 until df_real > 0
  // perform bisection for the last two values of
  double df_real;
  double g_ewald_old;
  double gmin, gmax;

  // check if there is a user defined accuracy
  double acc_rspace = accuracy;
  if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;

  g_ewald_old = g_ewald_6 = 1.0/cutoff_lj;
  df_real = lj_rspace_error() - acc_rspace;
  int counter = 0;
  if (df_real > 0) {
    while (df_real > 0 && counter < LARGE) {
      counter++;
      g_ewald_old = g_ewald_6;
      g_ewald_6 *= 2;
      df_real = lj_rspace_error() - acc_rspace;
    }
  }

  if (df_real < 0) {
    while (df_real < 0 && counter < LARGE) {
      counter++;
      g_ewald_old = g_ewald_6;
      g_ewald_6 *= 0.5;
      df_real = lj_rspace_error() - acc_rspace;
    }
  }

  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");

  gmin = MIN(g_ewald_6, g_ewald_old);
  gmax = MAX(g_ewald_6, g_ewald_old);
  g_ewald_6 = gmin + 0.5*(gmax-gmin);
  counter = 0;
  while (gmax-gmin > SMALL && counter < LARGE) {
    counter++;
    df_real = lj_rspace_error() -acc_rspace;
    if (df_real < 0) gmax = g_ewald_6;
    else gmin = g_ewald_6;
    g_ewald_6 = gmin + 0.5*(gmax-gmin);
  }
  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");

}

/* ----------------------------------------------------------------------
   calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
   ---------------------------------------------------------------------- */

void PPPMDisp::set_n_pppm_6()
{
  bigint natoms = atom->natoms;

  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;
  double h, h_x,h_y,h_z;

  double acc_kspace = accuracy;
  if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;

  // initial value for the grid spacing
  h = h_x = h_y = h_z = 4.0/g_ewald_6;
  // decrease grid spacing untill required precision is obtained
  int count = 0;
  while(1) {

    // set grid dimension
    nx_pppm_6 = static_cast<int> (xprd/h_x);
    ny_pppm_6 = static_cast<int> (yprd/h_y);
    nz_pppm_6 = static_cast<int> (zprd_slab/h_z);

    if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
    if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
    if (nz_pppm_6 <= 1) nz_pppm_6 = 2;

    //set local grid dimension
    int npey_fft,npez_fft;
    if (nz_pppm_6 >= nprocs) {
      npey_fft = 1;
      npez_fft = nprocs;
    } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);

    int me_y = me % npey_fft;
    int me_z = me / npey_fft;

    nxlo_fft_6 = 0;
    nxhi_fft_6 = nx_pppm_6 - 1;
    nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
    nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
    nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
    nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;

    double qopt = compute_qopt_6();

    double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);

    count++;

    // break loop if the accuracy has been reached or too many loops have been performed
    if (df_kspace <= acc_kspace) break;
    if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
    h *= 0.95;
    h_x = h_y = h_z = h;
  }
}

/* ----------------------------------------------------------------------
   calculate the real space error for dispersion interactions
   ---------------------------------------------------------------------- */

double PPPMDisp::lj_rspace_error()
{
  bigint natoms = atom->natoms;
  double xprd = domain->xprd;
  double yprd = domain->yprd;
  double zprd = domain->zprd;
  double zprd_slab = zprd*slab_volfactor;

  double deltaf;
  double rgs = (cutoff_lj*g_ewald_6);
  rgs *= rgs;
  double rgs_inv = 1.0/rgs;
  deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
    exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
  return deltaf;
}


/* ----------------------------------------------------------------------
   Compute the modified (Hockney-Eastwood) Coulomb Green's function
   ---------------------------------------------------------------------- */

void PPPMDisp::compute_gf()
{
  // box lengths: prd for orthogonal boxes, prd_lamda when triclinic is set
  // z length is scaled by slab_volfactor

  const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
  const double xprd = box[0];
  const double yprd = box[1];
  const double zprd_slab = box[2]*slab_volfactor;
  volume = xprd * yprd * zprd_slab;

  const double unitkx = (2.0*MY_PI/xprd);
  const double unitky = (2.0*MY_PI/yprd);
  const double unitkz = (2.0*MY_PI/zprd_slab);

  // per direction and grid index:
  //   q   = wavevector component of the folded index
  //   sn2 = sin^2(arg), arg = 0.5*q*L/N
  //   s   = exp(-0.25*(q/g_ewald)^2)
  //   w   = (sin(arg)/arg)^(2*order), 1 for arg == 0

  int idx = 0;
  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
    const int mper = m - nz_pppm*(2*m/nz_pppm);
    const double qz = unitkz*mper;
    const double argz = 0.5*qz*zprd_slab/nz_pppm;
    const double snz = sin(argz);
    const double snz2 = snz*snz;
    const double sz = exp(-0.25*pow(qz/g_ewald,2.0));
    double wz = (argz != 0.0) ? pow(snz/argz,order) : 1.0;
    wz *= wz;

    for (int l = nylo_fft; l <= nyhi_fft; l++) {
      const int lper = l - ny_pppm*(2*l/ny_pppm);
      const double qy = unitky*lper;
      const double argy = 0.5*qy*yprd/ny_pppm;
      const double sny = sin(argy);
      const double sny2 = sny*sny;
      const double sy = exp(-0.25*pow(qy/g_ewald,2.0));
      double wy = (argy != 0.0) ? pow(sny/argy,order) : 1.0;
      wy *= wy;

      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
        const int kper = k - nx_pppm*(2*k/nx_pppm);
        const double qx = unitkx*kper;
        const double argx = 0.5*qx*xprd/nx_pppm;
        const double snx = sin(argx);
        const double snx2 = snx*snx;
        const double sx = exp(-0.25*pow(qx/g_ewald,2.0));
        double wx = (argx != 0.0) ? pow(snx/argx,order) : 1.0;
        wx *= wx;

        const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);

        // the zero wavevector gets a zero entry

        if (sqk == 0.0) {
          greensfn[idx++] = 0.0;
          continue;
        }

        const double numerator = 4.0*MY_PI/sqk;
        const double denominator = gf_denom(snx2,sny2,snz2, gf_b, order);
        greensfn[idx++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   compute self force coefficients for ad-differentiation scheme
   and Coulomb interaction
------------------------------------------------------------------------- */

void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord,
                                    int nxlo_ft, int nylo_ft, int nzlo_ft,
                                    int nxhi_ft, int nyhi_ft, int nzhi_ft,
                                    double *sf_pre1, double *sf_pre2, double *sf_pre3,
                                    double *sf_pre4, double *sf_pre5, double *sf_pre6)
{
  // Fills sf_pre1..sf_pre6 with one value per grid point of the section
  // (nxlo_ft:nxhi_ft, nylo_ft:nyhi_ft, nzlo_ft:nzhi_ft), stored
  // contiguously with x running fastest.
  //   nxp,nyp,nzp = grid size used to fold indices and to shift images
  //   ord         = exponent applied to sin(arg)/arg
  // The caller must provide arrays large enough for all points of the
  // section.

  int i,k,l,m,n;
  double *prd;

  // volume-dependent factors
  // adjust z dimension for 2d slab PPPM
  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  // wavevector spacing 2*pi/L along each axis

  double unitkx = (2.0*MY_PI/xprd);
  double unitky = (2.0*MY_PI/yprd);
  double unitkz = (2.0*MY_PI/zprd_slab);

  int nx,ny,nz,kper,lper,mper;
  double argx,argy,argz;
  // w?0, w?1, w?2 hold (sin(arg)/arg)^ord for image shifts i, i+1, i+2
  // slot i+2 of each table belongs to image i = -nb..nb (5 entries)
  double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
  double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
  double u0,u1,u2,u3,u4,u5,u6;
  double sum1,sum2,sum3,sum4,sum5,sum6;

  // NOTE: the tables above have 5 slots, which matches nb = 2 only

  int nb = 2;

  n = 0;
  for (m = nzlo_ft; m <= nzhi_ft; m++) {
    // integer division folds the grid index: for 0 <= m < nzp this
    // gives m in the lower half and m - nzp in the upper half
    mper = m - nzp*(2*m/nzp);

    for (l = nylo_ft; l <= nyhi_ft; l++) {
      lper = l - nyp*(2*l/nyp);

      for (k = nxlo_ft; k <= nxhi_ft; k++) {
        kper = k - nxp*(2*k/nxp);

        sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;

        // build the per-direction tables for this grid point
        // entries default to 1.0 and are overwritten when arg != 0

        for (i = -nb; i <= nb; i++) {

          qx0 = unitkx*(kper+nxp*i);
          qx1 = unitkx*(kper+nxp*(i+1));
          qx2 = unitkx*(kper+nxp*(i+2));
          wx0[i+2] = 1.0;
          wx1[i+2] = 1.0;
          wx2[i+2] = 1.0;
          argx = 0.5*qx0*xprd/nxp;
          if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord);
          argx = 0.5*qx1*xprd/nxp;
          if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord);
          argx = 0.5*qx2*xprd/nxp;
          if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord);

          qy0 = unitky*(lper+nyp*i);
          qy1 = unitky*(lper+nyp*(i+1));
          qy2 = unitky*(lper+nyp*(i+2));
          wy0[i+2] = 1.0;
          wy1[i+2] = 1.0;
          wy2[i+2] = 1.0;
          argy = 0.5*qy0*yprd/nyp;
          if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord);
          argy = 0.5*qy1*yprd/nyp;
          if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord);
          argy = 0.5*qy2*yprd/nyp;
          if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord);

          qz0 = unitkz*(mper+nzp*i);
          qz1 = unitkz*(mper+nzp*(i+1));
          qz2 = unitkz*(mper+nzp*(i+2));
          wz0[i+2] = 1.0;
          wz1[i+2] = 1.0;
          wz2[i+2] = 1.0;
          argz = 0.5*qz0*zprd_slab/nzp;
          if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord);
          argz = 0.5*qz1*zprd_slab/nzp;
          if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord);
           argz = 0.5*qz2*zprd_slab/nzp;
          if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord);
        }

        // sum over all 5x5x5 image combinations
        // u0 uses the unshifted tables in all directions
        // u1,u2 shift x by 1,2;  u3,u4 shift y;  u5,u6 shift z

        for (nx = 0; nx <= 4; nx++) {
          for (ny = 0; ny <= 4; ny++) {
            for (nz = 0; nz <= 4; nz++) {
              u0 = wx0[nx]*wy0[ny]*wz0[nz];
              u1 = wx1[nx]*wy0[ny]*wz0[nz];
              u2 = wx2[nx]*wy0[ny]*wz0[nz];
              u3 = wx0[nx]*wy1[ny]*wz0[nz];
              u4 = wx0[nx]*wy2[ny]*wz0[nz];
              u5 = wx0[nx]*wy0[ny]*wz1[nz];
              u6 = wx0[nx]*wy0[ny]*wz2[nz];

              sum1 += u0*u1;
              sum2 += u0*u2;
              sum3 += u0*u3;
              sum4 += u0*u4;
              sum5 += u0*u5;
              sum6 += u0*u6;
            }
          }
        }

        // store values

        sf_pre1[n] = sum1;
        sf_pre2[n] = sum2;
        sf_pre3[n] = sum3;
        sf_pre4[n] = sum4;
        sf_pre5[n] = sum5;
        sf_pre6[n++] = sum6;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   Compute the modified (hockney-eastwood) dispersion green function
   ---------------------------------------------------------------------- */

void PPPMDisp::compute_gf_6()
{
  // box lengths: prd for orthogonal boxes, prd_lamda when triclinic is set
  // z length is scaled by slab_volfactor

  const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
  const double xprd = box[0];
  const double yprd = box[1];
  const double zprd_slab = box[2]*slab_volfactor;

  const double unitkx = (2.0*MY_PI/xprd);
  const double unitky = (2.0*MY_PI/yprd);
  const double unitkz = (2.0*MY_PI/zprd_slab);

  // constants that do not depend on the grid point

  const double inv2ew = 1/(2*g_ewald_6);
  const double rtpi = sqrt(MY_PI);
  const double numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);

  // per direction and grid index:
  //   q   = wavevector component of the folded index
  //   sn2 = sin^2 of half the wavevector times the grid spacing
  //   s   = exp(-(q*inv2ew)^2)
  //   w   = (sin(arg)/arg)^(2*order_6), 1 for arg == 0

  int idx = 0;
  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
    const int mper = m - nz_pppm_6*(2*m/nz_pppm_6);
    const double qz = unitkz*mper;
    const double snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
    const double snz2 = snz*snz;
    const double sz = exp(-qz*qz*inv2ew*inv2ew);
    const double argz = 0.5*qz*zprd_slab/nz_pppm_6;
    double wz = (argz != 0.0) ? pow(sin(argz)/argz,order_6) : 1.0;
    wz *= wz;

    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++) {
      const int lper = l - ny_pppm_6*(2*l/ny_pppm_6);
      const double qy = unitky*lper;
      const double sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
      const double sny2 = sny*sny;
      const double sy = exp(-qy*qy*inv2ew*inv2ew);
      const double argy = 0.5*qy*yprd/ny_pppm_6;
      double wy = (argy != 0.0) ? pow(sin(argy)/argy,order_6) : 1.0;
      wy *= wy;

      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
        const int kper = k - nx_pppm_6*(2*k/nx_pppm_6);
        const double qx = unitkx*kper;
        const double snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
        const double snx2 = snx*snx;
        const double sx = exp(-qx*qx*inv2ew*inv2ew);
        const double argx = 0.5*qx*xprd/nx_pppm_6;
        double wx = (argx != 0.0) ? pow(sin(argx)/argx,order_6) : 1.0;
        wx *= wx;

        const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);

        // the zero wavevector gets a zero entry

        if (sqk == 0.0) {
          greensfn_6[idx++] = 0.0;
          continue;
        }

        const double denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
        const double rtsqk = sqrt(sqk);
        const double term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
                  2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
        greensfn_6[idx++] = numerator*term*wx*wy*wz/denominator;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   compute self force coefficients for ad-differentiation scheme
   and Coulomb interaction
------------------------------------------------------------------------- */
void PPPMDisp::compute_sf_coeff()
{
  int c,ix,iy,iz;

  // box lengths: prd for orthogonal boxes, prd_lamda when triclinic is set

  const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
  const double xprd = box[0];
  const double yprd = box[1];
  const double zprd_slab = box[2]*slab_volfactor;
  volume = xprd * yprd * zprd_slab;

  for (c = 0; c < 6; c++) sf_coeff[c] = 0.0;

  // local sums of precoefficient times Green's function over my FFT grid

  int idx = 0;
  for (iz = nzlo_fft; iz <= nzhi_fft; iz++)
    for (iy = nylo_fft; iy <= nyhi_fft; iy++)
      for (ix = nxlo_fft; ix <= nxhi_fft; ix++) {
        sf_coeff[0] += sf_precoeff1[idx]*greensfn[idx];
        sf_coeff[1] += sf_precoeff2[idx]*greensfn[idx];
        sf_coeff[2] += sf_precoeff3[idx]*greensfn[idx];
        sf_coeff[3] += sf_precoeff4[idx]*greensfn[idx];
        sf_coeff[4] += sf_precoeff5[idx]*greensfn[idx];
        sf_coeff[5] += sf_precoeff6[idx]*greensfn[idx];
        idx++;
      }

  // prefactors: pi/volume times grid points per unit length,
  // doubled for the odd-numbered coefficients

  double prex = MY_PI/volume;
  double prey = prex;
  double prez = prex;
  prex *= nx_pppm/xprd;
  prey *= ny_pppm/yprd;
  prez *= nz_pppm/zprd_slab;

  sf_coeff[0] *= prex;
  sf_coeff[1] *= prex*2;
  sf_coeff[2] *= prey;
  sf_coeff[3] *= prey*2;
  sf_coeff[4] *= prez;
  sf_coeff[5] *= prez*2;

  // sum the six coefficients over all procs

  double total[6];
  MPI_Allreduce(sf_coeff,total,6,MPI_DOUBLE,MPI_SUM,world);
  for (c = 0; c < 6; c++) sf_coeff[c] = total[c];
}

/* ----------------------------------------------------------------------
   compute self force coefficients for ad-differentiation scheme
   and Dispersion interaction
------------------------------------------------------------------------- */

void PPPMDisp::compute_sf_coeff_6()
{
  int c,ix,iy,iz;

  // box lengths: prd for orthogonal boxes, prd_lamda when triclinic is set

  const double *box = (triclinic == 0) ? domain->prd : domain->prd_lamda;
  const double xprd = box[0];
  const double yprd = box[1];
  const double zprd_slab = box[2]*slab_volfactor;
  volume = xprd * yprd * zprd_slab;

  for (c = 0; c < 6; c++) sf_coeff_6[c] = 0.0;

  // local sums of precoefficient times Green's function over my FFT grid

  int idx = 0;
  for (iz = nzlo_fft_6; iz <= nzhi_fft_6; iz++)
    for (iy = nylo_fft_6; iy <= nyhi_fft_6; iy++)
      for (ix = nxlo_fft_6; ix <= nxhi_fft_6; ix++) {
        sf_coeff_6[0] += sf_precoeff1_6[idx]*greensfn_6[idx];
        sf_coeff_6[1] += sf_precoeff2_6[idx]*greensfn_6[idx];
        sf_coeff_6[2] += sf_precoeff3_6[idx]*greensfn_6[idx];
        sf_coeff_6[3] += sf_precoeff4_6[idx]*greensfn_6[idx];
        sf_coeff_6[4] += sf_precoeff5_6[idx]*greensfn_6[idx];
        sf_coeff_6[5] += sf_precoeff6_6[idx]*greensfn_6[idx];
        idx++;
      }

  // prefactors: pi/volume times grid points per unit length,
  // doubled for the odd-numbered coefficients

  double prex = MY_PI/volume;
  double prey = prex;
  double prez = prex;
  prex *= nx_pppm_6/xprd;
  prey *= ny_pppm_6/yprd;
  prez *= nz_pppm_6/zprd_slab;

  sf_coeff_6[0] *= prex;
  sf_coeff_6[1] *= prex*2;
  sf_coeff_6[2] *= prey;
  sf_coeff_6[3] *= prey*2;
  sf_coeff_6[4] *= prez;
  sf_coeff_6[5] *= prez*2;

  // sum the six coefficients over all procs

  double total[6];
  MPI_Allreduce(sf_coeff_6,total,6,MPI_DOUBLE,MPI_SUM,world);
  for (c = 0; c < 6; c++) sf_coeff_6[c] = total[c];
}

/* ----------------------------------------------------------------------
   denominator for Hockney-Eastwood Green's function
     of x,y,z = sin(kx*deltax/2), etc

            inf                 n-1
   S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
           j=-inf               l=0

          = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
   gf_b = denominator expansion coeffs
------------------------------------------------------------------------- */

double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
{
  // evaluate the polynomial with coefficients g_b[0..ord-1] at x, y and z
  // by nested multiplication, starting from the highest coefficient

  double px = 0.0;
  double py = 0.0;
  double pz = 0.0;

  int l = ord;
  while (l-- > 0) {
    const double coeff = g_b[l];
    px = coeff + px*x;
    py = coeff + py*y;
    pz = coeff + pz*z;
  }

  // result is the squared product of the three polynomial values

  const double prod = px*py*pz;
  return prod*prod;
}

/* ----------------------------------------------------------------------
   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
------------------------------------------------------------------------- */

void PPPMDisp::compute_gf_denom(double* gf, int ord)
{
  int k,l,m;

  for (l = 1; l < ord; l++) gf[l] = 0.0;
  gf[0] = 1.0;

  for (m = 1; m < ord; m++) {
    for (l = m; l > 0; l--)
      gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
    gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
  }

  bigint ifact = 1;
  for (k = 1; k < 2*ord; k++) ifact *= k;
  double gaminv = 1.0/ifact;
  for (l = 0; l < ord; l++) gf[l] *= gaminv;
}

/* ----------------------------------------------------------------------
   ghost-swap to accumulate full density in brick decomposition
   remap density from 3d brick decomposition to FFTdecomposition
   for coulomb interaction or dispersion interaction with geometric
   mixing
------------------------------------------------------------------------- */

void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
                         int nxhi_i, int nyhi_i, int nzhi_i,
                         FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
                         LAMMPS_NS::Remap* rmp)
{
  int n,ix,iy,iz;

  // copy grabs inner portion of density from 3d brick
  // remap could be done as pre-stage of FFT,
  //   but this works optimally on only double values, not complex values

  n = 0;
  for (iz = nzlo_i; iz <= nzhi_i; iz++)
    for (iy = nylo_i; iy <= nyhi_i; iy++)
      for (ix = nxlo_i; ix <= nxhi_i; ix++)
	dfft[n++] = dbrick[iz][iy][ix];

  rmp->perform(dfft,dfft,work);
}


/* ----------------------------------------------------------------------
   ghost-swap to accumulate full density in brick decomposition
   remap density from 3d brick decomposition to FFTdecomposition
   for dispersion with arithmetic mixing rule
------------------------------------------------------------------------- */

void PPPMDisp::brick2fft_a()
{
  // pack the (nxlo_in_6:nxhi_in_6, nylo_in_6:nyhi_in_6, nzlo_in_6:nzhi_in_6)
  // portion of each of the seven bricks into its FFT array,
  // x index running fastest

  int idx = 0;
  for (int kz = nzlo_in_6; kz <= nzhi_in_6; kz++) {
    for (int ky = nylo_in_6; ky <= nyhi_in_6; ky++) {
      for (int kx = nxlo_in_6; kx <= nxhi_in_6; kx++) {
        density_fft_a0[idx] = density_brick_a0[kz][ky][kx];
        density_fft_a1[idx] = density_brick_a1[kz][ky][kx];
        density_fft_a2[idx] = density_brick_a2[kz][ky][kx];
        density_fft_a3[idx] = density_brick_a3[kz][ky][kx];
        density_fft_a4[idx] = density_brick_a4[kz][ky][kx];
        density_fft_a5[idx] = density_brick_a5[kz][ky][kx];
        density_fft_a6[idx] = density_brick_a6[kz][ky][kx];
        idx++;
      }
    }
  }

  // in-place remap of each array, all sharing work1_6 as scratch space

  remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
  remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
  remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
  remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
  remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
  remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
  remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
}

/* ----------------------------------------------------------------------
   ghost-swap to accumulate full density in brick decomposition
   remap density from 3d brick decomposition to FFTdecomposition
   for dispersion with special case
------------------------------------------------------------------------- */

void PPPMDisp::brick2fft_none()
{
  int isplit;

  // for each of the nsplit_alloc bricks, pack its
  // (nxlo_in_6:nxhi_in_6, nylo_in_6:nyhi_in_6, nzlo_in_6:nzhi_in_6)
  // portion into the matching FFT array, x index running fastest

  for (isplit = 0; isplit < nsplit_alloc; isplit++) {
    int idx = 0;
    for (int kz = nzlo_in_6; kz <= nzhi_in_6; kz++) {
      for (int ky = nylo_in_6; ky <= nyhi_in_6; ky++) {
        for (int kx = nxlo_in_6; kx <= nxhi_in_6; kx++) {
          density_fft_none[isplit][idx] = density_brick_none[isplit][kz][ky][kx];
          idx++;
        }
      }
    }
  }

  // in-place remap of every array once all of them are packed

  for (isplit = 0; isplit < nsplit_alloc; isplit++)
    remap_6->perform(density_fft_none[isplit],density_fft_none[isplit],work1_6);
}

/* ----------------------------------------------------------------------
   find center grid pt for each of my particles
   check that full stencil for the particle will fit in my 3d brick
   store central grid pt indices in part2grid array
------------------------------------------------------------------------- */

void PPPMDisp::particle_map(double delx, double dely, double delz,
                             double sft, int** p2g, int nup, int nlow,
                             int nxlo, int nylo, int nzlo,
                             int nxhi, int nyhi, int nzhi)
{
  int nx,ny,nz;

  double **x = atom->x;
  int nlocal = atom->nlocal;

  if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
    error->one(FLERR,"Non-numeric box dimensions - simulation unstable");

  int flag = 0;
  for (int i = 0; i < nlocal; i++) {

    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

    nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
    ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
    nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;

    p2g[i][0] = nx;
    p2g[i][1] = ny;
    p2g[i][2] = nz;

    // check that entire stencil around nx,ny,nz will fit in my 3d brick

    if (nx+nlow < nxlo || nx+nup > nxhi ||
	ny+nlow < nylo || ny+nup > nyhi ||
	nz+nlow < nzlo || nz+nup > nzhi)
      flag = 1;
  }

  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
}


void PPPMDisp::particle_map_c(double delx, double dely, double delz,
                               double sft, int** p2g, int nup, int nlow,
                               int nxlo, int nylo, int nzlo,
                               int nxhi, int nyhi, int nzhi)
{
  particle_map(delx, dely, delz, sft, p2g, nup, nlow,
               nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
}

/* ----------------------------------------------------------------------
   create discretized "density" on section of global grid due to my particles
   density(x,y,z) = charge "density" at grid points of my 3d brick
   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
   in global grid
------------------------------------------------------------------------- */

void PPPMDisp::make_rho_c()
{
  int l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;

  // clear 3d density array

  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
	 ngrid*sizeof(FFT_SCALAR));

  // loop over my charges, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt

  double *q = atom->q;
  double **x = atom->x;
  int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {

    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;

    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);

    z0 = delvolinv * q[i];
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      y0 = z0*rho1d[2][n];
      for (m = nlower; m <= nupper; m++) {
	my = m+ny;
	x0 = y0*rho1d[1][m];
	for (l = nlower; l <= nupper; l++) {
	  mx = l+nx;
	  density_brick[mz][my][mx] += x0*rho1d[0][l];
	}
      }
    }
  }
}

/* ----------------------------------------------------------------------
   create discretized "density" on section of global grid due to my particles
   density(x,y,z) = dispersion "density" at grid points of my 3d brick
   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
   in global grid --- geometric mixing
------------------------------------------------------------------------- */

void PPPMDisp::make_rho_g()
{
  int l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;

  // clear 3d density array

  memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));

  // loop over my charges, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  int type;
  double **x = atom->x;
  int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    type = atom->type[i];
    z0 = delvolinv_6 * B[type];
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      y0 = z0*rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	x0 = y0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
	}
      }
    }
  }
}


/* ----------------------------------------------------------------------
   create discretized "density" on section of global grid due to my particles
   density(x,y,z) = dispersion "density" at grid points of my 3d brick
   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
   in global grid --- arithmetic mixing
------------------------------------------------------------------------- */

void PPPMDisp::make_rho_a()
{
  int l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0,w;

  // clear 3d density array

  memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));
  memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	 ngrid_6*sizeof(FFT_SCALAR));

  // loop over my particles, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  int type;
  double **x = atom->x;
  int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {

    //do the following for all 4 grids
    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    type = atom->type[i];
    z0 = delvolinv_6;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      y0 = z0*rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	x0 = y0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
          w = x0*rho1d_6[0][l];
	  density_brick_a0[mz][my][mx] += w*B[7*type];
	  density_brick_a1[mz][my][mx] += w*B[7*type+1];
	  density_brick_a2[mz][my][mx] += w*B[7*type+2];
	  density_brick_a3[mz][my][mx] += w*B[7*type+3];
	  density_brick_a4[mz][my][mx] += w*B[7*type+4];
	  density_brick_a5[mz][my][mx] += w*B[7*type+5];
	  density_brick_a6[mz][my][mx] += w*B[7*type+6];
	}
      }
    }
  }
}

/* ----------------------------------------------------------------------
   create discretized "density" on section of global grid due to my particles
   density(x,y,z) = dispersion "density" at grid points of my 3d brick
   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
   in global grid --- case when mixing rules don't apply
------------------------------------------------------------------------- */

void PPPMDisp::make_rho_none()
{
  int k,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0,w;

  // clear 3d density array
  for (k = 0; k < nsplit_alloc; k++)
    memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
	   ngrid_6*sizeof(FFT_SCALAR));


  // loop over my particles, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  int type;
  double **x = atom->x;
  int nlocal = atom->nlocal;

  for (int i = 0; i < nlocal; i++) {

    //do the following for all 4 grids
    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    type = atom->type[i];
    z0 = delvolinv_6;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      y0 = z0*rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	x0 = y0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
          w = x0*rho1d_6[0][l];
          for (k = 0; k < nsplit; k++)
	    density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k];
	}
      }
    }
  }
}


/* ----------------------------------------------------------------------
   FFT-based Poisson solver for ik differentiation
------------------------------------------------------------------------- */

void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
                           int nx_p, int ny_p, int nz_p, int nft,
                           int nxlo_ft, int nylo_ft, int nzlo_ft,
                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
                           int nxlo_i, int nylo_i, int nzlo_i,
                           int nxhi_i, int nyhi_i, int nzhi_i,
                           double& egy, double* gfn,
                           double* kx, double* ky, double* kz,
                           double* kx2, double* ky2, double* kz2,
                           FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
                           double* vir, double** vcoeff, double** vcoeff2,
                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
  // wk1,wk2 = work arrays, each holds nft complex values as (re,im) pairs
  // dfft = nft real density values, only read
  // ft1 = FFT used for the r -> k transform (flag 1)
  // ft2 = FFT used for all back transforms (flag -1)
  // nx_p,ny_p,nz_p = grid dimensions, only used for the 1/total-grid-pts scaling
  // n*lo_ft,n*hi_ft = loop bounds in k-space, also index kx,ky,kz,kx2,ky2,kz2
  // n*lo_i,n*hi_i = loop bounds used when filling the output bricks
  // egy = energy accumulator, incremented if eflag_global is set
  // gfn = Green's function, one value per FFT grid point
  // vir = 6 virial accumulators, incremented if vflag_global is set
  // vcoeff,vcoeff2 = virial coefficients, vcoeff2 is only passed on
  //   to poisson_peratom()
  // vx_brick,vy_brick,vz_brick = output gradient bricks
  // u_pa = output per-atom energy brick, only written if eflag_atom is set
  // v0_pa..v5_pa = per-atom virial bricks, filled by poisson_peratom()
  //   if vflag_atom is set

  int i,j,k,n;
  double eng;

  // transform charge/dispersion density (r -> k)
  // density goes into the real part, imaginary part is zero

  n = 0;
  for (i = 0; i < nft; i++) {
    wk1[n++] = dfft[i];
    wk1[n++] = ZEROF;
  }

  ft1->compute(wk1,wk1,1);

  // if requested, compute energy and virial contribution
  // the grid point count is formed in double precision:
  //   an int product nx_p*ny_p*nz_p overflows for grids
  //   with more than 2^31-1 points

  double scaleinv = 1.0/(static_cast<double>(nx_p)*ny_p*nz_p);
  double s2 = scaleinv*scaleinv;

  if (eflag_global || vflag_global) {
    if (vflag_global) {
      n = 0;
      for (i = 0; i < nft; i++) {
        eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
        for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
        if (eflag_global) egy += eng;
        n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nft; i++) {
        egy +=
          s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
        n += 2;
      }
    }
  }

  // scale by 1/total-grid-pts to get rho(k)
  // multiply by Green's function to get V(k)

  n = 0;
  for (i = 0; i < nft; i++) {
    wk1[n++] *= scaleinv * gfn[i];
    wk1[n++] *= scaleinv * gfn[i];
  }

  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
  // FFT leaves data in 3d brick decomposition
  // copy it into inner portion of vdx,vdy,vdz arrays

  // x & y direction gradient
  // both are packed into one complex transform:
  //   x gradient comes back in the real part, y gradient in the imaginary part

  n = 0;
  for (k = nzlo_ft; k <= nzhi_ft; k++)
    for (j = nylo_ft; j <= nyhi_ft; j++)
      for (i = nxlo_ft; i <= nxhi_ft; i++) {
        wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
        wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
        n += 2;
      }

  ft2->compute(wk2,wk2,-1);

  n = 0;
  for (k = nzlo_i; k <= nzhi_i; k++)
    for (j = nylo_i; j <= nyhi_i; j++)
      for (i = nxlo_i; i <= nxhi_i; i++) {
        vx_brick[k][j][i] = wk2[n++];
        vy_brick[k][j][i] = wk2[n++];
      }

  if (!eflag_atom) {
    // z direction gradient only
    // only the real part of the result is kept, imaginary part is skipped
    // NOTE(review): plain kz[k] is used here instead of 0.5*(kz[k]-kz2[k]),
    //   presumably because dropping the imaginary part makes both
    //   equivalent - confirm

    n = 0;
    for (k = nzlo_ft; k <= nzhi_ft; k++)
      for (j = nylo_ft; j <= nyhi_ft; j++)
        for (i = nxlo_ft; i <= nxhi_ft; i++) {
          wk2[n] = kz[k]*wk1[n+1];
          wk2[n+1] = -kz[k]*wk1[n];
          n += 2;
        }

    ft2->compute(wk2,wk2,-1);

    n = 0;
    for (k = nzlo_i; k <= nzhi_i; k++)
      for (j = nylo_i; j <= nyhi_i; j++)
        for (i = nxlo_i; i <= nxhi_i; i++) {
          vz_brick[k][j][i] = wk2[n];
          n += 2;
        }

  } else {
    // z direction gradient & per-atom energy
    // packed into one complex transform:
    //   z gradient comes back in the real part, energy in the imaginary part

    n = 0;
    for (k = nzlo_ft; k <= nzhi_ft; k++)
      for (j = nylo_ft; j <= nyhi_ft; j++)
        for (i = nxlo_ft; i <= nxhi_ft; i++) {
          wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
          wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
          n += 2;
        }

    ft2->compute(wk2,wk2,-1);

    n = 0;
    for (k = nzlo_i; k <= nzhi_i; k++)
      for (j = nylo_i; j <= nyhi_i; j++)
        for (i = nxlo_i; i <= nxhi_i; i++) {
          vz_brick[k][j][i] = wk2[n++];
          u_pa[k][j][i] = wk2[n++];
        }
  }

  // per-atom virial is computed from the scaled V(k) still held in wk1

  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
}

/* ----------------------------------------------------------------------
   FFT-based Poisson solver for ad differentiation
------------------------------------------------------------------------- */

void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
                           int nx_p, int ny_p, int nz_p, int nft,
                           int nxlo_ft, int nylo_ft, int nzlo_ft,
                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
                           int nxlo_i, int nylo_i, int nzlo_i,
                           int nxhi_i, int nyhi_i, int nzhi_i,
                           double& egy, double* gfn,
                           double* vir, double** vcoeff, double** vcoeff2,
                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
  // wk1,wk2 = work arrays, each holds nft complex values as (re,im) pairs
  // dfft = nft real density values, only read
  // ft1 = FFT used for the r -> k transform (flag 1)
  // ft2 = FFT used for the back transform (flag -1)
  // nx_p,ny_p,nz_p = grid dimensions, only used for the 1/total-grid-pts scaling
  // n*lo_ft,n*hi_ft = loop bounds for the copy of V(k) from wk1 to wk2
  // n*lo_i,n*hi_i = loop bounds used when filling the output brick
  // egy = energy accumulator, incremented if eflag_global is set
  // gfn = Green's function, one value per FFT grid point
  // vir = 6 virial accumulators, incremented if vflag_global is set
  // vcoeff,vcoeff2 = virial coefficients, vcoeff2 is only passed on
  //   to poisson_peratom()
  // u_pa = output brick, always written with the real part of the
  //   back-transformed V(k)
  // v0_pa..v5_pa = per-atom virial bricks, filled by poisson_peratom()
  //   if vflag_atom is set

  int i,j,k,n;
  double eng;

  // transform charge/dispersion density (r -> k)
  // density goes into the real part, imaginary part is zero

  n = 0;
  for (i = 0; i < nft; i++) {
    wk1[n++] = dfft[i];
    wk1[n++] = ZEROF;
  }

  ft1->compute(wk1,wk1,1);

  // if requested, compute energy and virial contribution
  // the grid point count is formed in double precision:
  //   an int product nx_p*ny_p*nz_p overflows for grids
  //   with more than 2^31-1 points

  double scaleinv = 1.0/(static_cast<double>(nx_p)*ny_p*nz_p);
  double s2 = scaleinv*scaleinv;

  if (eflag_global || vflag_global) {
    if (vflag_global) {
      n = 0;
      for (i = 0; i < nft; i++) {
        eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
        for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
        if (eflag_global) egy += eng;
        n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nft; i++) {
        egy +=
          s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
        n += 2;
      }
    }
  }

  // scale by 1/total-grid-pts to get rho(k)
  // multiply by Green's function to get V(k)

  n = 0;
  for (i = 0; i < nft; i++) {
    wk1[n++] *= scaleinv * gfn[i];
    wk1[n++] *= scaleinv * gfn[i];
  }

  // copy V(k) to wk2 so that wk1 stays intact for poisson_peratom()

  n = 0;
  for (k = nzlo_ft; k <= nzhi_ft; k++)
    for (j = nylo_ft; j <= nyhi_ft; j++)
      for (i = nxlo_ft; i <= nxhi_ft; i++) {
        wk2[n] = wk1[n];
        wk2[n+1] = wk1[n+1];
        n += 2;
      }

  ft2->compute(wk2,wk2,-1);

  // keep the real part of each back-transformed value, skip the imaginary part

  n = 0;
  for (k = nzlo_i; k <= nzhi_i; k++)
    for (j = nylo_i; j <= nyhi_i; j++)
      for (i = nxlo_i; i <= nxhi_i; i++) {
        u_pa[k][j][i] = wk2[n++];
        n++;
      }

  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
}

/* ----------------------------------------------------------------------
   Fourier Transform for per atom virial calculations
------------------------------------------------------------------------- */

void PPPMDisp::poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2,
                                double** vcoeff, double** vcoeff2, int nft,
                                int nxlo_i, int nylo_i, int nzlo_i,
                                int nxhi_i, int nyhi_i, int nzhi_i,
                                FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
                                FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
{
  // wk1 = nft complex input values as (re,im) pairs, only read
  // wk2 = work array of the same size, overwritten by each of the 3 passes
  // ft2 = FFT used for the back transforms (flag -1)
  // vcoeff,vcoeff2 = per-grid-point coefficients, paired up as
  //   (vcoeff[][0],vcoeff[][1]), (vcoeff[][2],vcoeff2[][0]),
  //   (vcoeff2[][1],vcoeff2[][2])
  // n*lo_i,n*hi_i = loop bounds used when filling the output bricks
  // v0_pa..v5_pa = output bricks
  //
  // each pass multiplies wk1 by the complex number (first + i*second) built
  //   from one coefficient pair, back-transforms it, and stores the real part
  //   in one output brick and the imaginary part in the next one

  int idx,pt;

  // pass 1: v0 & v1 term

  for (pt = 0, idx = 0; pt < nft; pt++, idx += 2) {
    wk2[idx] = wk1[idx]*vcoeff[pt][0] - wk1[idx+1]*vcoeff[pt][1];
    wk2[idx+1] = wk1[idx+1]*vcoeff[pt][0] +  wk1[idx]*vcoeff[pt][1];
  }

  ft2->compute(wk2,wk2,-1);

  idx = 0;
  for (int iz = nzlo_i; iz <= nzhi_i; iz++) {
    for (int iy = nylo_i; iy <= nyhi_i; iy++) {
      for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
        v0_pa[iz][iy][ix] = wk2[idx];
        v1_pa[iz][iy][ix] = wk2[idx+1];
        idx += 2;
      }
    }
  }

  // pass 2: v2 & v3 term

  for (pt = 0, idx = 0; pt < nft; pt++, idx += 2) {
    wk2[idx] = wk1[idx]*vcoeff[pt][2] - wk1[idx+1]*vcoeff2[pt][0];
    wk2[idx+1] = wk1[idx+1]*vcoeff[pt][2] + wk1[idx]*vcoeff2[pt][0];
  }

  ft2->compute(wk2,wk2,-1);

  idx = 0;
  for (int iz = nzlo_i; iz <= nzhi_i; iz++) {
    for (int iy = nylo_i; iy <= nyhi_i; iy++) {
      for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
        v2_pa[iz][iy][ix] = wk2[idx];
        v3_pa[iz][iy][ix] = wk2[idx+1];
        idx += 2;
      }
    }
  }

  // pass 3: v4 & v5 term

  for (pt = 0, idx = 0; pt < nft; pt++, idx += 2) {
    wk2[idx] = wk1[idx]*vcoeff2[pt][1] - wk1[idx+1]*vcoeff2[pt][2];
    wk2[idx+1] = wk1[idx+1]*vcoeff2[pt][1] + wk1[idx]*vcoeff2[pt][2];
  }

  ft2->compute(wk2,wk2,-1);

  idx = 0;
  for (int iz = nzlo_i; iz <= nzhi_i; iz++) {
    for (int iy = nylo_i; iy <= nyhi_i; iy++) {
      for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
        v4_pa[iz][iy][ix] = wk2[idx];
        v5_pa[iz][iy][ix] = wk2[idx+1];
        idx += 2;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   Poisson solver for one mesh with 2 different dispersion densities
   for ik scheme
------------------------------------------------------------------------- */

void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
  // dfft_1,dfft_2 = the two real densities, nfft_6 values each, only read
  // v?brick_1 = gradient bricks filled from the real part
  //   of each back transform
  // v?brick_2 = gradient bricks filled from the imaginary part
  //   of each back transform
  // u_pa_1,u_pa_2 = per-atom energy bricks, only written if eflag_atom is set
  // v0_pa_*..v5_pa_* = per-atom virial bricks, passed on to
  //   poisson_2s_peratom() if vflag_atom is set
  // work1_6 and work2_6 are overwritten
  // energy_6 and virial_6 are incremented if the global flags are set

  int i,j,k,n;
  double eng;

  // the grid point count is formed in double precision:
  //   a product of ints overflows for grids with more than 2^31-1 points

  double scaleinv = 1.0/(static_cast<double>(nx_pppm_6)*ny_pppm_6*nz_pppm_6);

  // transform charge/dispersion density (r -> k)
  // only one transform required when energies and pressures do not
  //  need to be calculated
  // density 1 goes into the real part, density 2 into the imaginary part

  if (eflag_global + vflag_global == 0) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n++] = dfft_1[i];
      work1_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
  }

  // two transforms are required when energies and pressures are
  //   calculated
  // work1_6 = (density 1, 0) and work2_6 = (0, density 2)

  else {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n] = dfft_1[i];
      work2_6[n++] = ZEROF;
      work1_6[n] = ZEROF;
      work2_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
    fft1_6->compute(work2_6,work2_6,1);

    double s2 = scaleinv*scaleinv;

    // energy/virial term couples the two transformed densities

    if (vflag_global) {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
        eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
        if (eflag_global)energy_6 += eng;
        n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
        energy_6 +=
          2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
        n += 2;
      }
    }

    // unify the two transformed vectors for efficient calculations later

    for ( i = 0; i < 2*nfft_6; i++) {
      work1_6[i] += work2_6[i];
    }
  }

  // scale by 1/total-grid-pts and multiply by Green's function

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work1_6[n++] *= scaleinv * greensfn_6[i];
    work1_6[n++] *= scaleinv * greensfn_6[i];
  }

  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
  // FFT leaves data in 3d brick decomposition
  // copy it into inner portion of vdx,vdy,vdz arrays

  // x direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
        work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
        work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
        n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        vxbrick_1[k][j][i] = work2_6[n++];
        vxbrick_2[k][j][i] = work2_6[n++];
      }

  // y direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
        work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
        work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
        n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        vybrick_1[k][j][i] = work2_6[n++];
        vybrick_2[k][j][i] = work2_6[n++];
      }

  // z direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
        work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
        work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
        n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        vzbrick_1[k][j][i] = work2_6[n++];
        vzbrick_2[k][j][i] = work2_6[n++];
      }

  // per-atom energy
  // back transform of the unmodified V(k)

  if (eflag_atom) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work2_6[n] = work1_6[n];
      work2_6[n+1] = work1_6[n+1];
      n += 2;
    }

    fft2_6->compute(work2_6,work2_6,-1);

    n = 0;
    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
      for (j = nylo_in_6; j <= nyhi_in_6; j++)
        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
          u_pa_1[k][j][i] = work2_6[n++];
          u_pa_2[k][j][i] = work2_6[n++];
        }
  }

  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
}


/* ----------------------------------------------------------------------
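   Poisson solver for one mesh with 2 different dispersion densities
   for ik scheme, variant that takes the indices n1 and n2 of the two
   densities and scales their results by B[n1] and B[n2]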
------------------------------------------------------------------------- */

void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
                              FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
                              FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
{
  int i,j,k,n;
  double eng;

  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);

  // transform charge/dispersion density (r -> k)
  // only one tansform required when energies and pressures do not
  //  need to be calculated
  if (eflag_global + vflag_global == 0) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n++] = dfft_1[i];
      work1_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
  }


  // two transforms are required when energies and pressures are
  //   calculated
  else {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n] = dfft_1[i];
      work2_6[n++] = ZEROF;
      work1_6[n] = ZEROF;
      work2_6[n++] = dfft_2[i];
    }


    fft1_6->compute(work1_6,work1_6,1);
    fft1_6->compute(work2_6,work2_6,1);

    double s2 = scaleinv*scaleinv;

    if (vflag_global) {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
	if (eflag_global)energy_6 += eng;
	n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
	energy_6 +=
	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
	n += 2;
      }
    }
    // unify the two transformed vectors for efficient calculations later
    for ( i = 0; i < 2*nfft_6; i++) {
      work1_6[i] += work2_6[i];
    }
  }

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work1_6[n++] *= scaleinv * greensfn_6[i];
    work1_6[n++] *= scaleinv * greensfn_6[i];
  }

  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
  // FFT leaves data in 3d brick decomposition
  // copy it into inner portion of vdx,vdy,vdz arrays

  // x direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
	work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
	work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
	n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
	vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
        vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
      }

  // y direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
	work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
	work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
	n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
	vybrick_1[k][j][i] = B[n1]*work2_6[n++];
        vybrick_2[k][j][i] = B[n2]*work2_6[n++];
      }

  // z direction gradient

  n = 0;
  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
	work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
	work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
	n += 2;
      }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
	vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
	vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Per-atom energy

  if (eflag_atom) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work2_6[n] = work1_6[n];
      work2_6[n+1] = work1_6[n+1];
      n += 2;
    }

    fft2_6->compute(work2_6,work2_6,-1);

    n = 0;
    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
      for (j = nylo_in_6; j <= nyhi_in_6; j++)
        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
          u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
          u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
        }
  }

  if (vflag_atom) poisson_none_peratom(n1,n2,
                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
}

/* ----------------------------------------------------------------------
   Poisson solver for one mesh with 2 different dispersion densities
   for ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
  // dfft_1,dfft_2 = the two real densities, nfft_6 values each, only read
  // u_pa_1 = output brick, always written with the real part
  //   of the back-transformed V(k)
  // u_pa_2 = output brick, always written with the imaginary part
  //   of the back-transformed V(k)
  // v0_pa_*..v5_pa_* = per-atom virial bricks, passed on to
  //   poisson_2s_peratom() if vflag_atom is set
  // work1_6 and work2_6 are overwritten
  // energy_6 and virial_6 are incremented if the global flags are set

  int i,j,k,n;
  double eng;

  // the grid point count is formed in double precision:
  //   a product of ints overflows for grids with more than 2^31-1 points

  double scaleinv = 1.0/(static_cast<double>(nx_pppm_6)*ny_pppm_6*nz_pppm_6);

  // transform charge/dispersion density (r -> k)
  // only one transform required when energies and pressures do not
  //  need to be calculated
  // density 1 goes into the real part, density 2 into the imaginary part

  if (eflag_global + vflag_global == 0) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n++] = dfft_1[i];
      work1_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
  }

  // two transforms are required when energies and pressures are
  //   calculated
  // work1_6 = (density 1, 0) and work2_6 = (0, density 2)

  else {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n] = dfft_1[i];
      work2_6[n++] = ZEROF;
      work1_6[n] = ZEROF;
      work2_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
    fft1_6->compute(work2_6,work2_6,1);

    double s2 = scaleinv*scaleinv;

    // energy/virial term couples the two transformed densities

    if (vflag_global) {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
        eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
        for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
        if (eflag_global)energy_6 += eng;
        n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
        energy_6 +=
          2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
        n += 2;
      }
    }

    // unify the two transformed vectors for efficient calculations later

    for ( i = 0; i < 2*nfft_6; i++) {
      work1_6[i] += work2_6[i];
    }
  }

  // scale by 1/total-grid-pts and multiply by Green's function

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work1_6[n++] *= scaleinv * greensfn_6[i];
    work1_6[n++] *= scaleinv * greensfn_6[i];
  }

  // copy V(k) to work2_6 before the back transform, work1_6 is left intact

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n];
    work2_6[n+1] = work1_6[n+1];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        u_pa_1[k][j][i] = work2_6[n++];
        u_pa_2[k][j][i] = work2_6[n++];
      }

  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
}

/* ----------------------------------------------------------------------
   Poisson solver for one mesh with 2 different dispersion densities
   for ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
                               FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
{
  int i,j,k,n;
  double eng;

  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);

  // transform charge/dispersion density (r -> k)
  // only one tansform required when energies and pressures do not
  //  need to be calculated
  if (eflag_global + vflag_global == 0) {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n++] = dfft_1[i];
      work1_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
  }
  // two transforms are required when energies and pressures are
  //   calculated
  else {
    n = 0;
    for (i = 0; i < nfft_6; i++) {
      work1_6[n] = dfft_1[i];
      work2_6[n++] = ZEROF;
      work1_6[n] = ZEROF;
      work2_6[n++] = dfft_2[i];
    }

    fft1_6->compute(work1_6,work1_6,1);
    fft1_6->compute(work2_6,work2_6,1);

    double s2 = scaleinv*scaleinv;

    if (vflag_global) {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
	if (eflag_global)energy_6 += eng;
	n += 2;
      }
    } else {
      n = 0;
      for (i = 0; i < nfft_6; i++) {
	energy_6 +=
	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
	n += 2;
      }
    }
    // unify the two transformed vectors for efficient calculations later
    for ( i = 0; i < 2*nfft_6; i++) {
      work1_6[i] += work2_6[i];
    }
  }


  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work1_6[n++] *= scaleinv * greensfn_6[i];
    work1_6[n++] *= scaleinv * greensfn_6[i];
  }


  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n];
    work2_6[n+1] = work1_6[n+1];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        u_pa_1[k][j][i] = B[n1]*work2_6[n++];
        u_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  if (vflag_atom) poisson_none_peratom(n1,n2,
                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
}

/* ----------------------------------------------------------------------
   Fourier Transform for per atom virial calculations

   Reads the k-space field held in work1_6 (left there by the calling
   Poisson solver) and produces six real-space grids per density.
   Components v0-v2 are weighted with columns 0-2 of vg_6, components
   v3-v5 with columns 0-2 of vg2_6.  After each inverse FFT the real
   part is stored in the *_pa_1 grid and the imaginary part in the
   *_pa_2 grid.  work2_6 is used as scratch space.
------------------------------------------------------------------------- */

void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                                   FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                                   FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                                   FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
  int n, i, j, k, c;

  // destination grids, ordered v0..v5, so that all six virial terms
  // share one code path instead of six copies of it

  FFT_SCALAR ***grid_1[6] = {v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1};
  FFT_SCALAR ***grid_2[6] = {v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2};

  for (c = 0; c < 6; c++) {

    // weight the k-space field with the factor of virial term c

    n = 0;
    for (i = 0; i < nfft_6; i++) {
      const double weight = (c < 3) ? vg_6[i][c] : vg2_6[i][c-3];
      work2_6[n] = work1_6[n]*weight;
      work2_6[n+1] = work1_6[n+1]*weight;
      n += 2;
    }

    fft2_6->compute(work2_6,work2_6,-1);

    // unpack real and imaginary part into the two output grids

    n = 0;
    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
      for (j = nylo_in_6; j <= nyhi_in_6; j++)
        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
          grid_1[c][k][j][i] = work2_6[n++];
          grid_2[c][k][j][i] = work2_6[n++];
        }
  }
}

/* ----------------------------------------------------------------------
   Fourier Transform for per atom virial calculations
------------------------------------------------------------------------- */

void PPPMDisp::poisson_none_peratom(int n1, int n2,
                                 FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
                                 FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
                                 FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
                                 FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
{
  //Compute first virial term v0
  int n, i, j, k;

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg_6[i][0];
    work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Compute second virial term v1

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg_6[i][1];
    work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Compute third virial term v2

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg_6[i][2];
    work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Compute fourth virial term v3

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg2_6[i][0];
    work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Compute fifth virial term v4

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg2_6[i][1];
    work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }

  //Compute last virial term v5

  n = 0;
  for (i = 0; i < nfft_6; i++) {
    work2_6[n] = work1_6[n]*vg2_6[i][2];
    work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
    n += 2;
  }

  fft2_6->compute(work2_6,work2_6,-1);

  n = 0;
  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
    for (j = nylo_in_6; j <= nyhi_in_6; j++)
      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
        v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
        v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
      }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get electric field & force on my particles
   for ik scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_c_ik()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR ekx,eky,ekz;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  double *q = atom->q;
  double **x = atom->x;
  double **f = atom->f;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;

    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);

    ekx = eky = ekz = ZEROF;
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      z0 = rho1d[2][n];
      for (m = nlower; m <= nupper; m++) {
	my = m+ny;
	y0 = z0*rho1d[1][m];
	for (l = nlower; l <= nupper; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d[0][l];
	  ekx -= x0*vdx_brick[mz][my][mx];
	  eky -= x0*vdy_brick[mz][my][mx];
	  ekz -= x0*vdz_brick[mz][my][mx];
	}
      }
    }

    // convert E-field to force

    const double qfactor = force->qqrd2e * scale * q[i];
    f[i][0] += qfactor*ekx;
    f[i][1] += qfactor*eky;
    if (slabflag != 2) f[i][2] += qfactor*ekz;
  }
}
/* ----------------------------------------------------------------------
   interpolate from grid to get electric field & force on my particles
   for ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_c_ad()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz;
  FFT_SCALAR ekx,eky,ekz;
  double s1,s2,s3;
  double sf = 0.0;

  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  double hx_inv = nx_pppm/xprd;
  double hy_inv = ny_pppm/yprd;
  double hz_inv = nz_pppm/zprd_slab;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  double *q = atom->q;
  double **x = atom->x;
  double **f = atom->f;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;

    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
    compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);

    ekx = eky = ekz = ZEROF;
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      for (m = nlower; m <= nupper; m++) {
        my = m+ny;
        for (l = nlower; l <= nupper; l++) {
          mx = l+nx;
          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
        }
      }
    }
    ekx *= hx_inv;
    eky *= hy_inv;
    ekz *= hz_inv;
    // convert E-field to force and substract self forces
    const double qfactor = force->qqrd2e * scale;

    s1 = x[i][0]*hx_inv;
    s2 = x[i][1]*hy_inv;
    s3 = x[i][2]*hz_inv;
    sf = sf_coeff[0]*sin(2*MY_PI*s1);
    sf += sf_coeff[1]*sin(4*MY_PI*s1);
    sf *= 2*q[i]*q[i];
    f[i][0] += qfactor*(ekx*q[i] - sf);

    sf = sf_coeff[2]*sin(2*MY_PI*s2);
    sf += sf_coeff[3]*sin(4*MY_PI*s2);
    sf *= 2*q[i]*q[i];
    f[i][1] += qfactor*(eky*q[i] - sf);


    sf = sf_coeff[4]*sin(2*MY_PI*s3);
    sf += sf_coeff[5]*sin(4*MY_PI*s3);
    sf *= 2*q[i]*q[i];
    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_c_peratom()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  double *q = atom->q;
  double **x = atom->x;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid[i][0];
    ny = part2grid[i][1];
    nz = part2grid[i][2];
    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;

    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);

    u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
    for (n = nlower; n <= nupper; n++) {
      mz = n+nz;
      z0 = rho1d[2][n];
      for (m = nlower; m <= nupper; m++) {
	my = m+ny;
	y0 = z0*rho1d[1][m];
	for (l = nlower; l <= nupper; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d[0][l];
	  if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];
	  if (vflag_atom) {
            v0 += x0*v0_brick[mz][my][mx];
            v1 += x0*v1_brick[mz][my][mx];
            v2 += x0*v2_brick[mz][my][mx];
            v3 += x0*v3_brick[mz][my][mx];
            v4 += x0*v4_brick[mz][my][mx];
            v5 += x0*v5_brick[mz][my][mx];
          }
	}
      }
    }

    // convert E-field to force

    const double qfactor = 0.5*force->qqrd2e * scale * q[i];

    if (eflag_atom) eatom[i] += u_pa*qfactor;
    if (vflag_atom) {
      vatom[i][0] += v0*qfactor;
      vatom[i][1] += v1*qfactor;
      vatom[i][2] += v2*qfactor;
      vatom[i][3] += v3*qfactor;
      vatom[i][4] += v4*qfactor;
      vatom[i][5] += v5*qfactor;
    }
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for geometric mixing rule
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_g_ik()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR ekx,eky,ekz;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);

    ekx = eky = ekz = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
	  ekx -= x0*vdx_brick_g[mz][my][mx];
	  eky -= x0*vdy_brick_g[mz][my][mx];
	  ekz -= x0*vdz_brick_g[mz][my][mx];
	}
      }
    }

    // convert E-field to force
    type = atom->type[i];
    lj = B[type];
    f[i][0] += lj*ekx;
    f[i][1] += lj*eky;
    if (slabflag != 2) f[i][2] += lj*ekz;
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for geometric mixing rule for ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_g_ad()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz;
  FFT_SCALAR ekx,eky,ekz;
  double s1,s2,s3;
  double sf = 0.0;
  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  double hx_inv = nx_pppm_6/xprd;
  double hy_inv = ny_pppm_6/yprd;
  double hz_inv = nz_pppm_6/zprd_slab;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj;

  int nlocal = atom->nlocal;


  for (i = 0; i < nlocal; i++) {
    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);


    ekx = eky = ekz = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      for (m = nlower_6; m <= nupper_6; m++) {
        my = m+ny;
        for (l = nlower_6; l <= nupper_6; l++) {
          mx = l+nx;
          ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
          eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
          ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
        }
      }
    }
    ekx *= hx_inv;
    eky *= hy_inv;
    ekz *= hz_inv;

    // convert E-field to force
    type = atom->type[i];
    lj = B[type];

    s1 = x[i][0]*hx_inv;
    s2 = x[i][1]*hy_inv;
    s3 = x[i][2]*hz_inv;

    sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
    sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
    sf *= 2*lj*lj;
    f[i][0] += ekx*lj - sf;

    sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
    sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
    sf *= 2*lj*lj;
    f[i][1] += eky*lj - sf;


    sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
    sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
    sf *= 2*lj*lj;
    if (slabflag != 2) f[i][2] += ekz*lj - sf;

  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for geometric mixing rule for per atom quantities
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_g_peratom()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  int type;
  double lj;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {
    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);

    u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
	  if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];
	  if (vflag_atom) {
            v0 += x0*v0_brick_g[mz][my][mx];
            v1 += x0*v1_brick_g[mz][my][mx];
            v2 += x0*v2_brick_g[mz][my][mx];
            v3 += x0*v3_brick_g[mz][my][mx];
            v4 += x0*v4_brick_g[mz][my][mx];
            v5 += x0*v5_brick_g[mz][my][mx];
          }
	}
      }
    }

    // convert E-field to force
    type = atom->type[i];
    lj = B[type]*0.5;

    if (eflag_atom) eatom[i] += u_pa*lj;
    if (vflag_atom) {
      vatom[i][0] += v0*lj;
      vatom[i][1] += v1*lj;
      vatom[i][2] += v2*lj;
      vatom[i][3] += v3*lj;
      vatom[i][4] += v4*lj;
      vatom[i][5] += v5*lj;
    }
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for arithmetic mixing rule and ik scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_a_ik()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
  FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
  FFT_SCALAR ekx6, eky6, ekz6;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    ekx0 = eky0 = ekz0 = ZEROF;
    ekx1 = eky1 = ekz1 = ZEROF;
    ekx2 = eky2 = ekz2 = ZEROF;
    ekx3 = eky3 = ekz3 = ZEROF;
    ekx4 = eky4 = ekz4 = ZEROF;
    ekx5 = eky5 = ekz5 = ZEROF;
    ekx6 = eky6 = ekz6 = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
	  ekx0 -= x0*vdx_brick_a0[mz][my][mx];
	  eky0 -= x0*vdy_brick_a0[mz][my][mx];
	  ekz0 -= x0*vdz_brick_a0[mz][my][mx];
	  ekx1 -= x0*vdx_brick_a1[mz][my][mx];
	  eky1 -= x0*vdy_brick_a1[mz][my][mx];
	  ekz1 -= x0*vdz_brick_a1[mz][my][mx];
          ekx2 -= x0*vdx_brick_a2[mz][my][mx];
	  eky2 -= x0*vdy_brick_a2[mz][my][mx];
	  ekz2 -= x0*vdz_brick_a2[mz][my][mx];
	  ekx3 -= x0*vdx_brick_a3[mz][my][mx];
	  eky3 -= x0*vdy_brick_a3[mz][my][mx];
	  ekz3 -= x0*vdz_brick_a3[mz][my][mx];
	  ekx4 -= x0*vdx_brick_a4[mz][my][mx];
	  eky4 -= x0*vdy_brick_a4[mz][my][mx];
	  ekz4 -= x0*vdz_brick_a4[mz][my][mx];
          ekx5 -= x0*vdx_brick_a5[mz][my][mx];
	  eky5 -= x0*vdy_brick_a5[mz][my][mx];
	  ekz5 -= x0*vdz_brick_a5[mz][my][mx];
          ekx6 -= x0*vdx_brick_a6[mz][my][mx];
	  eky6 -= x0*vdy_brick_a6[mz][my][mx];
	  ekz6 -= x0*vdz_brick_a6[mz][my][mx];
	}
      }
    }
    // convert D-field to force
    type = atom->type[i];
    lj0 = B[7*type+6];
    lj1 = B[7*type+5];
    lj2 = B[7*type+4];
    lj3 = B[7*type+3];
    lj4 = B[7*type+2];
    lj5 = B[7*type+1];
    lj6 = B[7*type];
    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for arithmetic mixing rule for the ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_a_ad()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
  FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
  FFT_SCALAR ekx6, eky6, ekz6;

  double s1,s2,s3;
  double sf = 0.0;
  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  double hx_inv = nx_pppm_6/xprd;
  double hy_inv = ny_pppm_6/yprd;
  double hz_inv = nz_pppm_6/zprd_slab;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);

    ekx0 = eky0 = ekz0 = ZEROF;
    ekx1 = eky1 = ekz1 = ZEROF;
    ekx2 = eky2 = ekz2 = ZEROF;
    ekx3 = eky3 = ekz3 = ZEROF;
    ekx4 = eky4 = ekz4 = ZEROF;
    ekx5 = eky5 = ekz5 = ZEROF;
    ekx6 = eky6 = ekz6 = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
          x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
          y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
          z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];

          ekx0 += x0*u_brick_a0[mz][my][mx];
          eky0 += y0*u_brick_a0[mz][my][mx];
          ekz0 += z0*u_brick_a0[mz][my][mx];

          ekx1 += x0*u_brick_a1[mz][my][mx];
          eky1 += y0*u_brick_a1[mz][my][mx];
          ekz1 += z0*u_brick_a1[mz][my][mx];

          ekx2 += x0*u_brick_a2[mz][my][mx];
          eky2 += y0*u_brick_a2[mz][my][mx];
          ekz2 += z0*u_brick_a2[mz][my][mx];

          ekx3 += x0*u_brick_a3[mz][my][mx];
          eky3 += y0*u_brick_a3[mz][my][mx];
          ekz3 += z0*u_brick_a3[mz][my][mx];

          ekx4 += x0*u_brick_a4[mz][my][mx];
          eky4 += y0*u_brick_a4[mz][my][mx];
          ekz4 += z0*u_brick_a4[mz][my][mx];

          ekx5 += x0*u_brick_a5[mz][my][mx];
          eky5 += y0*u_brick_a5[mz][my][mx];
          ekz5 += z0*u_brick_a5[mz][my][mx];

          ekx6 += x0*u_brick_a6[mz][my][mx];
          eky6 += y0*u_brick_a6[mz][my][mx];
          ekz6 += z0*u_brick_a6[mz][my][mx];
	}
      }
    }

    ekx0 *= hx_inv;
    eky0 *= hy_inv;
    ekz0 *= hz_inv;

    ekx1 *= hx_inv;
    eky1 *= hy_inv;
    ekz1 *= hz_inv;

    ekx2 *= hx_inv;
    eky2 *= hy_inv;
    ekz2 *= hz_inv;

    ekx3 *= hx_inv;
    eky3 *= hy_inv;
    ekz3 *= hz_inv;

    ekx4 *= hx_inv;
    eky4 *= hy_inv;
    ekz4 *= hz_inv;

    ekx5 *= hx_inv;
    eky5 *= hy_inv;
    ekz5 *= hz_inv;

    ekx6 *= hx_inv;
    eky6 *= hy_inv;
    ekz6 *= hz_inv;

    // convert D-field to force
    type = atom->type[i];
    lj0 = B[7*type+6];
    lj1 = B[7*type+5];
    lj2 = B[7*type+4];
    lj3 = B[7*type+3];
    lj4 = B[7*type+2];
    lj5 = B[7*type+1];
    lj6 = B[7*type];

    s1 = x[i][0]*hx_inv;
    s2 = x[i][1]*hy_inv;
    s3 = x[i][2]*hz_inv;

    sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
    sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;

    sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
    sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;

    sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
    sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for arithmetic mixing rule for per atom quantities
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_a_peratom()
{
  int i,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
  FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
  FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
  FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
  FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
  FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
  FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  int type;
  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);

    u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
    u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
    u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
    u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
    u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
    u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
    u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
          if (eflag_atom) {
            u_pa0 += x0*u_brick_a0[mz][my][mx];
            u_pa1 += x0*u_brick_a1[mz][my][mx];
            u_pa2 += x0*u_brick_a2[mz][my][mx];
            u_pa3 += x0*u_brick_a3[mz][my][mx];
            u_pa4 += x0*u_brick_a4[mz][my][mx];
            u_pa5 += x0*u_brick_a5[mz][my][mx];
            u_pa6 += x0*u_brick_a6[mz][my][mx];
	  }
          if (vflag_atom) {
            v00 += x0*v0_brick_a0[mz][my][mx];
            v10 += x0*v1_brick_a0[mz][my][mx];
            v20 += x0*v2_brick_a0[mz][my][mx];
            v30 += x0*v3_brick_a0[mz][my][mx];
            v40 += x0*v4_brick_a0[mz][my][mx];
            v50 += x0*v5_brick_a0[mz][my][mx];
            v01 += x0*v0_brick_a1[mz][my][mx];
            v11 += x0*v1_brick_a1[mz][my][mx];
            v21 += x0*v2_brick_a1[mz][my][mx];
            v31 += x0*v3_brick_a1[mz][my][mx];
            v41 += x0*v4_brick_a1[mz][my][mx];
            v51 += x0*v5_brick_a1[mz][my][mx];
            v02 += x0*v0_brick_a2[mz][my][mx];
            v12 += x0*v1_brick_a2[mz][my][mx];
            v22 += x0*v2_brick_a2[mz][my][mx];
            v32 += x0*v3_brick_a2[mz][my][mx];
            v42 += x0*v4_brick_a2[mz][my][mx];
            v52 += x0*v5_brick_a2[mz][my][mx];
            v03 += x0*v0_brick_a3[mz][my][mx];
            v13 += x0*v1_brick_a3[mz][my][mx];
            v23 += x0*v2_brick_a3[mz][my][mx];
            v33 += x0*v3_brick_a3[mz][my][mx];
            v43 += x0*v4_brick_a3[mz][my][mx];
            v53 += x0*v5_brick_a3[mz][my][mx];
            v04 += x0*v0_brick_a4[mz][my][mx];
            v14 += x0*v1_brick_a4[mz][my][mx];
            v24 += x0*v2_brick_a4[mz][my][mx];
            v34 += x0*v3_brick_a4[mz][my][mx];
            v44 += x0*v4_brick_a4[mz][my][mx];
            v54 += x0*v5_brick_a4[mz][my][mx];
            v05 += x0*v0_brick_a5[mz][my][mx];
            v15 += x0*v1_brick_a5[mz][my][mx];
            v25 += x0*v2_brick_a5[mz][my][mx];
            v35 += x0*v3_brick_a5[mz][my][mx];
            v45 += x0*v4_brick_a5[mz][my][mx];
            v55 += x0*v5_brick_a5[mz][my][mx];
            v06 += x0*v0_brick_a6[mz][my][mx];
            v16 += x0*v1_brick_a6[mz][my][mx];
            v26 += x0*v2_brick_a6[mz][my][mx];
            v36 += x0*v3_brick_a6[mz][my][mx];
            v46 += x0*v4_brick_a6[mz][my][mx];
            v56 += x0*v5_brick_a6[mz][my][mx];
          }
	}
      }
    }
    // convert D-field to force
    type = atom->type[i];
    lj0 = B[7*type+6]*0.5;
    lj1 = B[7*type+5]*0.5;
    lj2 = B[7*type+4]*0.5;
    lj3 = B[7*type+3]*0.5;
    lj4 = B[7*type+2]*0.5;
    lj5 = B[7*type+1]*0.5;
    lj6 = B[7*type]*0.5;


    if (eflag_atom)
      eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 +
        u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
    if (vflag_atom) {
      vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
        v04*lj4 + v05*lj5 + v06*lj6;
      vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
        v14*lj4 + v15*lj5 + v16*lj6;
      vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
        v24*lj4 + v25*lj5 + v26*lj6;
      vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
        v34*lj4 + v35*lj5 + v36*lj6;
      vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
        v44*lj4 + v45*lj5 + v46*lj6;
      vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
        v54*lj4 + v55*lj5 + v56*lj6;
    }
  }
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for no mixing rule and ik scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_none_ik()
{
  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR *ekx, *eky, *ekz;

  ekx = new FFT_SCALAR[nsplit];
  eky = new FFT_SCALAR[nsplit];
  ekz = new FFT_SCALAR[nsplit];
  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    for (k = 0; k < nsplit; k++)
      ekx[k] = eky[k] = ekz[k] = ZEROF;
    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
          for (k = 0; k < nsplit; k++) {
	    ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
	    eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
	    ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
          }
	}
      }
    }
    // convert D-field to force
    type = atom->type[i];
    for (k = 0; k < nsplit; k++) {
      lj = B[nsplit*type + k];
      f[i][0] += lj*ekx[k];
      f[i][1] +=lj*eky[k];
      if (slabflag != 2) f[i][2] +=lj*ekz[k];
    }
  }

  delete [] ekx;
  delete [] eky;
  delete [] ekz;
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for no mixing rule for the ad scheme
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_none_ad()
{
  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR *ekx, *eky, *ekz;

  ekx = new FFT_SCALAR[nsplit];
  eky = new FFT_SCALAR[nsplit];
  ekz = new FFT_SCALAR[nsplit];


  double s1,s2,s3;
  double sf1,sf2,sf3;
  double sf = 0.0;
  double *prd;

  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  double hx_inv = nx_pppm_6/xprd;
  double hy_inv = ny_pppm_6/yprd;
  double hz_inv = nz_pppm_6/zprd_slab;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  double **f = atom->f;
  int type;
  double lj;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);

    for (k = 0; k < nsplit; k++)
      ekx[k] = eky[k] = ekz[k] = ZEROF;

    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
          x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
          y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
          z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];

          for (k = 0; k < nsplit; k++) {
            ekx[k] += x0*u_brick_none[k][mz][my][mx];
            eky[k] += y0*u_brick_none[k][mz][my][mx];
            ekz[k] += z0*u_brick_none[k][mz][my][mx];
          }
	}
      }
    }

    for (k = 0; k < nsplit; k++) {
      ekx[k] *= hx_inv;
      eky[k] *= hy_inv;
      ekz[k] *= hz_inv;
    }

    // convert D-field to force
    type = atom->type[i];

    s1 = x[i][0]*hx_inv;
    s2 = x[i][1]*hy_inv;
    s3 = x[i][2]*hz_inv;

    sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
    sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);

    sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
    sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);

    sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
    sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);

    for (k = 0; k < nsplit; k++) {
      lj = B[nsplit*type + k];

      sf = sf1*B[k]*2*lj*lj;
      f[i][0] += lj*ekx[k] - sf;


      sf = sf2*B[k]*2*lj*lj;
      f[i][1] += lj*eky[k] - sf;

      sf = sf3*B[k]*2*lj*lj;
      if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
    }
  }

  delete [] ekx;
  delete [] eky;
  delete [] ekz;
}

/* ----------------------------------------------------------------------
   interpolate from grid to get dispersion field & force on my particles
   for no mixing rule for per atom quantities
------------------------------------------------------------------------- */

void PPPMDisp::fieldforce_none_peratom()
{
  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
  FFT_SCALAR dx,dy,dz,x0,y0,z0;
  FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;

  u_pa = new FFT_SCALAR[nsplit];
  v0 = new FFT_SCALAR[nsplit];
  v1 = new FFT_SCALAR[nsplit];
  v2 = new FFT_SCALAR[nsplit];
  v3 = new FFT_SCALAR[nsplit];
  v4 = new FFT_SCALAR[nsplit];
  v5 = new FFT_SCALAR[nsplit];

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of dispersion field on particle

  double **x = atom->x;
  int type;
  double lj;

  int nlocal = atom->nlocal;

  for (i = 0; i < nlocal; i++) {

    nx = part2grid_6[i][0];
    ny = part2grid_6[i][1];
    nz = part2grid_6[i][2];
    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);

    for (k = 0; k < nsplit; k++)
      u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;

    for (n = nlower_6; n <= nupper_6; n++) {
      mz = n+nz;
      z0 = rho1d_6[2][n];
      for (m = nlower_6; m <= nupper_6; m++) {
	my = m+ny;
	y0 = z0*rho1d_6[1][m];
	for (l = nlower_6; l <= nupper_6; l++) {
	  mx = l+nx;
	  x0 = y0*rho1d_6[0][l];
          if (eflag_atom) {
            for (k = 0; k < nsplit; k++)
              u_pa[k] += x0*u_brick_none[k][mz][my][mx];
	  }
          if (vflag_atom) {
            for (k = 0; k < nsplit; k++) {
              v0[k] += x0*v0_brick_none[k][mz][my][mx];
              v1[k] += x0*v1_brick_none[k][mz][my][mx];
              v2[k] += x0*v2_brick_none[k][mz][my][mx];
              v3[k] += x0*v3_brick_none[k][mz][my][mx];
              v4[k] += x0*v4_brick_none[k][mz][my][mx];
              v5[k] += x0*v5_brick_none[k][mz][my][mx];
            }
          }
	}
      }
    }
    // convert D-field to force
    type = atom->type[i];
    for (k = 0; k < nsplit; k++) {
      lj = B[nsplit*type + k]*0.5;

      if (eflag_atom) {
        eatom[i] += u_pa[k]*lj;
      }
      if (vflag_atom) {
        vatom[i][0] += v0[k]*lj;
        vatom[i][1] += v1[k]*lj;
        vatom[i][2] += v2[k]*lj;
        vatom[i][3] += v3[k]*lj;
        vatom[i][4] += v4[k]*lj;
        vatom[i][5] += v5[k]*lj;
      }
    }
  }

  delete [] u_pa;
  delete [] v0;
  delete [] v1;
  delete [] v2;
  delete [] v3;
  delete [] v4;
  delete [] v5;
}

/* ----------------------------------------------------------------------
   pack values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  switch (flag) {

  // Coulomb interactions

  case FORWARD_IK: {
    FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = xsrc[list[i]];
      buf[n++] = ysrc[list[i]];
      buf[n++] = zsrc[list[i]];
    }
    break;
  }

  case FORWARD_AD: {
    FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];
    break;
  }

  case FORWARD_IK_PERATOM: {
    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) buf[n++] = esrc[list[i]];
      if (vflag_atom) {
        buf[n++] = v0src[list[i]];
        buf[n++] = v1src[list[i]];
        buf[n++] = v2src[list[i]];
        buf[n++] = v3src[list[i]];
        buf[n++] = v4src[list[i]];
        buf[n++] = v5src[list[i]];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM: {
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = v0src[list[i]];
      buf[n++] = v1src[list[i]];
      buf[n++] = v2src[list[i]];
      buf[n++] = v3src[list[i]];
      buf[n++] = v4src[list[i]];
      buf[n++] = v5src[list[i]];
    }
    break;
  }

  // Dispersion interactions, geometric mixing

  case FORWARD_IK_G: {
    FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = xsrc[list[i]];
      buf[n++] = ysrc[list[i]];
      buf[n++] = zsrc[list[i]];
    }
    break;
  }

  case FORWARD_AD_G: {
    FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];
    break;
  }

  case FORWARD_IK_PERATOM_G: {
    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) buf[n++] = esrc[list[i]];
      if (vflag_atom) {
        buf[n++] = v0src[list[i]];
        buf[n++] = v1src[list[i]];
        buf[n++] = v2src[list[i]];
        buf[n++] = v3src[list[i]];
        buf[n++] = v4src[list[i]];
        buf[n++] = v5src[list[i]];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_G: {
    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = v0src[list[i]];
      buf[n++] = v1src[list[i]];
      buf[n++] = v2src[list[i]];
      buf[n++] = v3src[list[i]];
      buf[n++] = v4src[list[i]];
      buf[n++] = v5src[list[i]];
    }
    break;
  }

  // Dispersion interactions, arithmetic mixing

  case FORWARD_IK_A: {
    FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      buf[n++] = xsrc0[list[i]];
      buf[n++] = ysrc0[list[i]];
      buf[n++] = zsrc0[list[i]];

      buf[n++] = xsrc1[list[i]];
      buf[n++] = ysrc1[list[i]];
      buf[n++] = zsrc1[list[i]];

      buf[n++] = xsrc2[list[i]];
      buf[n++] = ysrc2[list[i]];
      buf[n++] = zsrc2[list[i]];

      buf[n++] = xsrc3[list[i]];
      buf[n++] = ysrc3[list[i]];
      buf[n++] = zsrc3[list[i]];

      buf[n++] = xsrc4[list[i]];
      buf[n++] = ysrc4[list[i]];
      buf[n++] = zsrc4[list[i]];

      buf[n++] = xsrc5[list[i]];
      buf[n++] = ysrc5[list[i]];
      buf[n++] = zsrc5[list[i]];

      buf[n++] = xsrc6[list[i]];
      buf[n++] = ysrc6[list[i]];
      buf[n++] = zsrc6[list[i]];
    }
    break;
  }

  case FORWARD_AD_A: {
    FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      buf[n++] = src0[list[i]];
      buf[n++] = src1[list[i]];
      buf[n++] = src2[list[i]];
      buf[n++] = src3[list[i]];
      buf[n++] = src4[list[i]];
      buf[n++] = src5[list[i]];
      buf[n++] = src6[list[i]];
    }
    break;
  }

  case FORWARD_IK_PERATOM_A: {
    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) {
        buf[n++] = esrc0[list[i]];
        buf[n++] = esrc1[list[i]];
        buf[n++] = esrc2[list[i]];
        buf[n++] = esrc3[list[i]];
        buf[n++] = esrc4[list[i]];
        buf[n++] = esrc5[list[i]];
        buf[n++] = esrc6[list[i]];
      }
      if (vflag_atom) {
        buf[n++] = v0src0[list[i]];
        buf[n++] = v1src0[list[i]];
        buf[n++] = v2src0[list[i]];
        buf[n++] = v3src0[list[i]];
        buf[n++] = v4src0[list[i]];
        buf[n++] = v5src0[list[i]];

        buf[n++] = v0src1[list[i]];
        buf[n++] = v1src1[list[i]];
        buf[n++] = v2src1[list[i]];
        buf[n++] = v3src1[list[i]];
        buf[n++] = v4src1[list[i]];
        buf[n++] = v5src1[list[i]];

        buf[n++] = v0src2[list[i]];
        buf[n++] = v1src2[list[i]];
        buf[n++] = v2src2[list[i]];
        buf[n++] = v3src2[list[i]];
        buf[n++] = v4src2[list[i]];
        buf[n++] = v5src2[list[i]];

        buf[n++] = v0src3[list[i]];
        buf[n++] = v1src3[list[i]];
        buf[n++] = v2src3[list[i]];
        buf[n++] = v3src3[list[i]];
        buf[n++] = v4src3[list[i]];
        buf[n++] = v5src3[list[i]];

        buf[n++] = v0src4[list[i]];
        buf[n++] = v1src4[list[i]];
        buf[n++] = v2src4[list[i]];
        buf[n++] = v3src4[list[i]];
        buf[n++] = v4src4[list[i]];
        buf[n++] = v5src4[list[i]];

        buf[n++] = v0src5[list[i]];
        buf[n++] = v1src5[list[i]];
        buf[n++] = v2src5[list[i]];
        buf[n++] = v3src5[list[i]];
        buf[n++] = v4src5[list[i]];
        buf[n++] = v5src5[list[i]];

        buf[n++] = v0src6[list[i]];
        buf[n++] = v1src6[list[i]];
        buf[n++] = v2src6[list[i]];
        buf[n++] = v3src6[list[i]];
        buf[n++] = v4src6[list[i]];
        buf[n++] = v5src6[list[i]];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_A: {
    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      buf[n++] = v0src0[list[i]];
      buf[n++] = v1src0[list[i]];
      buf[n++] = v2src0[list[i]];
      buf[n++] = v3src0[list[i]];
      buf[n++] = v4src0[list[i]];
      buf[n++] = v5src0[list[i]];

      buf[n++] = v0src1[list[i]];
      buf[n++] = v1src1[list[i]];
      buf[n++] = v2src1[list[i]];
      buf[n++] = v3src1[list[i]];
      buf[n++] = v4src1[list[i]];
      buf[n++] = v5src1[list[i]];

      buf[n++] = v0src2[list[i]];
      buf[n++] = v1src2[list[i]];
      buf[n++] = v2src2[list[i]];
      buf[n++] = v3src2[list[i]];
      buf[n++] = v4src2[list[i]];
      buf[n++] = v5src2[list[i]];

      buf[n++] = v0src3[list[i]];
      buf[n++] = v1src3[list[i]];
      buf[n++] = v2src3[list[i]];
      buf[n++] = v3src3[list[i]];
      buf[n++] = v4src3[list[i]];
      buf[n++] = v5src3[list[i]];

      buf[n++] = v0src4[list[i]];
      buf[n++] = v1src4[list[i]];
      buf[n++] = v2src4[list[i]];
      buf[n++] = v3src4[list[i]];
      buf[n++] = v4src4[list[i]];
      buf[n++] = v5src4[list[i]];

      buf[n++] = v0src5[list[i]];
      buf[n++] = v1src5[list[i]];
      buf[n++] = v2src5[list[i]];
      buf[n++] = v3src5[list[i]];
      buf[n++] = v4src5[list[i]];
      buf[n++] = v5src5[list[i]];

      buf[n++] = v0src6[list[i]];
      buf[n++] = v1src6[list[i]];
      buf[n++] = v2src6[list[i]];
      buf[n++] = v3src6[list[i]];
      buf[n++] = v4src6[list[i]];
      buf[n++] = v5src6[list[i]];
    }
    break;
  }

  // Dispersion interactions, no mixing

  case FORWARD_IK_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        buf[n++] = xsrc[list[i]];
        buf[n++] = ysrc[list[i]];
        buf[n++] = zsrc[list[i]];
      }
    }
    break;
  }

  case FORWARD_AD_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++)
        buf[n++] = src[list[i]];
    }
    break;
  }

  case FORWARD_IK_PERATOM_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        if (eflag_atom) buf[n++] = esrc[list[i]];
        if (vflag_atom) {
          buf[n++] = v0src[list[i]];
          buf[n++] = v1src[list[i]];
          buf[n++] = v2src[list[i]];
          buf[n++] = v3src[list[i]];
          buf[n++] = v4src[list[i]];
          buf[n++] = v5src[list[i]];
        }
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        buf[n++] = v0src[list[i]];
        buf[n++] = v1src[list[i]];
        buf[n++] = v2src[list[i]];
        buf[n++] = v3src[list[i]];
        buf[n++] = v4src[list[i]];
        buf[n++] = v5src[list[i]];
      }
    }
    break;
  }

  }
}

/* ----------------------------------------------------------------------
   unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  switch (flag) {

  // Coulomb interactions

  case FORWARD_IK: {
    FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      xdest[list[i]] = buf[n++];
      ydest[list[i]] = buf[n++];
      zdest[list[i]] = buf[n++];
    }
    break;
  }

  case FORWARD_AD: {
    FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] = buf[n++];
    break;
  }

  case FORWARD_IK_PERATOM: {
    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) esrc[list[i]] = buf[n++];
      if (vflag_atom) {
        v0src[list[i]] = buf[n++];
        v1src[list[i]] = buf[n++];
        v2src[list[i]] = buf[n++];
        v3src[list[i]] = buf[n++];
        v4src[list[i]] = buf[n++];
        v5src[list[i]] = buf[n++];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM: {
    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++) {
      v0src[list[i]] = buf[n++];
      v1src[list[i]] = buf[n++];
      v2src[list[i]] = buf[n++];
      v3src[list[i]] = buf[n++];
      v4src[list[i]] = buf[n++];
      v5src[list[i]] = buf[n++];
    }
    break;
  }

  // Disperion interactions, geometric mixing

  case FORWARD_IK_G: {
    FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      xdest[list[i]] = buf[n++];
      ydest[list[i]] = buf[n++];
      zdest[list[i]] = buf[n++];
    }
    break;
  }

  case FORWARD_AD_G: {
    FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] = buf[n++];
    break;
  }

  case FORWARD_IK_PERATOM_G: {
    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) esrc[list[i]] = buf[n++];
      if (vflag_atom) {
        v0src[list[i]] = buf[n++];
        v1src[list[i]] = buf[n++];
        v2src[list[i]] = buf[n++];
        v3src[list[i]] = buf[n++];
        v4src[list[i]] = buf[n++];
        v5src[list[i]] = buf[n++];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_G: {
    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      v0src[list[i]] = buf[n++];
      v1src[list[i]] = buf[n++];
      v2src[list[i]] = buf[n++];
      v3src[list[i]] = buf[n++];
      v4src[list[i]] = buf[n++];
      v5src[list[i]] = buf[n++];
    }
    break;
  }

  // Disperion interactions, arithmetic mixing

  case FORWARD_IK_A: {
    FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      xdest0[list[i]] = buf[n++];
      ydest0[list[i]] = buf[n++];
      zdest0[list[i]] = buf[n++];

      xdest1[list[i]] = buf[n++];
      ydest1[list[i]] = buf[n++];
      zdest1[list[i]] = buf[n++];

      xdest2[list[i]] = buf[n++];
      ydest2[list[i]] = buf[n++];
      zdest2[list[i]] = buf[n++];

      xdest3[list[i]] = buf[n++];
      ydest3[list[i]] = buf[n++];
      zdest3[list[i]] = buf[n++];

      xdest4[list[i]] = buf[n++];
      ydest4[list[i]] = buf[n++];
      zdest4[list[i]] = buf[n++];

      xdest5[list[i]] = buf[n++];
      ydest5[list[i]] = buf[n++];
      zdest5[list[i]] = buf[n++];

      xdest6[list[i]] = buf[n++];
      ydest6[list[i]] = buf[n++];
      zdest6[list[i]] = buf[n++];
    }
    break;
  }

  case FORWARD_AD_A: {
    FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      dest0[list[i]] = buf[n++];
      dest1[list[i]] = buf[n++];
      dest2[list[i]] = buf[n++];
      dest3[list[i]] = buf[n++];
      dest4[list[i]] = buf[n++];
      dest5[list[i]] = buf[n++];
      dest6[list[i]] = buf[n++];
    }
    break;
  }

  case FORWARD_IK_PERATOM_A: {
    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      if (eflag_atom) {
        esrc0[list[i]] = buf[n++];
        esrc1[list[i]] = buf[n++];
        esrc2[list[i]] = buf[n++];
        esrc3[list[i]] = buf[n++];
        esrc4[list[i]] = buf[n++];
        esrc5[list[i]] = buf[n++];
        esrc6[list[i]] = buf[n++];
      }
      if (vflag_atom) {
        v0src0[list[i]] = buf[n++];
        v1src0[list[i]] = buf[n++];
        v2src0[list[i]] = buf[n++];
        v3src0[list[i]] = buf[n++];
        v4src0[list[i]] = buf[n++];
        v5src0[list[i]] = buf[n++];

        v0src1[list[i]] = buf[n++];
        v1src1[list[i]] = buf[n++];
        v2src1[list[i]] = buf[n++];
        v3src1[list[i]] = buf[n++];
        v4src1[list[i]] = buf[n++];
        v5src1[list[i]] = buf[n++];

        v0src2[list[i]] = buf[n++];
        v1src2[list[i]] = buf[n++];
        v2src2[list[i]] = buf[n++];
        v3src2[list[i]] = buf[n++];
        v4src2[list[i]] = buf[n++];
        v5src2[list[i]] = buf[n++];

        v0src3[list[i]] = buf[n++];
        v1src3[list[i]] = buf[n++];
        v2src3[list[i]] = buf[n++];
        v3src3[list[i]] = buf[n++];
        v4src3[list[i]] = buf[n++];
        v5src3[list[i]] = buf[n++];

        v0src4[list[i]] = buf[n++];
        v1src4[list[i]] = buf[n++];
        v2src4[list[i]] = buf[n++];
        v3src4[list[i]] = buf[n++];
        v4src4[list[i]] = buf[n++];
        v5src4[list[i]] = buf[n++];

        v0src5[list[i]] = buf[n++];
        v1src5[list[i]] = buf[n++];
        v2src5[list[i]] = buf[n++];
        v3src5[list[i]] = buf[n++];
        v4src5[list[i]] = buf[n++];
        v5src5[list[i]] = buf[n++];

        v0src6[list[i]] = buf[n++];
        v1src6[list[i]] = buf[n++];
        v2src6[list[i]] = buf[n++];
        v3src6[list[i]] = buf[n++];
        v4src6[list[i]] = buf[n++];
        v5src6[list[i]] = buf[n++];
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_A: {
    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];

    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];

    for (int i = 0; i < nlist; i++) {
      v0src0[list[i]] = buf[n++];
      v1src0[list[i]] = buf[n++];
      v2src0[list[i]] = buf[n++];
      v3src0[list[i]] = buf[n++];
      v4src0[list[i]] = buf[n++];
      v5src0[list[i]] = buf[n++];

      v0src1[list[i]] = buf[n++];
      v1src1[list[i]] = buf[n++];
      v2src1[list[i]] = buf[n++];
      v3src1[list[i]] = buf[n++];
      v4src1[list[i]] = buf[n++];
      v5src1[list[i]] = buf[n++];

      v0src2[list[i]] = buf[n++];
      v1src2[list[i]] = buf[n++];
      v2src2[list[i]] = buf[n++];
      v3src2[list[i]] = buf[n++];
      v4src2[list[i]] = buf[n++];
      v5src2[list[i]] = buf[n++];

      v0src3[list[i]] = buf[n++];
      v1src3[list[i]] = buf[n++];
      v2src3[list[i]] = buf[n++];
      v3src3[list[i]] = buf[n++];
      v4src3[list[i]] = buf[n++];
      v5src3[list[i]] = buf[n++];

      v0src4[list[i]] = buf[n++];
      v1src4[list[i]] = buf[n++];
      v2src4[list[i]] = buf[n++];
      v3src4[list[i]] = buf[n++];
      v4src4[list[i]] = buf[n++];
      v5src4[list[i]] = buf[n++];

      v0src5[list[i]] = buf[n++];
      v1src5[list[i]] = buf[n++];
      v2src5[list[i]] = buf[n++];
      v3src5[list[i]] = buf[n++];
      v4src5[list[i]] = buf[n++];
      v5src5[list[i]] = buf[n++];

      v0src6[list[i]] = buf[n++];
      v1src6[list[i]] = buf[n++];
      v2src6[list[i]] = buf[n++];
      v3src6[list[i]] = buf[n++];
      v4src6[list[i]] = buf[n++];
      v5src6[list[i]] = buf[n++];
    }
    break;
  }

  // Disperion interactions, geometric mixing

  case FORWARD_IK_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        xdest[list[i]] = buf[n++];
        ydest[list[i]] = buf[n++];
        zdest[list[i]] = buf[n++];
      }
    }
    break;
  }

  case FORWARD_AD_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++)
        dest[list[i]] = buf[n++];
    }
    break;
  }

  case FORWARD_IK_PERATOM_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        if (eflag_atom) esrc[list[i]] = buf[n++];
        if (vflag_atom) {
          v0src[list[i]] = buf[n++];
          v1src[list[i]] = buf[n++];
          v2src[list[i]] = buf[n++];
          v3src[list[i]] = buf[n++];
          v4src[list[i]] = buf[n++];
          v5src[list[i]] = buf[n++];
        }
      }
    }
    break;
  }

  case FORWARD_AD_PERATOM_NONE: {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        v0src[list[i]] = buf[n++];
        v1src[list[i]] = buf[n++];
        v2src[list[i]] = buf[n++];
        v3src[list[i]] = buf[n++];
        v4src[list[i]] = buf[n++];
        v5src[list[i]] = buf[n++];
      }
    }
    break;
  }

  }
}

/* ----------------------------------------------------------------------
   pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  //Coulomb interactions

  if (flag == REVERSE_RHO) {
    FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];

  //Dispersion interactions, geometric mixing

  } else if (flag == REVERSE_RHO_G) {
    FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++)
      buf[i] = src[list[i]];

  //Dispersion interactions, arithmetic mixing

  } else if (flag == REVERSE_RHO_A) {
    FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      buf[n++] = src0[list[i]];
      buf[n++] = src1[list[i]];
      buf[n++] = src2[list[i]];
      buf[n++] = src3[list[i]];
      buf[n++] = src4[list[i]];
      buf[n++] = src5[list[i]];
      buf[n++] = src6[list[i]];
    }

  //Dispersion interactions, no mixing

  } else if (flag == REVERSE_RHO_NONE) {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++) {
        buf[n++] = src[list[i]];
      }
    }
  }
}

/* ----------------------------------------------------------------------
   unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
{
  int n = 0;

  //Coulomb interactions

  if (flag == REVERSE_RHO) {
    FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] += buf[i];

  //Dispersion interactions, geometric mixing

  } else if (flag == REVERSE_RHO_G) {
    FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++)
      dest[list[i]] += buf[i];

  //Dispersion interactions, arithmetic mixing

  } else if (flag == REVERSE_RHO_A) {
    FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
    FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
    for (int i = 0; i < nlist; i++) {
      dest0[list[i]] += buf[n++];
      dest1[list[i]] += buf[n++];
      dest2[list[i]] += buf[n++];
      dest3[list[i]] += buf[n++];
      dest4[list[i]] += buf[n++];
      dest5[list[i]] += buf[n++];
      dest6[list[i]] += buf[n++];
    }

  //Dispersion interactions, no mixing

  } else if (flag == REVERSE_RHO_NONE) {
    for (int k = 0; k < nsplit_alloc; k++) {
      FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
      for (int i = 0; i < nlist; i++)
        dest[list[i]] += buf[n++];
    }
  }
}

/* ----------------------------------------------------------------------
   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
------------------------------------------------------------------------- */

void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
{
  // try every factorization nprocs = fx * fy
  // tile = per-proc sub-grid, sized by rounding nx/fx and ny/fy up
  // a candidate wins if its half-perimeter (tile_x + tile_y) is smaller than
  //   the best so far, or equal with a larger tile area
  // *px,*py are only written when a candidate wins

  int min_surf = 2 * (nx + ny);
  int best_area = 0;     // tile_x*tile_y of the current winner

  for (int fx = 1; fx <= nprocs; fx++) {
    if (nprocs % fx != 0) continue;

    const int fy = nprocs/fx;
    const int tile_x = nx/fx + ((nx % fx) ? 1 : 0);
    const int tile_y = ny/fy + ((ny % fy) ? 1 : 0);
    const int surf = tile_x + tile_y;
    const int area = tile_x*tile_y;

    const bool better = (surf < min_surf) ||
      (surf == min_surf && area > best_area);
    if (!better) continue;

    min_surf = surf;
    best_area = area;
    *px = fx;
    *py = fy;
  }
}

/* ----------------------------------------------------------------------
   charge assignment into rho1d
   dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */

void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
			      const FFT_SCALAR &dz, int ord,
                             FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
{
  int k,l;
  FFT_SCALAR r1,r2,r3;

  for (k = (1-ord)/2; k <= ord/2; k++) {
    r1 = r2 = r3 = ZEROF;

    for (l = ord-1; l >= 0; l--) {
      r1 = rho_c[l][k] + r1*dx;
      r2 = rho_c[l][k] + r2*dy;
      r3 = rho_c[l][k] + r3*dz;
    }
    r1d[0][k] = r1;
    r1d[1][k] = r2;
    r1d[2][k] = r3;
  }
}

/* ----------------------------------------------------------------------
   charge assignment into drho1d
   dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */

void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
                          const FFT_SCALAR &dz, int ord,
                              FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
{
  int k,l;
  FFT_SCALAR r1,r2,r3;

  for (k = (1-ord)/2; k <= ord/2; k++) {
    r1 = r2 = r3 = ZEROF;

    for (l = ord-2; l >= 0; l--) {
      r1 = drho_c[l][k] + r1*dx;
      r2 = drho_c[l][k] + r2*dy;
      r3 = drho_c[l][k] + r3*dz;
    }
    dr1d[0][k] = r1;
    dr1d[1][k] = r2;
    dr1d[2][k] = r3;
  }
}

/* ----------------------------------------------------------------------
   generate coefficients for the weight function of order n

              (n-1)
  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
           k=-(n-1)
  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
      k is odd integers if n is even and even integers if n is odd
              ---
             | n-1
             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
  wn(k,x) = <  l=0
             |
             |  0                       otherwise
              ---
  a coefficients are packed into the array rho_coeff to eliminate zeros
  rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
------------------------------------------------------------------------- */

void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff,
                                 int ord)
{
  // coeff  = output, coeff[l][m]  for l = 0..ord-1
  // dcoeff = output, dcoeff[l][m] for l = 0..ord-2, filled with
  //          (l+1)*poly[l+1][k], i.e. the term-by-term derivative of coeff
  // ord    = order of the weight function

  // scratch table poly[l][k] with l in [0,ord) and k in [-ord,ord]

  FFT_SCALAR **poly;
  memory->create2d_offset(poly,ord,-ord,ord,"pppm/disp:a");

  for (int l = 0; l < ord; l++)
    for (int k = -ord; k <= ord; k++)
      poly[l][k] = 0.0;

  // build order j+1 from order j; only every other k is touched per j

  poly[0][0] = 1.0;
  for (int j = 1; j < ord; j++) {
    for (int k = -j; k <= j; k += 2) {
      FFT_SCALAR sum = 0.0;
      for (int l = 0; l < j; l++) {
        poly[l+1][k] = (poly[l][k+1]-poly[l][k-1]) / (l+1);
#ifdef FFT_SINGLE
        sum += powf(0.5,(float) l+1) *
          (poly[l][k-1] + powf(-1.0,(float) l) * poly[l][k+1]) / (l+1);
#else
        sum += pow(0.5,(double) l+1) *
          (poly[l][k-1] + pow(-1.0,(double) l) * poly[l][k+1]) / (l+1);
#endif
      }
      poly[0][k] = sum;
    }
  }

  // pack every other column k = 1-ord, 3-ord, ..., ord-1 into consecutive
  // output columns m starting at (1-ord)/2

  int m = (1-ord)/2;
  for (int k = 1-ord; k < ord; k += 2, m++) {
    for (int l = 0; l < ord; l++) {
      coeff[l][m] = poly[l][k];
      if (l > 0) dcoeff[l-1][m] = l*poly[l][k];
    }
  }

  memory->destroy2d_offset(poly,-ord);
}

/* ----------------------------------------------------------------------
   Slab-geometry correction term to dampen inter-slab interactions between
   periodically repeating slabs.  Yields good approximation to 2D Ewald if
   adequate empty space is left between repeating slabs (J. Chem. Phys.
   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
------------------------------------------------------------------------- */

void PPPMDisp::slabcorr(int eflag)
{
  // compute local contribution to global dipole moment

  double *q = atom->q;
  double **x = atom->x;
  double zprd = domain->zprd;
  int nlocal = atom->nlocal;

  double dipole = 0.0;
  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];

  // sum local contributions to get global dipole moment

  double dipole_all;
  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);

  // need to make non-neutral systems and/or
  //  per-atom energy translationally invariant

  double dipole_r2 = 0.0;
  if (eflag_atom || fabs(qsum) > SMALL) {
    for (int i = 0; i < nlocal; i++)
      dipole_r2 += q[i]*x[i][2]*x[i][2];

    // sum local contributions

    double tmp;
    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    dipole_r2 = tmp;
  }

  // compute corrections

  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
  const double qscale = force->qqrd2e * scale;

  if (eflag_global) energy_1 += qscale * e_slabcorr;

  // per-atom energy

  if (eflag_atom) {
    double efact = qscale * MY_2PI/volume;
    for (int i = 0; i < nlocal; i++)
      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
  }

  // add on force corrections

  double ffact = qscale * (-4.0*MY_PI/volume);
  double **f = atom->f;

  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
}

/* ----------------------------------------------------------------------
   perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */

int PPPMDisp::timing_1d(int n, double &time1d)
{
  // n      = number of repetitions to time
  // time1d = output: Coulomb grid time plus dispersion grid time, the latter
  //          multiplied by a weight that depends on the active mixing mode
  // returns 2 if differentiation_flag is set, else 4

  // weight for the dispersion timing; function[3] takes precedence

  int mixing = 1;
  if (function[2]) mixing = 4;
  if (function[3]) mixing = nsplit_alloc/2;

  const bool do_coulomb = function[0] != 0;
  const bool do_disp = (function[1] + function[2] + function[3]) != 0;

  // one backward pass when differentiation_flag == 1, otherwise three

  const int nback = (differentiation_flag != 1) ? 3 : 1;

  // clear the work arrays that are going to be transformed

  if (do_coulomb)
    for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
  if (do_disp)
    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;

  // Coulomb grid

  MPI_Barrier(world);
  double tstart = MPI_Wtime();

  if (do_coulomb) {
    for (int i = 0; i < n; i++) {
      fft1->timing1d(work1,nfft_both,1);
      for (int b = 0; b < nback; b++)
        fft2->timing1d(work1,nfft_both,-1);
    }
  }

  MPI_Barrier(world);
  time1d = MPI_Wtime() - tstart;

  // dispersion grid

  MPI_Barrier(world);
  tstart = MPI_Wtime();

  if (do_disp) {
    for (int i = 0; i < n; i++) {
      fft1_6->timing1d(work1_6,nfft_both_6,1);
      for (int b = 0; b < nback; b++)
        fft2_6->timing1d(work1_6,nfft_both_6,-1);
    }
  }

  MPI_Barrier(world);
  time1d += (MPI_Wtime() - tstart)*mixing;

  return differentiation_flag ? 2 : 4;
}

/* ----------------------------------------------------------------------
   perform and time the 3d FFTs required for N timesteps
------------------------------------------------------------------------- */

int PPPMDisp::timing_3d(int n, double &time3d)
{
  double time1,time2;
  int mixing = 1;
  if (function[2]) mixing = 4;
  if (function[3]) mixing = nsplit_alloc/2;

  if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
  if (function[1] + function[2] + function[3])
    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;


  MPI_Barrier(world);
  time1 = MPI_Wtime();

  if (function[0]) {
    for (int i = 0; i < n; i++) {
      fft1->compute(work1,work1,1);
      fft2->compute(work1,work1,-1);
      if (differentiation_flag != 1) {
        fft2->compute(work1,work1,-1);
        fft2->compute(work1,work1,-1);
      }
    }
  }

  MPI_Barrier(world);
  time2 = MPI_Wtime();
  time3d = time2 - time1;

  MPI_Barrier(world);
  time1 = MPI_Wtime();

  if (function[1] + function[2] + function[3]) {
    for (int i = 0; i < n; i++) {
      fft1_6->compute(work1_6,work1_6,1);
      fft2_6->compute(work1_6,work1_6,-1);
      if (differentiation_flag != 1) {
        fft2_6->compute(work1_6,work1_6,-1);
        fft2_6->compute(work1_6,work1_6,-1);
      }
    }
  }

  MPI_Barrier(world);
  time2 = MPI_Wtime();
  time3d += (time2 - time1) * mixing;

  if (differentiation_flag) return 2;
  return 4;
}

/* ----------------------------------------------------------------------
   memory usage of local arrays
   returns an estimate in bytes
   every term is accumulated in double precision so that products of
   grid extents / atom counts cannot overflow int arithmetic
------------------------------------------------------------------------- */

double PPPMDisp::memory_usage()
{
  // per-atom storage: 3 entries for each of nmax atoms

  double bytes = static_cast<double>(nmax) * 3 * sizeof(double);

  int mixing = 1;
  int diff = 3;     //depends on differentiation
  int per = 7;      //depends on per atom calculations
  if (differentiation_flag) {
    diff = 1;
    per = 6;
  }
  if (!evflag_atom) per = 0;     // no per-atom bricks without per-atom tallies

  // number of _6 bricks / density arrays; function[3] overrides function[2]

  if (function[2]) mixing = 7;
  if (function[3]) mixing = nsplit_alloc;

  if (function[0]) {
    // owned + ghost grid points, computed as double to avoid int overflow
    const double nbrick = static_cast<double>(nxhi_out-nxlo_out+1) *
      (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1);
    const double nfft = static_cast<double>(nfft_both);
    bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR);     //brick memory
    bytes += 6 * nfft * sizeof(double);           // vg
    bytes += nfft * sizeof(double);               // greensfn
    bytes += nfft * 3 * sizeof(FFT_SCALAR);       // density_FFT, work1, work2
    bytes += cg->memory_usage();
  }

  if (function[1] + function[2] + function[3]) {
    // owned + ghost grid points of the _6 grid, again in double
    const double nbrick_6 = static_cast<double>(nxhi_out_6-nxlo_out_6+1) *
      (nyhi_out_6-nylo_out_6+1) * (nzhi_out_6-nzlo_out_6+1);
    const double nfft_6 = static_cast<double>(nfft_both_6);
    bytes += (1 + diff + per) * nbrick_6 * sizeof(FFT_SCALAR) * mixing;     // density_brick + vd_brick + per atom bricks
    bytes += 6 * nfft_6 * sizeof(double);         // vg
    bytes += nfft_6 * sizeof(double);             // greensfn
    bytes += nfft_6 * (mixing + 2) * sizeof(FFT_SCALAR);    // density_FFT, work1, work2
    bytes += cg_6->memory_usage();
  }
  return bytes;
}